예제 #1
0
def quotient_similarity(m,
                        partition,
                        agg='sum',
                        diag_value=None,
                        check=False,
                        n_cpu=DEFAULT_CPUS):
    '''Build the quotient similarity matrix induced by a partition of the rows of `m`

    The rows of `m` are merged group by group with `merge_row_partition`, the
    intermediate result is transposed, and the same merge is applied a second
    time so that both dimensions end up reduced by the partition.

    Params:
    - m: A symmetric 2D array (can be sparse) containing the similarity matrix
    - partition: A list-of-lists partitioning the rows/columns of `m`
    - agg: One of ('sum', 'min', 'max', 'mean', 'getnnz') or a function that takes as parameters the matrix and a range of indices from the partition and aggregates the values across the rows
    - diag_value: If not None, value handed to `setdiag` on the result before explicit zeros are removed. Default is None (diagonal left as computed)
    - check: Logical. Should the function check that `partition` is a valid partition of the rows of `m`? Default is False
    - n_cpu: Forwarded unchanged to `merge_row_partition` (presumably the number of worker processes; confirm against that helper)

    Returns a similarity matrix reduced to the dimension induced by the partition

    Raises ValueError if `agg` is neither a callable nor an accepted method name,
    or if `check` is set and `partition` fails `is_partition`
    '''

    # Resolve the aggregation callable first, so that an invalid `agg` is
    # reported before the partition is validated.
    if callable(agg):
        f_agg = agg
    elif isinstance(agg, str) and agg in MATRIX_METHOD_STR:

        def f_agg(m, p):
            # Aggregate the rows selected by `p` with the matrix method named
            # by `agg`, collapsing along axis 0, and wrap the result as CSR.
            return csr_matrix(getattr(m[p, :], agg)(axis=0))
    else:
        raise ValueError(MATRIX_METHOD_STR_ERR)

    # Optional validation: the partition must cover row indices 0 .. n_rows - 1.
    if check and not is_partition(partition, start=0, end=m.shape[0] - 1):
        raise ValueError('Please provide a proper partition')

    # First pass merges the rows; transposing lets the second pass merge what
    # were originally the columns.
    rows_merged = merge_row_partition(m, partition, f_agg, n_cpu)
    result = merge_row_partition(rows_merged.T, partition, f_agg, n_cpu)

    if diag_value is not None:
        result.setdiag(diag_value)

    # Drop explicitly stored zeros (including any written by setdiag).
    result.eliminate_zeros()

    return result
예제 #2
0
def test_not_partition2():
    '''Groups that skip a value (5 is absent, 6 is present) must be rejected.'''
    groups = [
        [0],
        [1, 2, 3],
        [4, 6],
    ]
    assert not is_partition(groups)
예제 #3
0
def test_not_partition1():
    '''Groups with a hole in the middle (3 is absent) must be rejected.'''
    groups = [
        [0],
        [1, 2],
        [4, 5],
    ]
    assert not is_partition(groups)
예제 #4
0
def test_not_partition():
    '''Groups holding 10..15 must be rejected when checked with start=10, end=16.'''
    groups = [
        [10],
        [11, 12, 13],
        [14, 15],
    ]
    assert not is_partition(groups, start=10, end=16)
예제 #5
0
def test_is_partition():
    '''Groups holding 10..15 are accepted when only start=10 is given.'''
    # NOTE(review): a second function named test_is_partition appears further
    # down in this listing; if both live in the same module the later
    # definition shadows this one and this test never runs. Consider renaming.
    groups = [
        [10],
        [11, 12, 13],
        [14, 15],
    ]
    assert is_partition(groups, start=10)
예제 #6
0
def test_is_partition():
    '''Groups holding 0..5 are accepted with the default arguments.'''
    groups = [
        [0],
        [1, 2, 3],
        [4, 5],
    ]
    assert is_partition(groups)