コード例 #1
0
 def _get_matches_list(matches: csr_matrix) -> pd.DataFrame:
     """Return the non-zero entries of ``matches`` as a three-column DataFrame.

     The row indices, column indices and values are all taken from a single
     COO view of the matrix and filtered with the same ``!= 0`` mask, so the
     three columns always have the same length. Pairing ``matches.nonzero()``
     with ``matches.data`` directly does not guarantee that: ``nonzero()``
     drops explicitly stored zeros while ``data`` keeps them.

     :param matches: sparse matrix whose non-zero entries are listed.
     :return: DataFrame with columns ``master_side`` (row index, int64),
         ``dupe_side`` (column index, int64) and ``similarity`` (the stored
         value at that position), one row per non-zero entry.
     """
     coo = matches.tocoo()
     # Explicitly stored zeros are not matches; drop them from every column.
     keep = coo.data != 0
     return pd.DataFrame({'master_side': coo.row[keep].astype(np.int64),
                          'dupe_side': coo.col[keep].astype(np.int64),
                          'similarity': coo.data[keep]})
コード例 #2
0
    def make_non_zero_information(
            self, weight_csr_matrix: csr_matrix) -> List[ROW_COL_VAL]:
        """Build one ROW_COL_VAL record per non-zero position of the matrix.

        :param weight_csr_matrix: a ``csr_matrix`` or ``ndarray``; only the
            first two index arrays returned by its ``nonzero()`` are used.
        :return: list of ROW_COL_VAL in the order ``nonzero()`` reports the
            positions. Each record holds the row index, the column index and
            the result of ``self.__get_value_index(row, column, matrix)``.
        """
        assert isinstance(weight_csr_matrix, (csr_matrix, ndarray))

        nonzero_positions = weight_csr_matrix.nonzero()
        rows, cols = nonzero_positions[0], nonzero_positions[1]
        assert len(rows) == len(cols)

        # NOTE(review): the helper presumably returns the matrix value stored
        # at (row, col) -- confirm against its definition.
        return [
            ROW_COL_VAL(row, col,
                        self.__get_value_index(row, col, weight_csr_matrix))
            for row, col in zip(rows, cols)
        ]
コード例 #3
0
def make_non_zero_information(weight_csr_matrix: csr_matrix):
    """Build one ROW_COL_VAL record per non-zero position of a CSR matrix.

    :param weight_csr_matrix: the ``csr_matrix`` to scan; any other type
        fails the assertion below.
    :return: list of ROW_COL_VAL in the order ``nonzero()`` reports the
        positions. Each record holds the row index, the column index and the
        result of ``__get_value_index(row, column, weight_csr_matrix)``.
    """
    assert isinstance(weight_csr_matrix, csr_matrix)

    nonzero_positions = weight_csr_matrix.nonzero()
    rows, cols = nonzero_positions[0], nonzero_positions[1]
    assert len(rows) == len(cols)

    # NOTE(review): the helper presumably returns the matrix value stored at
    # (row, col) -- confirm against its definition.
    return [
        ROW_COL_VAL(row, col, __get_value_index(row, col, weight_csr_matrix))
        for row, col in zip(rows, cols)
    ]