def _get_matches_list(matches: csr_matrix) -> pd.DataFrame:
    """Return one row per non-zero entry of ``matches``.

    :param matches: sparse matrix whose non-zero entries are the matches.
    :return: DataFrame with columns ``master_side`` (row index, int64),
        ``dupe_side`` (column index, int64) and ``similarity`` (the stored
        value at that position).
    """
    # Take indices and values from the same COO view so they stay aligned.
    # ``matches.nonzero()`` drops explicitly stored zeros while
    # ``matches.data`` keeps them; mixing the two yields arrays of different
    # lengths (and a pandas ValueError) whenever the matrix holds an
    # explicit zero.
    coo = matches.tocoo(copy=False)
    # Same filter ``nonzero()`` applies; boolean indexing copies, so the
    # input matrix's buffers are never shared with the result.
    keep = coo.data != 0
    return pd.DataFrame({
        'master_side': coo.row[keep].astype(np.int64),
        'dupe_side': coo.col[keep].astype(np.int64),
        'similarity': coo.data[keep],
    })
def make_non_zero_information(
        self, weight_csr_matrix: csr_matrix) -> List[ROW_COL_VAL]:
    """List every non-zero position of the weight matrix as a ROW_COL_VAL.

    Each record is built from the row index, the column index, and whatever
    ``self.__get_value_index`` returns for that position (presumably the
    stored weight; the helper is defined elsewhere, so confirm there).

    :param weight_csr_matrix: a ``csr_matrix`` or ``ndarray`` of weights.
    :return: list of ROW_COL_VAL, in the order ``nonzero()`` reports them.
    """
    assert isinstance(weight_csr_matrix, (csr_matrix, ndarray))

    nonzero_coords = weight_csr_matrix.nonzero()
    rows, cols = nonzero_coords[0], nonzero_coords[1]
    assert len(rows) == len(cols)

    return [
        ROW_COL_VAL(
            row, col,
            self.__get_value_index(row, col, weight_csr_matrix))
        for row, col in zip(rows, cols)
    ]
def make_non_zero_information(weight_csr_matrix: csr_matrix):
    """Collect the non-zero positions of a CSR matrix as ROW_COL_VAL records.

    For each position reported by ``nonzero()``, a ROW_COL_VAL is created
    from the row index, the column index, and the result of
    ``__get_value_index`` for that position (presumably the stored weight;
    the helper is defined elsewhere, so confirm there).

    :param weight_csr_matrix: weight matrix; must be a ``csr_matrix``.
    :return: list of ROW_COL_VAL, in the order ``nonzero()`` reports them.
    """
    assert isinstance(weight_csr_matrix, csr_matrix)

    nonzero_coords = weight_csr_matrix.nonzero()
    rows = nonzero_coords[0]
    cols = nonzero_coords[1]
    assert len(rows) == len(cols)

    records = []
    for row, col in zip(rows, cols):
        value = __get_value_index(row, col, weight_csr_matrix)
        records.append(ROW_COL_VAL(row, col, value))
    return records