def test_StatesMatrixMerger():
    """Check that StatesMatrixMerger makes two matrices share one states matrix.

    Builds an index/data matrix from each of the first two entries of
    ``test_text_df["text"]``, merges their states matrices, and verifies that
    afterwards both index/data matrices reference the same states matrix
    object while their ``_data`` still equals the original values matrices.
    """
    val_mat1_text = list(tools.ngram(test_text_df["text"].values[0], [1]))
    val_mat2_text = list(tools.ngram(test_text_df["text"].values[1], [1]))
    val_mat1 = ValuesMatrix(val_mat1_text, force2d="as_col")
    val_mat2 = ValuesMatrix(val_mat2_text, force2d="as_col")

    idx_data_mat1 = val_mat1.build_index_data_matrix()
    idx_data_mat2 = val_mat2.build_index_data_matrix()

    # Hold on to the pre-merge reference-data objects so the test can check
    # below that update() leaves them empty.
    idx_data_mat1_old_ref_data = idx_data_mat1.states_matrix._ref_data
    idx_data_mat2_old_ref_data = idx_data_mat2.states_matrix._ref_data

    # Snapshot the index matrices to compare against their post-merge contents.
    old_idx_data_mat1 = idx_data_mat1.index_matrix.copy()
    old_idx_data_mat2 = idx_data_mat2.index_matrix.copy()

    # Precondition: both states matrices are referenced before merging.
    assert len(idx_data_mat1_old_ref_data) > 0
    assert len(idx_data_mat2_old_ref_data) > 0

    states_mats_merger = StatesMatrixMerger(
        idx_data_mat1.states_matrix, idx_data_mat2.states_matrix
    )
    # Precondition: the merger sees more than one distinct states matrix.
    assert len(states_mats_merger._unique_states_matrix_ids) > 1

    states_mats_merger.update()

    # The reference-data objects captured before the merge are now empty.
    assert len(idx_data_mat1_old_ref_data) == 0
    assert len(idx_data_mat2_old_ref_data) == 0

    # Use `is` rather than comparing id() values: `is` keeps both operands
    # alive during the comparison, whereas two separate id() calls could
    # spuriously match if a lookup produced a short-lived object whose
    # address was reused.
    assert idx_data_mat1.states_matrix is idx_data_mat2.states_matrix
    assert len(idx_data_mat2.states_matrix._ref_data) > 1

    # The first index matrix is expected to be unchanged; the second is
    # expected to have been rewritten by the merge.
    assert np.array_equal(idx_data_mat1.index_matrix, old_idx_data_mat1)
    assert not np.array_equal(idx_data_mat2.index_matrix, old_idx_data_mat2)

    # The underlying data of both matrices still equals the source values.
    assert np.array_equal(idx_data_mat1._data, val_mat1)
    assert np.array_equal(idx_data_mat2._data, val_mat2)
def test_Matrix_build_row_struct_index():
    """Check that build_row_struct_index() round-trips a values matrix.

    A values matrix is assembled from the ``_1d_ngram(2)`` output of the
    first two entries of ``test_text_df["text"]``. Evaluating the returned
    index matrix through the returned states matrix must reproduce it.
    """
    first_tokens = list(tools.ngram(test_text_df["text"].values[0], [1]))
    second_tokens = list(tools.ngram(test_text_df["text"].values[1], [1]))

    first_column = ValuesMatrix(first_tokens, force2d="as_col")
    second_column = ValuesMatrix(second_tokens, force2d="as_col")

    # Stack the two _1d_ngram(2) results along axis 0 into one matrix.
    stacked_rows = np.concatenate(
        (first_column._1d_ngram(2), second_column._1d_ngram(2)),
        axis=0,
    )
    combined = ValuesMatrix(stacked_rows)

    states, indices = combined.build_row_struct_index()

    # Evaluating the indices against the states must give back the input.
    rebuilt = states._eval(indices)
    assert np.array_equal(rebuilt, combined)