def test_dedup_single_blocking_key_vs_block(self):
    """Check single-key NeighbourhoodBlock against Block on ``self.a``.

    The NeighbourhoodBlock is built twice -- once with a positional key
    and once with explicit ``left_on``/``right_on`` -- and handed to
    ``assert_index_comparisons`` together with a plain ``Block``.
    """
    key = 'var_block10'
    options = {'max_nulls': 1}

    indexers = []
    indexers.append(NeighbourhoodBlock(key, **options))
    indexers.append(NeighbourhoodBlock(left_on=key, right_on=key, **options))
    indexers.append(Block(key))

    # All three indexers are compared with ``eq`` on ``self.a``.
    self.assert_index_comparisons(eq, indexers, self.a)
    # Only the keyword-form NeighbourhoodBlock and Block are compared
    # with ``gt`` on ``self.incomplete_a`` (presumably the fixture with
    # missing values -- confirm against the test setup).
    self.assert_index_comparisons(gt, indexers[-2:], self.incomplete_a)
def test_dedup_multiple_blocking_keys_vs_Block(self):
    """Check two-key NeighbourhoodBlock against Block on ``self.a``.

    Same layout as the single-key variant, but every indexer receives
    the key pair ``['var_single', 'var_block10']``.
    """
    key_names = ('var_single', 'var_block10')

    def keys():
        # Each indexer gets its own list object, as in a literal.
        return list(key_names)

    by_position = NeighbourhoodBlock(keys(), max_nulls=1)
    by_keyword = NeighbourhoodBlock(
        left_on=keys(),
        right_on=keys(),
        max_nulls=1,
    )
    plain_block = Block(keys())
    indexers = [by_position, by_keyword, plain_block]

    # ``eq`` comparison across all three indexers on ``self.a``.
    self.assert_index_comparisons(eq, indexers, self.a)
    # ``gt`` comparison of the last two indexers on ``self.incomplete_a``.
    self.assert_index_comparisons(gt, indexers[-2:], self.incomplete_a)
def test_link_single_blocking_key_vs_Block(self):
    """Check single-key NeighbourhoodBlock against Block on two frames.

    Unlike the dedup variants, both ``self.a`` and ``self.b`` (and their
    ``incomplete_*`` counterparts) are passed to
    ``assert_index_comparisons``.
    """
    column = 'var_arange'
    nb_kwargs = dict(max_nulls=1)

    indexers = [
        NeighbourhoodBlock(column, **nb_kwargs),
        NeighbourhoodBlock(left_on=column, right_on=column, **nb_kwargs),
        Block(column),
    ]

    # Every indexer takes part in the ``eq`` comparison.
    self.assert_index_comparisons(eq, indexers, self.a, self.b)

    # The ``gt`` comparison only uses the last two indexers.
    trailing_pair = indexers[-2:]
    self.assert_index_comparisons(
        gt, trailing_pair, self.incomplete_a, self.incomplete_b
    )
def test_dedup_single_sorting_key_vs_SortedNeighbourhood(self, window):
    """Check windowed NeighbourhoodBlock against SortedNeighbourhood.

    ``window`` is forwarded as ``windows=`` to both NeighbourhoodBlock
    forms and as ``window=`` to SortedNeighbourhood.
    NOTE(review): ``window`` is presumably supplied by a parametrize
    decorator outside this block -- confirm.
    """
    sort_key = 'var_arange'
    nb_options = {'max_nulls': 1, 'windows': window}

    indexers = []
    indexers.append(NeighbourhoodBlock(sort_key, **nb_options))
    indexers.append(
        NeighbourhoodBlock(left_on=sort_key, right_on=sort_key, **nb_options)
    )
    indexers.append(SortedNeighbourhood(sort_key, window=window))

    # ``eq`` across all three indexers on ``self.a``.
    self.assert_index_comparisons(eq, indexers, self.a)
    # ``gt`` across the last two indexers on ``self.incomplete_a``.
    self.assert_index_comparisons(gt, indexers[-2:], self.incomplete_a)
def test_dedup_with_blocking_vs_SortedNeighbourhood(self, window):
    """Check NeighbourhoodBlock against SortedNeighbourhood with blocking.

    NeighbourhoodBlock receives the keys ``['var_arange', 'var_block10']``
    with ``windows=[window, 1]``; SortedNeighbourhood sorts on
    ``'var_arange'`` with ``block_on='var_block10'``.
    NOTE(review): ``window`` is presumably supplied by a parametrize
    decorator outside this block -- confirm.
    """
    sort_key, block_key = 'var_arange', 'var_block10'

    # Key and window lists are rebuilt inline for each constructor so no
    # list object is shared between indexers.
    by_position = NeighbourhoodBlock(
        [sort_key, block_key],
        max_nulls=1,
        windows=[window, 1],
    )
    by_keyword = NeighbourhoodBlock(
        left_on=[sort_key, block_key],
        right_on=[sort_key, block_key],
        max_nulls=1,
        windows=[window, 1],
    )
    sorted_nb = SortedNeighbourhood(sort_key, block_on=block_key, window=window)
    indexers = [by_position, by_keyword, sorted_nb]

    # ``eq`` across all three indexers on ``self.a``.
    self.assert_index_comparisons(eq, indexers, self.a)
    # ``gt`` across the last two indexers on ``self.incomplete_a``.
    self.assert_index_comparisons(gt, indexers[-2:], self.incomplete_a)
def get_test_algorithms():
    """Return a fresh list of indexing algorithm instances.

    The list holds, in order: ``Full``, ``Block`` and
    ``SortedNeighbourhood`` on ``'var_arange'``, two ``Random`` indexers
    (with and without replacement, both seeded with ``random_state=100``)
    and a ``NeighbourhoodBlock`` on ``'var_arange'``.
    """
    key = 'var_arange'
    random_options = {'random_state': 100}

    algorithms = [Full(), Block(key), SortedNeighbourhood(key)]
    # Same sample size and seed; only the ``replace`` flag differs.
    for with_replacement in (True, False):
        algorithms.append(
            Random(10, replace=with_replacement, **random_options)
        )
    algorithms.append(NeighbourhoodBlock(key))
    return algorithms
def test_link_vs_full(self):
    """Check NeighbourhoodBlock against Full on ``self.a`` and ``self.b``.

    ``max_non_matches`` is set to the number of columns in ``self.a``.
    """
    column_count = len(self.a.columns)
    permissive_nb = NeighbourhoodBlock(max_non_matches=column_count)
    full_index = Full()

    # Both indexers are handed to the ``eq`` comparison.
    self.assert_index_comparisons(
        eq, [permissive_nb, full_index], self.a, self.b
    )