def test_getCombinationSlice_step():
    """Check the slices produced by getCombinationSlice when step=3 is given.

    Five case versions are fed to genAllCombinations, and the resulting
    iterator is sliced with a slice size of 3 and ``step=3``.  The test
    expects exactly two slices: every slice holds 3 items except the second
    one, which holds a single item.
    """
    # NOTE(review): presumably step=3 keeps every third generated pair, which
    # would explain the 3 + 1 split -- confirm against finddup.
    caseversions = {
        "objects": [{'id': case_id} for case_id in range(12345, 12350)],
    }
    pair_iter = finddup.genAllCombinations(caseversions)

    slices_seen = 0
    slices = finddup.getCombinationSlice(3, pair_iter, step=3)
    for slices_seen, chunk in enumerate(slices, start=1):
        # Only the second slice is expected to be short.
        wanted_len = 1 if slices_seen == 2 else 3
        assert len(chunk) == wanted_len
        print(chunk)

    assert slices_seen == 2
def test_genAllCombinations():
    """Check the first three pairs produced by genAllCombinations.

    For three case versions the iterator is expected to yield dicts with
    ``lhs_id`` / ``rhs_id`` keys whose values are the case ids rendered as
    strings, in the order (1st, 2nd), (1st, 3rd), (2nd, 3rd).
    """
    caseversions = {
        "objects": [
            {'id': 12345},
            {'id': 12346},
            {'id': 12347},
        ],
    }
    expected = [
        {'lhs_id': '12345', 'rhs_id': '12346'},
        {'lhs_id': '12345', 'rhs_id': '12347'},
        {'lhs_id': '12346', 'rhs_id': '12347'},
    ]

    pair_iter = finddup.genAllCombinations(caseversions)

    # Pull exactly three items; only these are compared against `expected`.
    first_three = []
    for _ in range(3):
        first_three.append(next(pair_iter))

    assert expected == first_three
def test_calcDiffs():
    """Check the shape of the result returned by filters.calcDiffs.

    Loads the local case versions fixture, takes the first two generated
    pairs and verifies that calcDiffs returns one entry per pair, that the
    first entry is a list, and that its first element is a string.
    """
    cvs = finddup.loadLocalCaseversions('tests/data/small_274_0.json')
    comb_it = finddup.genAllCombinations(cvs)
    # Only the first two pairs are needed to exercise calcDiffs.
    selected_pairs = [next(comb_it) for _ in range(2)]

    diffs = filters.calcDiffs(cvs, selected_pairs)

    assert len(selected_pairs) == len(diffs)
    # isinstance is the idiomatic type check (also accepts subclasses).
    assert isinstance(diffs[0], list)
    assert isinstance(diffs[0][0], str)
def test_extractFeatures_default():
    """Check that extractFeatures returns one row per case-version pair.

    Loads the local case versions fixture, materialises every pair from
    genAllCombinations, and verifies that the first dimension of the value
    returned by extractFeatures equals n * (n - 1) / 2, where n is the
    number of entries in ``cvs['objects']``.
    """
    cvs = finddup.loadLocalCaseversions('tests/data/small_274_0.json')
    comb = list(finddup.genAllCombinations(cvs))  # FIXME: remove this dependency
    features = finddup.extractFeatures(cvs, comb)

    case_count = len(cvs['objects'])
    print(case_count)

    # n * (n - 1) is always even, so floor division is exact and keeps the
    # expected count an int instead of a float under Python 3.
    pairs_count = case_count * (case_count - 1) // 2
    assert features.shape[0] == pairs_count
def test_getCombinationSlice():
    """Check the slices produced by getCombinationSlice with a size of 3.

    Four case versions are fed to genAllCombinations, and the resulting
    iterator is sliced with a slice size of 3.  The test expects exactly two
    slices, each holding 3 items.
    """
    caseversions = {
        "objects": [{'id': case_id} for case_id in range(12345, 12349)],
    }
    pair_iter = finddup.genAllCombinations(caseversions)

    slices_seen = 0
    slices = finddup.getCombinationSlice(3, pair_iter)
    for slices_seen, chunk in enumerate(slices, start=1):
        assert len(chunk) == 3

    assert slices_seen == 2