def test_extractFeatures_keyerror():
    """Extract features when some ground truth is missing from the local file.

    Covers the case where some ground-truth entries are not in the local
    caseversion file: the ground truth is loaded together with the
    caseversion objects, and the number of feature rows must equal the
    number of ground-truth labels.
    """
    caseversions = finddup.loadLocalCaseversions(
        'tests/data/small_274_0_key_error.json'
    )
    groundtruth = finddup.loadGroundTruth(
        'tests/data/groundtruth-274.csv',
        caseversions['objects'],
    )

    feature_matrix = finddup.extractFeatures(caseversions, groundtruth['ids'])
    row_count = feature_matrix.shape[0]

    # NOTE: 'perdictions' (sic) is the key this test reads from the loaded
    # ground truth; do not "correct" the spelling here.
    assert row_count == len(groundtruth['perdictions'])
def test_calcDiffs():
    """Check the shape of the result of ``filters.calcDiffs``.

    Takes the first two pairs produced by ``finddup.genAllCombinations`` and
    asserts that ``calcDiffs`` returns one entry per pair, that the first
    entry is a list, and that the first element of that list is a string.
    """
    cvs = finddup.loadLocalCaseversions('tests/data/small_274_0.json')
    comb_it = finddup.genAllCombinations(cvs)

    # Only the first two combinations are needed; the loop index is unused.
    selected_pairs = [next(comb_it) for _ in range(2)]
    diffs = filters.calcDiffs(cvs, selected_pairs)

    assert len(selected_pairs) == len(diffs)
    # isinstance is the idiomatic type check (and accepts subclasses).
    assert isinstance(diffs[0], list)
    assert isinstance(diffs[0][0], str)
def test_extractFeatures_default():
    """Check that ``extractFeatures`` returns one row per caseversion pair.

    All combinations from ``finddup.genAllCombinations`` are passed to
    ``finddup.extractFeatures``; the number of feature rows must equal
    n * (n - 1) / 2, where n is the number of loaded caseversion objects.
    """
    cvs = finddup.loadLocalCaseversions('tests/data/small_274_0.json')

    # FIXME: remove this dependency
    comb = list(finddup.genAllCombinations(cvs))
    features = finddup.extractFeatures(cvs, comb)

    object_count = len(cvs['objects'])
    # n * (n - 1) is always even, so floor division is exact and keeps the
    # expected count an int instead of a float on Python 3.
    pairs_count = object_count * (object_count - 1) // 2
    assert features.shape[0] == pairs_count
def test_loadGroundTruth_filter_by_csv():
    """Compare ``loadGroundTruth`` output against a hand-written expectation.

    The ground truth is loaded together with the caseversion objects from
    the "key error" fixture, and the result must equal the two labels and
    two id pairs spelled out below.
    """
    caseversions = finddup.loadLocalCaseversions(
        'tests/data/small_274_0_key_error.json'
    )
    loaded = finddup.loadGroundTruth(
        'tests/data/groundtruth-274.csv',
        caseversions['objects'],
    )

    expected_labels = ["merge", "none"]
    expected_ids = [
        {"lhs_id": "210201", "rhs_id": "210202"},
        # 210521 does not exist in csv
        {"lhs_id": "210201", "rhs_id": "211079"},
    ]
    expected = {"perdictions": expected_labels, "ids": expected_ids}

    assert loaded == expected
def test_extractFeatures_select():
    """Extract features only for the pairs listed in the ground truth.

    The ground truth is loaded without a caseversion filter, and the number
    of feature rows must equal the number of ground-truth labels.
    """
    caseversions = finddup.loadLocalCaseversions('tests/data/small_274_0.json')
    groundtruth = finddup.loadGroundTruth('tests/data/groundtruth-274.csv')

    feature_matrix = finddup.extractFeatures(caseversions, groundtruth['ids'])
    row_count = feature_matrix.shape[0]

    # NOTE: 'perdictions' (sic) is the key this test reads from the loaded
    # ground truth; do not "correct" the spelling here.
    assert row_count == len(groundtruth['perdictions'])