def test_extractFeatures_keyerror():
    """Extract features when some ground truth is absent from the local caseversion file.

    Loads the ``small_274_0_key_error.json`` fixture, filters the ground
    truth against its ``objects``, and checks that the feature matrix has
    one row per ground-truth label.
    """
    caseversions = finddup.loadLocalCaseversions(
        'tests/data/small_274_0_key_error.json'
    )
    groundtruth = finddup.loadGroundTruth(
        'tests/data/groundtruth-274.csv',
        caseversions['objects'],
    )

    feature_matrix = finddup.extractFeatures(caseversions, groundtruth['ids'])
    row_count = feature_matrix.shape[0]
    # 'perdictions' (sic) is the key spelling used throughout these tests.
    label_count = len(groundtruth['perdictions'])

    assert row_count == label_count
def test_calcDiffs():
    """Check the shape of ``filters.calcDiffs`` output for two caseversion pairs.

    Takes the first two pairs produced by ``finddup.genAllCombinations`` and
    asserts that ``calcDiffs`` returns one entry per pair, that the first
    entry is a list, and that its first element is a string.
    """
    cvs = finddup.loadLocalCaseversions('tests/data/small_274_0.json')
    comb_it = finddup.genAllCombinations(cvs)
    # Only the first two pairs are needed; avoid consuming the whole iterator.
    selected_pairs = [next(comb_it) for _ in range(2)]
    diffs = filters.calcDiffs(cvs, selected_pairs)

    assert len(selected_pairs) == len(diffs)
    # isinstance is the idiomatic type check and also accepts subclasses.
    assert isinstance(diffs[0], list)
    assert isinstance(diffs[0][0], str)
def test_extractFeatures_default():
    """Extract features for every caseversion pair and check the row count.

    With ``n`` objects in the fixture, the feature matrix is expected to
    have ``n * (n - 1) / 2`` rows, one per pair passed in from
    ``finddup.genAllCombinations``.
    """
    cvs = finddup.loadLocalCaseversions('tests/data/small_274_0.json')
    comb = list(finddup.genAllCombinations(cvs))  # FIXME: remove this dependency
    features = finddup.extractFeatures(cvs, comb)

    object_count = len(cvs['objects'])
    print(object_count)
    # n * (n - 1) is always even, so floor division is exact and keeps the
    # expected count an int on both Python 2 and Python 3.
    pairs_count = object_count * (object_count - 1) // 2
    assert features.shape[0] == pairs_count
def test_loadGroundTruth_filter_by_csv():
    """Compare ``loadGroundTruth`` output for the key-error fixture with a fixed expectation.

    The ground truth is loaded together with the fixture's ``objects`` and
    must equal exactly two labelled id pairs.
    """
    caseversions = finddup.loadLocalCaseversions(
        'tests/data/small_274_0_key_error.json'
    )
    actual = finddup.loadGroundTruth(
        'tests/data/groundtruth-274.csv',
        caseversions['objects'],
    )

    first_pair = {"lhs_id": "210201", "rhs_id": "210202"}
    # 210521 does not exist in csv
    second_pair = {"lhs_id": "210201", "rhs_id": "211079"}

    expected = {
        "perdictions": ["merge", "none"],
        "ids": [first_pair, second_pair],
    }

    assert actual == expected
def test_extractFeatures_select():
    """Extract features for the ground-truth id pairs only.

    The feature matrix must have one row per ground-truth label.
    """
    caseversions = finddup.loadLocalCaseversions('tests/data/small_274_0.json')
    groundtruth = finddup.loadGroundTruth('tests/data/groundtruth-274.csv')

    feature_matrix = finddup.extractFeatures(caseversions, groundtruth['ids'])
    row_count = feature_matrix.shape[0]
    # 'perdictions' (sic) is the key spelling used throughout these tests.
    label_count = len(groundtruth['perdictions'])

    assert row_count == label_count