예제 #1
0
    # Test-set label pairs for `target`; the two columns are renamed to the
    # names that are later used as merge keys against `yhat`.
    labels_df_test = dfs_test[target]
    labels_df_test.columns = [attr_name + "0", attr_name + "3"]

    # Known links for `target`: stack train and test labels, keep the rows
    # whose 'Relation' equals `target`, then drop the 'Relation' column.
    # NOTE(review): labels_df_test was just given only two columns, so (unless
    # one of them is literally named 'Relation') its rows get NaN in
    # 'Relation' after the concat and are removed by the filter below --
    # confirm that excluding the test links here is intended.
    true_links = pd.concat([labels_df_train, labels_df_test])
    true_links = true_links.loc[true_links['Relation'] == target]
    true_links.drop(['Relation'], axis=1, inplace=True)

    # Same links with the first two columns' values swapped but the original
    # column labels kept, i.e. each link reversed.
    cols = true_links.columns.tolist()
    inv_true_links = true_links[[cols[1], cols[0]]].copy()
    inv_true_links.columns = [cols[0], cols[1]]

    # One mask entry per training relation name: 1 everywhere except the
    # position of `target` and the position right after it, which are set to 0.
    # NOTE(review): presumably the entry following `target` is its inverse
    # relation -- confirm. If `target` were the last entry, this slice
    # assignment would lengthen `mask` by one element.
    mask = [1] * len(rel_names_train)
    pos = rel_names_train.index(target)
    mask[pos:pos + 2] = [0] * 2
    # `score` is defined outside this view; 1024 is presumably a batch size
    # -- TODO confirm against its signature.
    yhat = score(disj, 1024, target, mask)
    print("done evaluation (" + str(time.time() - begtime) + "s)")

    # Accumulators for the ranking metrics (by their names: mean reciprocal
    # rank and hits@10/1/3); they are only initialised in the visible lines.
    mrr = 0.0
    hits10 = 0.0
    hits1 = 0.0
    hits3 = 0.0
    ranks = []

    # Left-join the test pairs with the predictions on both key columns, so
    # every test pair is kept even when `yhat` has no matching row.
    scores = pd.merge(labels_df_test,
                      yhat,
                      left_on=[attr_name + "0", attr_name + "3"],
                      right_on=[attr_name + "0", attr_name + "3"],
                      how='left')
    # Every NaN in `scores` becomes 0, which includes the yhat-side columns of
    # test pairs that found no match in the join above.
    scores.fillna(0, inplace=True)
    print("done retrieving scores (" + str(time.time() - begtime) + "s)")
예제 #2
0
                            names=['id', 'type'])

    # Build the test-side tables. The return value of load_data is discarded,
    # so it presumably fills `dfs_test` in place -- TODO confirm.
    dfs_test = load_metadata(background_fname)
    load_data(facts_fname_test, dfs_test)

    # Test-set label pairs for `target`, renamed to the two key column names.
    labels_df_test = dfs_test[target]
    labels_df_test.columns = [attr_name + "0", attr_name + "3"]

    # Known links: train and test labels stacked, with the 'Label' column
    # removed.
    true_links = pd.concat([labels_df_train, labels_df_test])
    true_links.drop(['Label'], axis=1, inplace=True)

    # Same links with the first two columns' values swapped but the original
    # column labels kept, i.e. each link reversed.
    cols = true_links.columns.tolist()
    inv_true_links = true_links[[cols[1], cols[0]]].copy()
    inv_true_links.columns = [cols[0], cols[1]]

    # `score` is defined outside this view; 1024 is presumably a batch size
    # -- TODO confirm against its signature.
    yhat = score(proj, 1024)
    # Temporary numeric copy of the destination column, used only as the join
    # key against dest2type's 'id' column.
    yhat[['new_' + attr_name + "3"]] = yhat[[attr_name + "3"
                                             ]].apply(pd.to_numeric)
    # Left join keeps every prediction row; rows without a matching 'id' get
    # NaN in the columns contributed by dest2type.
    # NOTE(review): dest2type is presumably the frame read just above this
    # fragment with names=['id', 'type'] -- confirm.
    yhat = pd.merge(yhat, dest2type, \
                    left_on=['new_' + attr_name + "3"], \
                    right_on=['id'], \
                    how='left')
    # Remove the temporary key and the joined 'id' column.
    yhat.drop(['new_' + attr_name + "3", "id"], axis=1, inplace=True)
    # Every remaining NaN in `yhat` is replaced by the string "null"; this
    # includes rows that found no match in dest2type.
    yhat.fillna("null", inplace=True)
    print("done evaluation (" + str(time.time() - begtime) + "s)")

    # Accumulators for the ranking metrics (by their names: mean reciprocal
    # rank and hits@10/1/3); they are only initialised in the visible lines.
    mrr = 0.0
    hits10 = 0.0
    hits1 = 0.0
    hits3 = 0.0
    ranks = []
예제 #3
0
            
    # Build the test-side tables. The return value of load_data is discarded,
    # so it presumably fills `dfs_test` in place -- TODO confirm.
    dfs_test = load_metadata(background_fname)
    load_data(facts_fname_test, dfs_test)

    # Test-set label pairs for `target`, renamed to the two key column names
    # and given a constant 'Label' of 1.0.
    labels_df_test = dfs_test[target]
    labels_df_test.columns = [attr_name + "0", attr_name + "3"]
    labels_df_test['Label'] = 1.0

    # Known links: train and test labels stacked, with the 'Label' column
    # removed again.
    true_links = pd.concat([labels_df_train, labels_df_test])
    true_links.drop(['Label'], axis=1, inplace=True)

    # Same links with the first two columns' values swapped but the original
    # column labels kept, i.e. each link reversed.
    cols = true_links.columns.tolist()
    inv_true_links = true_links[[cols[1], cols[0]]].copy()
    inv_true_links.columns = [cols[0], cols[1]]
    
    # `score` is defined outside this view; 1024 is presumably a batch size
    # -- TODO confirm against its signature.
    yhat = score(disj, 1024)
    print("done evaluation (" + str(time.time()-begtime) + "s)")

    # Accumulators for the ranking metrics (by their names: mean reciprocal
    # rank and hits@10/1/3); they are only initialised in the visible lines.
    mrr = 0.0
    hits10 = 0.0
    hits1 = 0.0
    hits3 = 0.0
    ranks = []
    # One query per test row.
    for index, row in labels_df_test.iterrows():
        src = row[attr_name + "0"]
        dest = row[attr_name + "3"]
        # The string concatenation below requires src and dest to be str.
        print("query: " + src + " " + dest)

        # Look up the score and rank of `dest` for this `src`, checked against
        # the known links; per the original author, MINERVA evaluates this way.
        # `eval` is called with six arguments, so it is a function defined
        # outside this view that shadows the builtin.
        score4dest, rank4dest = eval(yhat, attr_name + "0", src, attr_name + "3", dest, true_links)
        # Disabled alternative: the same call against the reversed links.
        #score4dest_inv, rank4dest_inv = eval(yhat, attr_name + "0", src, attr_name + "3", dest, inv_true_links)
예제 #4
0
          str(weights.detach().numpy()))

    # Read the three values returned by cdd() on the 'default' entry of
    # join.AND; `slacks` is not used in the visible lines.
    lnn_beta, lnn_wts, slacks = join.AND['default'].cdd()
    # Print arrays with 3 decimals and without scientific notation.
    np.set_printoptions(precision=3, suppress=True)
    # NOTE(review): .item() / .detach().numpy() suggest these are torch
    # tensors -- confirm.
    print("LNN beta, weights: " + \
          str(np.around(lnn_beta.item(), decimals=3)) + " " + str(lnn_wts.detach().numpy()))

    # Build the test-side tables. The return value of load_data is discarded,
    # so it presumably fills `dfs_test` in place -- TODO confirm.
    dfs_test = load_metadata(background_fname)
    load_data(facts_fname_test, dfs_test)

    # Test-set label pairs for `target`, renamed to the two key column names.
    labels_df_test = dfs_test[target]
    labels_df_test.columns = [attr_name + "0", attr_name + "3"]
    print("read test data")

    # Disabled alternative: score with `disj` instead of `proj`.
    #yhat = score(disj, batch_size)
    yhat = score(proj, batch_size)
    print("done evaluation (" + str(time.time() - begtime) + "s)")

    # Distinct values of the first key column in the test labels. They pass
    # through a set, so the order of the resulting list is not defined.
    test_countries = list(
        set(labels_df_test[[attr_name + "0"
                            ]].to_numpy().transpose()[0].tolist()))
    # Distinct values of the second key column, built the same way.
    test_regions = list(
        set(labels_df_test[[attr_name + "3"
                            ]].to_numpy().transpose()[0].tolist()))

    # Opens auc.csv for writing, truncating any existing file.
    # NOTE(review): no close() is visible in this fragment -- confirm the
    # handle is closed later, or wrap the loop in a `with` block.
    fout = open('auc.csv', 'w')
    # Visit every (country, region) combination from the two lists above.
    for test_c in test_countries:
        for test_r in test_regions:
            # Rows of the test labels matching this exact pair; how `check`
            # is used is not visible in this fragment.
            check = labels_df_test.loc[
                (labels_df_test[attr_name + "0"] == test_c)
                & (labels_df_test[attr_name + "3"] == test_r)]