예제 #1
0
    # Build a MetaRule over body0 and body1, passing the column pair
    # (attr_name + "1", attr_name + "2").
    # NOTE(review): presumably these are the columns the two bodies are joined
    # on (the progress message below calls this step "join") -- confirm
    # against MetaRule's definition.
    join = MetaRule([body0, body1], [[[attr_name + "1"], [attr_name + "2"]]], alpha, False)
    # Each progress message reports the seconds elapsed since begtime, which is
    # set outside this excerpt.
    print("done join (" + str(time.time()-begtime) + "s)")
    # Wrap the join in a Project over the columns attr_name + "0" and
    # attr_name + "3" (presumably the columns that are kept -- TODO confirm).
    proj = Project(join, [attr_name + "0", attr_name + "3"])
    print("done project (" + str(time.time()-begtime) + "s)")
    # Work on a deep copy of body0 so that relabelling its DataFrame columns
    # below does not touch body0 itself. The new labels are the same two column
    # names that were passed to Project above.
    metap = copy.deepcopy(body0)
    metap.df.columns = [attr_name + "0", attr_name + "3"]
    # Combine the relabelled copy and the projection in a DisjunctionRule.
    # NOTE(review): the meaning of the trailing 0 is not visible here.
    disj = DisjunctionRule([metap, proj], alpha, 0)
    print("done disjunction (" + str(time.time()-begtime) + "s)")
        
    # Settings for the steps that follow. Only batch_size is consumed within
    # this excerpt; step and epochs are not used in the lines shown.
    meta = disj
    df = labels_df_train
    label = 'Label'
    step = 1e-3
    batch_size = 32
    epochs = 1000
    # y is used below as a torch tensor (y.size()).
    # NOTE(review): presumably one label per row of meta.df -- confirm in
    # align_labels.
    y = align_labels(meta, df, label)

    # Disabled code, kept for reference: it symmetrised the labels. For every
    # row labelled 1, if the reversed pair (the two columns swapped) was also
    # present in meta.df, that reversed row was labelled 1 as well; y was then
    # rebuilt from the label column and the temporary column dropped again.
    #tmp_df = meta.df
    #tmp_df["Label"] = y.numpy()
    #links = tmp_df.loc[(tmp_df[label] == 1)]
    #for index, row in links.iterrows():
    #    reverse = tmp_df.loc[(tmp_df[attr_name + "0"] == row[attr_name + "3"]) & (tmp_df[attr_name + "3"] == row[attr_name + "0"])]
    #    if reverse.shape[0] > 0:
    #        tmp_df.loc[(tmp_df[attr_name + "0"] == row[attr_name + "3"]) & (tmp_df[attr_name + "3"] == row[attr_name + "0"]), [label]] = 1
    #y = torch.FloatTensor(tmp_df[[label]].values)
    #tmp_df.drop([label], axis=1, inplace=True)
    
    print("done label alignment (" + str(time.time()-begtime) + "s)")
    # Pair each position 0..len(y)-1 with its label, so that batches carry row
    # indices (not features) alongside the labels.
    data = TensorDataset(torch.arange(y.size()[0]), y)
    # Shuffled mini-batches over all (index, label) pairs.
    all_loader = DataLoader(data, batch_size=batch_size, shuffle=True)
예제 #2
0
    # body1 is a deep copy of body0 whose DataFrame columns are relabelled to
    # attr_name + "2" and attr_name + "3"; body0 itself is left unchanged.
    body1 = copy.deepcopy(body0)
    body1.df.columns = [attr_name + "2", attr_name + "3"]
    # Build a MetaRule over body0 and body1, passing the column pair
    # (attr_name + "1", attr_name + "2").
    # NOTE(review): presumably these are the join columns (the progress message
    # below calls this step "join") -- confirm against MetaRule's definition.
    join = MetaRule([body0, body1], [[[attr_name + "1"], [attr_name + "2"]]],
                    alpha, False)
    # Each progress message reports the seconds elapsed since begtime, which is
    # set outside this excerpt.
    print("done join (" + str(time.time() - begtime) + "s)")
    # Wrap the join in a Project over the columns attr_name + "0" and
    # attr_name + "3" (presumably the columns that are kept -- TODO confirm).
    proj = Project(join, [attr_name + "0", attr_name + "3"])
    print("done project (" + str(time.time() - begtime) + "s)")

    # Disabled call to train(); the steps below are written out inline instead.
    # NOTE(review): whether they are equivalent to train() is not visible here.
    #train(proj, labels_df_train, 'Label', 1e-3, 32, 400, True)

    # Settings for the optimisation below; step is passed to Adam as the
    # learning rate.
    meta = proj
    label = 'Label'
    step = 1e-1
    batch_size = 32
    epochs = 1000
    # y is used below as a torch tensor (y.numpy()).
    # NOTE(review): presumably one label per row of meta.df -- confirm in
    # align_labels.
    y = align_labels(meta, labels_df_train, label)
    print("done label alignment (" + str(time.time() - begtime) + "s)")

    # First-axis indices of the non-zero entries of y (np.nonzero returns one
    # index array per dimension; [0] selects the first).
    pos_idx = np.nonzero(y.numpy())[0].tolist()
    # Shuffled mini-batches over those indices only; no labels are stored in
    # this dataset.
    pos_loader = DataLoader(TensorDataset(torch.LongTensor(pos_idx)),
                            batch_size=batch_size,
                            shuffle=True)
    # Adam over meta's parameters with learning rate `step`.
    optimizer = optim.Adam(meta.parameters(), lr=step)
    # reduction="sum" adds up the per-element losses instead of averaging them.
    loss_fn = nn.BCEWithLogitsLoss(reduction="sum")

    # basemetapredicate alpha: is predicates, project: sum
    # (original author's note; its intent is not evident from the code shown)
    # NOTE(review): `iter` shadows the Python builtin of the same name; it is
    # not updated within the lines shown here.
    iter = 0
    for epoch in range(epochs):
        # Reset at the start of every epoch (presumably an accumulator for the
        # loss over the positive batches -- TODO confirm).
        pos_loss = 0.0
        for idx in pos_loader:
            # idx[0] is the batch's index tensor (the dataset wraps a single
            # tensor). yb is a column of ones with one row per index in the
            # batch -- presumably the targets, since every index in this loader
            # comes from a non-zero entry of y.
            yb = torch.ones(idx[0].size()[0], 1)