X.fillna(0, inplace=True) X["PairId"] = X["PairId"].apply(lambda x: int(x)-1) #print X edges = dataset.getAdjList_clique() #edges = dataset.getAdjList_lineGraph() #edges = dataset.getAdjList_tags() #edges = dataset.getAdjList_allTags() print "Length of edges returned", len(edges) Adj, rowsum = get_GCN_inputs(edges, len(X)) print "Adjacency Graph", Adj #print X X_Tags_Feature = Variable(getPostContexts(X, dataset), requires_grad=False) print len(X_Tags_Feature[0]) Pairs_train, Pairs_test, Y_train, Y_test = train_test_split(X.loc[:,'PairId'], X.loc[:,'Credible'], test_size=0.2, random_state=1234) trainset = torch.utils.data.TensorDataset(torch.LongTensor(Pairs_train.values), torch.torch.LongTensor(Y_train.values)) testset = torch.utils.data.TensorDataset(torch.LongTensor(Pairs_test.values), torch.torch.LongTensor(Y_test.values)) # Data loader (input pipeline) train_loader = torch.utils.data.DataLoader(dataset=trainset, batch_size=args.batch_size) test_loader = torch.utils.data.DataLoader(dataset=testset, batch_size=len(Pairs_test),
for Pairs_train, Pairs_test in kf.split(Dataset): count += 1 if count == 4: print result_list print sum(result_list) / len(result_list) torch.save( gcn_model.state_dict(), '/home/junting/Downloads/GCN/UserCredibility/Accuracy-Stackexchange/WWW/gcnmodel_' + args.root.split('/')[-1] + '_www' + '.pt') exit() edges4 = dataset.getAdjList_Similarity3(Pairs_train.tolist()) Adj4, rowsum = get_GCN_inputs3(edges4, len(X)) Adj4 = Adj4.to(device) X["Rating"] = dataset.Rating["Rating"] data, X_Tags_Feature2 = getPostContexts(X, dataset) X_Tags_Feature = Variable(data, requires_grad=False).cuda() # setting of GCN nComm = 1 nHid1 = 50 nHid2 = 10 nHid3 = 10 nHid4 = 5 nFeat = X_Tags_Feature.shape[1] gcn_model = GCN_WWW(nFeat, nHid1, nHid2, nHid3, nHid4, nComm).to(device) criterion = nn.MSELoss() gcn_optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, gcn_model.parameters()), lr=args.learning_rate,
# --- Build a class-balanced dataset and preprocess its features ---
# NOTE(review): reconstructed from a collapsed one-line fragment; as collapsed,
# the leading "#NegClass_Sample = ..." comment swallowed every statement after it.
#NegClass_Sample = pd.DataFrame(NegClass_List, columns=PosClass.columns)
# Balance classes: if positives already outnumber negatives keep all
# negatives; otherwise downsample negatives to match the positive count.
if len(PosClass) > len(NegClass):
    NegClass_Sample = NegClass
else:
    NegClass_Sample = NegClass.sample(n=len(PosClass))
X = pd.concat([PosClass, NegClass_Sample])

## PreProcess the dataset
# Coerce every column except the tag list to numeric; unparseable values
# become NaN and are then zero-filled.
cols = X.columns.drop('QTags')
X[cols] = X[cols].apply(pd.to_numeric, errors='coerce')
X.fillna(0, inplace=True)
# PairId appears to be 1-based in the raw data; shift to 0-based.
X["PairId"] = X["PairId"].apply(lambda x: int(x) - 1)
# Scaled context features overwrite columns from index 6 onward — assumes
# getPostContexts returns them in matching column order (TODO confirm).
X_Features_scaled = getPostContexts(X, dataset)
X.iloc[:, 6:] = X_Features_scaled
print(X)
exit()
#Text_Features_df = dataset.getTextFeatures(X)
#X_Text = X.merge(Text_Features_df, on='PairId')
#dtype = dict(PairId=int)
#
#print X.dtypes
# Posters = X.AskerId.unique()
# Commenters = X.CommenterId.unique()
# Users = set(Posters).union(set(Commenters))
#
# #Create UserIndex
# UserIndex = dict()