# ---------------------------------------------------------------------------
# Question ids, cross-validation splitter, adjacency inputs and GCN sizes.
# Relies on X, dataset, device, np and the get_GCN_inputs* helpers being
# defined earlier in the file.
# ---------------------------------------------------------------------------

# Distinct question ids, keeping only strictly positive ones.
QuestionId = X['QuestionId'].drop_duplicates()
QuestionId = QuestionId[QuestionId > 0]

# get data_loader
from sklearn.model_selection import KFold

# NOTE(review): shuffle=True with no random_state, so the folds are not
# reproducible from run to run -- confirm this is intended.
kf = KFold(n_splits=5, shuffle=True)

# `Dataset` (capital D) holds the question ids as a NumPy array; it is a
# different name from the lowercase `dataset` object used below.
# The `!= 0` mask cannot drop anything here because only ids > 0 remain.
Dataset = np.array(QuestionId[QuestionId != 0])

# get adj
# One edge structure per graph variant; every graph is built over len(X) nodes.
n_graph_nodes = len(X)
edges = dataset.getAdjList_clique()
edges2 = dataset.getAdjList_Similarity1()
edges3 = dataset.getAdjList_Similarity2()
# Disabled variant:
# edges4 = dataset.getAdjList_Similarity3(Pairs_train.values.tolist())
edges5 = dataset.getAdjMatrix_Identity(n_graph_nodes)

# Each helper returns an (adjacency, rowsum) pair. `rowsum` is rebound by
# every call, so after this section it holds the value from the edges5 call.
Adj, rowsum = get_GCN_inputs(edges, n_graph_nodes)
Adj2, rowsum = get_GCN_inputs3(edges2, n_graph_nodes)
Adj3, rowsum = get_GCN_inputs3(edges3, n_graph_nodes)
# Disabled variant:
# Adj4, rowsum = get_GCN_inputs3(edges4, n_graph_nodes)
Adj5, rowsum = get_GCN_inputs2(edges5, n_graph_nodes)

# Move all adjacency inputs to the target device in one pass.
Adj, Adj2, Adj3, Adj5 = (
    adjacency.to(device) for adjacency in (Adj, Adj2, Adj3, Adj5)
)

# setting of GCN
nComm = 1
nHid1, nHid2, nHid3, nHid4 = 50, 30, 20, 10

# Disabled: restore previously saved weights.
# gcn_model.load_state_dict(torch.load('gcn_complete2.pkl'))
## Preprocess the dataset
# Relies on X, dataset, pd, Variable and the graph helpers being defined
# earlier in the file.

# Coerce every column except 'QTags' to numeric. Cells that cannot be parsed
# become NaN (errors='coerce'); fillna(0) then zero-fills NaN in all columns.
cols = X.columns.drop('QTags')
X[cols] = X[cols].apply(pd.to_numeric, errors='coerce')
X.fillna(0, inplace=True)

# Shift PairId down by one and store it as an integer.
# NOTE(review): presumably converts 1-based ids to 0-based -- confirm.
X["PairId"] = X["PairId"].apply(lambda x: int(x) - 1)

# Parenthesised form prints the same on Python 2 and is valid on Python 3
# (the bare `print len(X)` statement is a SyntaxError there).
print(len(X))
# print X

# Disabled alternative edge constructions:
# edges = dataset.getAdjList_tags()
# edges = dataset.getAdjList_allTags()
# edges = dataset.getAdjList_lineGraph()
edges2 = dataset.getAdjList_clique()
edges = dataset.getAdjMatrix_Identity(len(X))
# edges = np.concatenate((edges1, edges2))

# Build the graph inputs. `rowsum` is rebound by the second call, so it ends
# up holding the value returned for edges2.
Adj, rowsum = get_GCN_inputs2(edges, len(X))
Adj2, rowsum = get_GCN_inputs(edges2, len(X))
Adj3 = makeAdj3(edges2, len(X))

# Disabled experiments with combining / normalising the adjacencies:
# Adj2 = get_GCN_inputs(edges2, len(X))
# Adj, rowsum = normalize(Adj + Adj2)
# Adj = sparse_mx_to_torch_sparse_tensor(Adj)
# print "Adjacency Graph", Adj
# print X

# Per-post feature data; wrapped as a Variable that takes no gradient.
data, X_Tags_Feature2 = getPostContexts(X, dataset)
# data = SparseMM(Adj)(data)
X_Tags_Feature = Variable(data, requires_grad=False)

# Disabled: keep only a hand-picked subset of feature columns.
# X_Tags_Feature = X_Tags_Feature[:, [0, 3, 4, 5, 6, 7, 8, 9, 10, 13, 14, 15, 16, 17, 18, 19, 26, 27, 28, 29, 30, 31]]
# exit()