# NOTE: run_model is a method on the training wrapper class (class definition
# not shown). It assumes module-level imports of random, time, numpy as np,
# torch, torch.nn as nn, and tqdm, plus the local MeanAggregator, Encoder,
# SupervisedGraphSage, and Question_Ans classes.
def run_model(self):
    np.random.seed(1)
    random.seed(1)
    # feat_data, labels, adj_lists = load_cora()

    # Frozen node-feature lookup table.
    features = nn.Embedding(self.num_nodes, self.num_feats)
    features.weight = nn.Parameter(torch.FloatTensor(self.feat_data),
                                   requires_grad=False)
    print('Features weight initialized')
    if self.if_cuda:
        features = features.cuda()

    # Two-layer GraphSAGE encoder stack: layer 1 maps raw features to 128
    # dims, layer 2 maps layer-1 embeddings to 64 dims.
    agg1 = MeanAggregator(features, cuda=self.if_cuda)
    print('Agg 1 Initialized')
    enc1 = Encoder(features, self.num_feats, 128, self.adj_lists, agg1,
                   gcn=True, cuda=self.if_cuda)
    print('Encoder 1 Initialized')
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=self.if_cuda)
    print('Agg 2 Initialized')
    enc2 = Encoder(lambda nodes: enc1(nodes).t(), enc1.embed_dim, 64,
                   self.adj_lists, agg2, base_model=enc1, gcn=True,
                   cuda=self.if_cuda)
    print('Encoder 2 Initialized')

    # Neighbourhood sample sizes per layer.
    enc1.num_sample = 6
    enc2.num_sample = 4

    graphsage = SupervisedGraphSage(enc2)
    print('Model is Initialized')
    print('Model Weights :')
    print(enc1.weight)
    print(enc2.weight)
    print('End')

    train_dataset = Question_Ans(self.df, mode='train',
                                 umap=self.user_map, qmap=self.question_map)
    val_dataset = Question_Ans(self.df, mode='val',
                               umap=self.user_map, qmap=self.question_map)
    print('Dataloader Class Called')
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset, batch_size=self.batch_size, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset, batch_size=self.batch_size, shuffle=False)
    print('Dataloaded')

    # Optimize only the trainable parameters (the feature table is frozen).
    optimizer = torch.optim.SGD(
        filter(lambda p: p.requires_grad, graphsage.parameters()), lr=self.lr)
    times = []
    for epoch in range(self.num_epochs):
        # Training phase.
        batch = 0
        running_loss = 0.0
        confusion_matrix_train = [[0, 0], [0, 0]]
        tk0 = tqdm(train_dataloader, total=len(train_dataloader))
        for questions, users, ans in tk0:
            batch += 1
            start_time = time.time()
            optimizer.zero_grad()
            ans = ans.float()
            if self.if_cuda:
                ans = ans.cuda()
            loss, preds = graphsage.loss(questions, users, ans)
            # The confusion matrix is indexed as [predicted][actual].
            for i in range(len(preds)):
                confusion_matrix_train[int(preds[i])][int(ans[i])] += 1
            metrics = get_metrics(confusion_matrix_train)
            loss.backward()
            optimizer.step()
            end_time = time.time()
            times.append(end_time - start_time)
            running_loss += loss.item()
            tk0.set_postfix(loss=running_loss / (batch * train_dataloader.batch_size),
                            suffix=str(metrics))
            if batch % 1000 == 0:
                print(confusion_matrix_train)

        # Validation phase: same loop, but no backward pass or optimizer step.
        val_losses = []
        batch = 0
        running_loss = 0.0
        confusion_matrix_val = [[0, 0], [0, 0]]
        tk1 = tqdm(val_dataloader, total=len(val_dataloader))
        for questions, users, ans in tk1:
            batch += 1
            start_time = time.time()
            ans = ans.float()
            if self.if_cuda:
                ans = ans.cuda()
            loss, preds = graphsage.loss(questions, users, ans)
            for i in range(len(preds)):
                confusion_matrix_val[int(preds[i])][int(ans[i])] += 1
            metrics = get_metrics(confusion_matrix_val)
            val_losses.append(loss.item())  # store the scalar, not the graph-holding tensor
            end_time = time.time()
            times.append(end_time - start_time)
            running_loss += loss.item()
            tk1.set_postfix(loss=running_loss / (batch * val_dataloader.batch_size),
                            suffix=str(metrics))
            if batch % 1000 == 0:
                print(confusion_matrix_val)
    return val_losses, graphsage
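# Both loops above depend on a get_metrics helper whose definition is not part
# of this section. The sketch below is one plausible, hypothetical
# implementation, assuming the 2x2 matrix is indexed as
# confusion_matrix[predicted][actual] with class 1 as the positive class.
def get_metrics(confusion_matrix):
    """Return accuracy, precision, and recall from a 2x2 confusion matrix."""
    tn = confusion_matrix[0][0]
    fn = confusion_matrix[0][1]  # predicted 0, actual 1
    fp = confusion_matrix[1][0]  # predicted 1, actual 0
    tp = confusion_matrix[1][1]
    total = tp + tn + fp + fn
    accuracy = (tp + tn) / total if total else 0.0
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    return {'acc': round(accuracy, 4),
            'prec': round(precision, 4),
            'rec': round(recall, 4)}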
# Assumed module-level imports for this function: numpy as np, random, time,
# torch, torch.nn as nn, and from sklearn.metrics import f1_score,
# accuracy_score, plus the local load_edgelist / load_embeddings helpers.
def run_edgelist(
        name="chg-miner",
        edgelist_path="../data/chg-miner/chg-miner-graph.txt",
        label_path="../data/chg-miner/chg-miner-labels.txt",
        # used to initialize features + for distances
        embedding_path="../poincare/embeddings/poincare_chg_miner_noburn.txt",
        embedding_header=False):
    feat_data, labels, adj_lists, num_nodes = load_edgelist(
        name, edgelist_path, label_path, embedding_path, embedding_header)

    # Frozen node-feature lookup table initialized from the Poincare embeddings.
    features = nn.Embedding(num_nodes, feat_data.shape[1])
    features.weight = nn.Parameter(torch.FloatTensor(feat_data),
                                   requires_grad=False)

    # Network: two-layer GraphSAGE encoder, everything on CPU.
    node_ordering_embeddings = load_embeddings(embedding_path, embedding_header)
    agg1 = MeanAggregator(features, cuda=False)
    enc1 = Encoder(features, feat_data.shape[1], 128, adj_lists, agg1,
                   gcn=True, cuda=False, ordering_embeddings=None)
    agg2 = MeanAggregator(lambda nodes: enc1(nodes).t(), cuda=False)
    enc2 = Encoder(lambda nodes: enc1(nodes).t(), enc1.embed_dim, 128,
                   adj_lists, agg2, base_model=enc1, gcn=True, cuda=False,
                   ordering_embeddings=node_ordering_embeddings)

    # Make sure we don't sample -- but change this later?
    enc1.num_sample = None
    enc2.num_sample = None

    graphsage = SupervisedGraphSage(max(labels)[0] + 1, enc2)
    rand_indices = np.random.permutation(num_nodes)
    test = rand_indices[:10]
    val = rand_indices[10:11]  # 1 for email
    train = list(rand_indices[11:])

    optimizer = torch.optim.SGD(
        filter(lambda p: p.requires_grad, graphsage.parameters()), lr=0.6)
    times = []
    for batch in range(1000):
        batch_nodes = train[:256]
        random.shuffle(train)
        start_time = time.time()
        optimizer.zero_grad()
        loss = graphsage.loss(
            batch_nodes,
            torch.LongTensor(labels[np.array(batch_nodes)]))
        loss.backward()
        optimizer.step()
        end_time = time.time()
        times.append(end_time - start_time)
        print(batch, loss.item())

    val_output = graphsage.forward(test)
    print("Test F1:", f1_score(labels[test],
                               val_output.data.numpy().argmax(axis=1),
                               average="macro"))
    print("Test Accuracy:", accuracy_score(labels[test],
                                           val_output.data.numpy().argmax(axis=1)))
    print("Average batch time:", np.mean(times))

    # Dump the learned embeddings, one whitespace-separated row per node.
    embeddings = graphsage.embed(np.arange(num_nodes)).detach().numpy()
    out_path = 'embeddings/' + 'graphsage_' + edgelist_path.split('/')[-1]
    with open(out_path, 'w') as out:
        for i in range(embeddings.shape[0]):
            out.write(str(i) + ' ' + ' '.join(str(x) for x in embeddings[i]) + '\n')
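# Hypothetical entry point: run the edgelist pipeline with its default
# chg-miner paths. Adjust the arguments to point at your own edgelist, label,
# and Poincare embedding files, and make sure the embeddings/ output directory
# exists before running.
if __name__ == '__main__':
    run_edgelist()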