import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv


class GCN(torch.nn.Module):
    def __init__(self, num_node_features, num_gcn_layers, add_dropout, n_nodes1, n_nodes2):
        super(GCN, self).__init__()
        self.num_gcn_layers = num_gcn_layers
        self.add_dropout = add_dropout
        # 1 or 2 convolutional layers, followed by 1 linear layer
        self.conv1 = GCNConv(num_node_features, n_nodes1)
        if self.num_gcn_layers == 2:
            self.conv2 = GCNConv(n_nodes1, n_nodes2)
            self.lin1 = torch.nn.Linear(n_nodes2, 1)
        else:
            self.lin1 = torch.nn.Linear(n_nodes1, 1)

    def forward(self, data, weights_matrix):
        x, edge_index = data.x, data.edge_index
        # ELU activation, very often used for GCNs.
        x = self.conv1.forward(x, edge_index, weights_matrix)
        x = F.elu(x)
        if self.add_dropout:
            x = F.dropout(x, training=self.training)
        if self.num_gcn_layers == 2:
            x = self.conv2.forward(x, edge_index, weights_matrix)
            x = F.elu(x)
            if self.add_dropout:
                x = F.dropout(x, training=self.training)
        x = self.lin1.forward(x)
        return x.view(x.shape[0])
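## A minimal usage sketch for the GCN class above; the toy graph, feature sizes,
## and edge weights below are made up for illustration only.
from torch_geometric.data import Data

toy_x = torch.randn(4, 8)                                # 4 nodes, 8 features each
toy_edge_index = torch.tensor([[0, 1, 2, 3],
                               [1, 2, 3, 0]], dtype=torch.long)
toy_edge_weight = torch.ones(toy_edge_index.shape[1])    # one weight per edge

toy_data = Data(x=toy_x, edge_index=toy_edge_index)
toy_model = GCN(num_node_features=8, num_gcn_layers=2, add_dropout=True, n_nodes1=16, n_nodes2=16)
out = toy_model(toy_data, toy_edge_weight)
print(out.shape)                                         # torch.Size([4]) -- one scalar per node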
class encoder_model_extended_embedding(encoder_model):
    ## add a pretrained vector (Onto2vec, BiLSTM, BERT, ...) into the GCN
    def __init__(self, args, metric_module, **kwargs):
        super(encoder_model_extended_embedding, self).__init__(args, metric_module, **kwargs)
        if 'pretrained_weight' in kwargs:
            self.label_embedding = nn.Embedding(kwargs['num_of_word'], kwargs['word_vec_dim'])
            self.label_embedding.weight.data.copy_(torch.from_numpy(kwargs['pretrained_weight']))
            ## freeze the pretrained embedding (@fix_word_emb also excludes it from the optimizer)
            self.label_embedding.weight.requires_grad = False
        else:
            print('\n\nERROR: Must provide pretrained_weight for this model\n\n')
            exit()
        self.gcn1 = GCNConv(args.def_emb_dim + args.gcn_native_emb_dim, args.gcnn_dim)
        self.gcn2 = GCNConv(args.gcnn_dim, args.gcnn_dim)
        # self.LinearCombine = nn.Linear(args.def_emb_dim + args.gcnn_dim, args.gcnn_dim)
        self.dropout = nn.Dropout(p=kwargs['dropout'])
        ## let the GCN capture information not found in BERT, BiLSTM, etc.
        self.gcn_native_emb = nn.Embedding(args.num_label, args.gcn_native_emb_dim)
        self.gcn_native_emb.weight.data.normal_(mean=0.0, std=0.2)

    def gcn_2layer(self, labeldesc_loader, edge_index):
        ## take in the entire label space at once
        combined_embed = torch.cat((self.label_embedding.weight, self.gcn_native_emb.weight), 1)
        node_emb = self.nonlinear_gcnn(self.gcn1.forward(self.dropout(combined_embed), edge_index))
        node_emb = self.gcn2.forward(node_emb, edge_index)  ## no relu or tanh in the last layer
        ## alternative: concat @label_embedding after the GCN layers
        # node_emb = self.nonlinear_gcnn(self.gcn1.forward(self.dropout(self.gcn_native_emb.weight), edge_index))
        # node_emb = self.gcn2.forward(node_emb, edge_index)  ## no relu or tanh in the last layer
        # node_emb = torch.cat((self.label_embedding.weight, node_emb), 1)
        # node_emb = self.LinearCombine(node_emb)
        return node_emb
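## Standalone sketch (toy sizes, not from the project code) of the idea in
## encoder_model_extended_embedding.gcn_2layer: concatenate a frozen pretrained
## label embedding with a small trainable "native" embedding, then apply 2 GCN
## layers. relu stands in here for whatever @nonlinear_gcnn is configured to be.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

num_label, def_emb_dim, native_dim, gcnn_dim = 5, 8, 4, 6
pretrained_emb = nn.Embedding(num_label, def_emb_dim)
pretrained_emb.weight.requires_grad = False              # frozen, e.g. Onto2vec/BiLSTM/BERT vectors
native_emb = nn.Embedding(num_label, native_dim)         # trainable, captures graph-only signal
gcn1 = GCNConv(def_emb_dim + native_dim, gcnn_dim)
gcn2 = GCNConv(gcnn_dim, gcnn_dim)
edge_index = torch.tensor([[0, 1, 2, 3, 4],
                           [1, 2, 3, 4, 0]], dtype=torch.long)

combined = torch.cat((pretrained_emb.weight, native_emb.weight), 1)   # num_label x (8 + 4)
node_emb = gcn2(F.relu(gcn1(combined, edge_index)), edge_index)       # no nonlinearity after last layer
print(node_emb.shape)                                                 # torch.Size([5, 6])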
class GCN(nn.Module):
    def __init__(self, node_in_dim, node_out_dim=64, heads=1, dropout=0.1):
        super(GCN, self).__init__()
        ## note: @heads and @dropout are accepted but not used yet
        self.conv1 = GCNConv(node_in_dim, node_out_dim)
        self.conv2 = GCNConv(node_out_dim, node_out_dim)

    def forward(self, x, edge_index, edge_attr=None):
        ## note: no nonlinearity between the two conv layers, and @edge_attr is unused
        x = self.conv1.forward(x, edge_index)
        x = self.conv2.forward(x, edge_index)
        return x
## quick exploration of torch_geometric's GCNConv and data loading
import torch
import torch.nn as nn
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data

## do we need the entire graph? how do we know node indexing in a batch matches the edge index?
x = torch.tensor([[2, 1], [5, 6], [3, 7], [12, 0]], dtype=torch.float)
y = torch.tensor([0, 1, 0, 1], dtype=torch.float)
# the first list contains the indices of the source nodes,
# while the indices of the target nodes are in the second list.
edge_index = torch.tensor([[0, 1, 2, 0, 3],
                           [1, 0, 1, 3, 2]], dtype=torch.long)
data = Data(x=x, y=y, edge_index=edge_index)

gcn = GCNConv(2, 3)
gcn.forward(x, edge_index)

## GCNConv can also run directly on an embedding weight matrix
emb = nn.Embedding(10, 2)
emb.weight.shape
gcn.forward(emb.weight, edge_index)

from torch_geometric.datasets import Planetoid
from torch_geometric.data import DataLoader

dataset = Planetoid(root='/tmp/Cora', name='Cora')
dataset[0].edge_index  ## Planetoid holds a single graph; index the dataset to get it
loader = DataLoader(dataset, batch_size=32, shuffle=True)
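## Sketch (not from the original code) answering the batching question above:
## torch_geometric's DataLoader collates graphs into one big disconnected graph
## and offsets each graph's edge_index, so node indices keep matching the
## stacked feature matrix. The graph sizes here are made up.
g1 = Data(x=torch.randn(3, 2), edge_index=torch.tensor([[0, 1], [1, 2]], dtype=torch.long))
g2 = Data(x=torch.randn(2, 2), edge_index=torch.tensor([[0], [1]], dtype=torch.long))
toy_loader = DataLoader([g1, g2], batch_size=2)
toy_batch = next(iter(toy_loader))
print(toy_batch.x.shape)       # torch.Size([5, 2]) -- 3 + 2 nodes stacked
print(toy_batch.edge_index)    # g2's edges shifted by 3: tensor([[0, 1, 3], [1, 2, 4]])
print(toy_batch.batch)         # tensor([0, 0, 0, 1, 1]) -- which graph each node belongs to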
import os

import numpy as np
import torch
import torch.nn as nn
from scipy.special import softmax  ## softmax over numpy score arrays
from torch_geometric.nn import GCNConv
from tqdm import tqdm

## acc_and_f1 is a project helper (defined elsewhere in this repo)


class encoder_model(nn.Module):
    def __init__(self, args, metric_module, **kwargs):
        # @metric_module is either @entailment_model or cosine distance.
        # we observe that entailment_model doesn't directly ensure that the same labels get the same vectors;
        # entailment_model concatenates v1, v2, v1*v2, abs(v1-v2) and passes the result into an MLP.
        super(encoder_model, self).__init__()
        # we are able to call BERT here, but we cannot encode many nodes with BERT at once.
        # self.tokenizer = tokenizer
        # self.bert_lm_sentence = bert_lm_sentence  ## bert LM already tuned model
        self.metric_option = kwargs['metric_option']  ## should be 'entailment' or 'cosine'
        self.metric_module = metric_module
        self.args = args
        self.gcn1 = GCNConv(args.def_emb_dim, args.gcnn_dim)
        self.gcn2 = GCNConv(args.gcnn_dim, args.gcnn_dim)
        self.label_embedding = nn.Embedding(args.num_label, args.def_emb_dim)  ## each label is a vector
        self.classify_loss = nn.CrossEntropyLoss()
        self.nonlinear_gcnn = kwargs['nonlinear_gcnn']
        self.dropout = nn.Dropout(kwargs['dropout'])
        self.optimizer = None

    # def do_gcn(self, input_idx, edge_index):
    #     ## @input_idx is simple label indexing, e.g. [0 10 5] = take labels #0 #10 #5
    #     label_emb = self.label_embedding(input_idx)  ## batch x sent_len x dim
    #     return self.gcn(label_emb, edge_index)

    def gcn_2layer(self, labeldesc_loader, edge_index):
        ## take in the entire label space at once
        node_emb = self.nonlinear_gcnn(self.gcn1.forward(self.dropout(self.label_embedding.weight), edge_index))
        return self.gcn2.forward(node_emb, edge_index)  ## no relu or tanh in the last layer

    def make_optimizer(self):
        if self.args.fix_word_emb:
            print([n for n, p in self.named_parameters() if "label_embedding" not in n])
            return torch.optim.Adam(
                [p for n, p in self.named_parameters() if "label_embedding" not in n],
                lr=self.args.lr)
        else:
            return torch.optim.Adam(self.parameters(), lr=self.args.lr)

    def do_train(self, train_dataloader, labeldesc_loader, edge_index, dev_dataloader=None):
        torch.cuda.empty_cache()
        optimizer = self.make_optimizer()
        eval_acc = 0
        lowest_dev_loss = np.inf
        last_best_epoch = 0  ## initialized so the early-stopping check is always well defined
        for epoch in range(int(self.args.epoch)):
            self.train()
            tr_loss = 0
            ## for each batch
            for step, batch in enumerate(tqdm(train_dataloader, desc="ent. epoch {}".format(epoch))):
                label_emb = self.gcn_2layer(labeldesc_loader, edge_index)
                batch = tuple(t for t in batch)
                label_id_number_left, label_id_number_right, label_ids = batch
                ## used for indexing, so they have to be int arrays, not tensors
                label_id_number_left = label_id_number_left.squeeze(1).data.numpy()
                label_id_number_right = label_id_number_right.squeeze(1).data.numpy()
                ## need to backprop somehow:
                ## predict the class bio/molec/cellcompo? or predict whether 2 labels are similar?
                ## ... the latter sort of does the same thing the gcn already does
                loss, _ = self.metric_module.forward(
                    label_emb[label_id_number_left],
                    label_emb[label_id_number_right],
                    true_label=label_ids.cuda())
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                tr_loss = tr_loss + loss
            ## end of epoch: eval on dev data
            # print('\neval on train data epoch {}'.format(epoch))
            # result, _, _ = self.do_eval(train_dataloader, labeldesc_loader, edge_index)
            print('\neval on dev data epoch {}'.format(epoch))
            result, preds, dev_loss = self.do_eval(dev_dataloader, labeldesc_loader, edge_index)
            if dev_loss < lowest_dev_loss:
                lowest_dev_loss = dev_loss
                print("save best, lowest dev loss {}".format(lowest_dev_loss))
                torch.save(self.state_dict(),
                           os.path.join(self.args.result_folder, "best_state_dict.pytorch"))
                last_best_epoch = epoch
            if epoch - last_best_epoch > 20:
                print('\n\n\n**** break early \n\n\n')
                print('')
                return tr_loss
        return tr_loss  ## last train loss

    def do_eval(self, train_dataloader, labeldesc_loader, edge_index):
        torch.cuda.empty_cache()
        self.eval()
        tr_loss = 0
        preds = []
        all_label_ids = []
        with torch.no_grad():  ## labels don't need to be updated anymore
            label_emb = self.gcn_2layer(labeldesc_loader, edge_index)
        print('sample gcn label_emb')
        print(label_emb)
        ## for each batch
        for step, batch in enumerate(tqdm(train_dataloader, desc="eval")):
            batch = tuple(t for t in batch)
            label_id_number_left, label_id_number_right, label_ids = batch
            ## used for indexing, so they have to be int arrays, not tensors
            label_id_number_left = label_id_number_left.squeeze(1).data.numpy()
            label_id_number_right = label_id_number_right.squeeze(1).data.numpy()
            with torch.no_grad():
                loss, score = self.metric_module.forward(
                    label_emb[label_id_number_left],
                    label_emb[label_id_number_right],
                    true_label=label_ids.cuda())
            tr_loss = tr_loss + loss
            if len(preds) == 0:
                preds.append(score.detach().cpu().numpy())
                all_label_ids.append(label_ids.detach().cpu().numpy())
            else:
                preds[0] = np.append(preds[0], score.detach().cpu().numpy(), axis=0)
                all_label_ids[0] = np.append(all_label_ids[0],
                                             label_ids.detach().cpu().numpy(),
                                             axis=0)  # row array
        # end eval
        all_label_ids = all_label_ids[0]
        preds = preds[0]
        if self.metric_option == 'entailment':
            preds = softmax(preds, axis=1)  ## softmax returns the prob of both class 0 and 1 for each pair
        print(preds)
        print(all_label_ids)
        result = 0
        if self.args.test_file is None:  ## save some time
            ## acc_and_f1 internally handles the @entailment vs @cosine case
            result = acc_and_f1(preds, all_label_ids, self.metric_option)
            for key in sorted(result.keys()):
                print("%s=%s" % (key, str(result[key])))
        return result, preds, tr_loss
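## Hypothetical sketch of a metric_module with the interface used above, based
## only on the comment in encoder_model.__init__ (concatenate v1, v2, v1*v2 and
## abs(v1-v2), then pass the result through an MLP). The real @entailment_model
## in this repo likely differs in its layer sizes and other details.
class toy_entailment_model(nn.Module):
    def __init__(self, emb_dim, num_class=2):
        super(toy_entailment_model, self).__init__()
        self.mlp = nn.Sequential(nn.Linear(4 * emb_dim, emb_dim),
                                 nn.ReLU(),
                                 nn.Linear(emb_dim, num_class))
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, v1, v2, true_label):
        ## v1, v2: batch x emb_dim vectors for the left/right labels of each pair
        feats = torch.cat((v1, v2, v1 * v2, torch.abs(v1 - v2)), dim=1)
        score = self.mlp(feats)                 # batch x num_class logits
        loss = self.loss_fn(score, true_label)
        return loss, score                      # same (loss, score) contract as metric_module.forward

## example call with made-up vectors
# metric = toy_entailment_model(emb_dim=6)
# loss, score = metric(torch.randn(3, 6), torch.randn(3, 6), true_label=torch.tensor([1, 0, 1]))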