class MethodGraphBertGraphClustering(BertPreTrainedModel):
    learning_record_dict = {}
    use_raw_feature = True
    cluster_number = 0
    lr = 0.001
    weight_decay = 5e-4
    max_epoch = 500
    load_pretrained_path = ''
    save_pretrained_path = ''

    def __init__(self, config):
        super(MethodGraphBertGraphClustering, self).__init__(config)
        self.config = config
        self.bert = MethodGraphBert(config)
        self.init_weights()

    def forward(self, raw_features, wl_role_ids, init_pos_ids, hop_dis_ids):
        outputs = self.bert(raw_features, wl_role_ids, init_pos_ids, hop_dis_ids)

        # average the embeddings of the target node and its k context nodes
        sequence_output = 0
        for i in range(self.config.k + 1):
            sequence_output += outputs[0][:, i, :]
        sequence_output /= float(self.config.k + 1)

        # cluster either the raw features or the learned embeddings
        kmeans = KMeans(n_clusters=self.cluster_number, max_iter=self.max_epoch)
        if self.use_raw_feature:
            clustering_result = kmeans.fit_predict(self.data['X'])
        else:
            clustering_result = kmeans.fit_predict(sequence_output.tolist())
        return {'pred_y': clustering_result, 'true_y': self.data['y']}

    def train_model(self, max_epoch):
        t_begin = time.time()
        clustering = self.forward(self.data['raw_embeddings'],
                                  self.data['wl_embedding'],
                                  self.data['int_embeddings'],
                                  self.data['hop_embeddings'])
        self.learning_record_dict = clustering

    def run(self):
        if self.load_pretrained_path != '':
            self.use_raw_feature = False
            print('loading pretrained model from ' + self.load_pretrained_path + '...')
            self.bert = MethodGraphBert.from_pretrained(self.load_pretrained_path)
        self.train_model(self.max_epoch)
        if self.save_pretrained_path != '':
            print('saving pretrained model to ' + self.save_pretrained_path + '...')
            self.bert.save_pretrained(self.save_pretrained_path)
        return self.learning_record_dict
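# --- Usage sketch (illustrative, not from the source) ---
# One way the clustering head above might be driven. The config class, its
# arguments, and the attribute-style data injection below are assumptions
# read off this file, not a confirmed API:
#
#   config = GraphBertConfig(k=7, hidden_size=32)   # hypothetical config
#   method = MethodGraphBertGraphClustering(config)
#   method.data = data          # dict with 'X', 'y', 'raw_embeddings',
#                               # 'wl_embedding', 'int_embeddings', 'hop_embeddings'
#   method.cluster_number = 7
#   result = method.run()       # {'pred_y': cluster ids, 'true_y': labels}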
def __init__(self, config):
    super(MethodGraphBertNodeClassification, self).__init__(config)
    self.config = config
    self.bert = MethodGraphBert(config)
    self.res_h = torch.nn.Linear(config.x_size, config.hidden_size)
    self.res_y = torch.nn.Linear(config.x_size, config.y_size)
    self.cls_y = torch.nn.Linear(config.hidden_size, config.y_size)
    self.init_weights()
def __init__(self, config, pretrained_path):
    super(MethodGraphBertGraphRecovery, self).__init__(config)
    self.place = torch.device('cuda:0')
    self.config = config
    self.bert = MethodGraphBert(config).to(self.place)
    if pretrained_path is not None:
        print("Load pretrained model from {}".format(pretrained_path))
        # from_pretrained returns a new model; assign it rather than discard it
        self.bert = MethodGraphBert.from_pretrained(pretrained_path).to(self.place)
    self.init_weights()
class MethodGraphBertNodeClassification(BertPreTrainedModel):
    learning_record_dict = {}
    lr = 0.001
    weight_decay = 5e-4
    max_epoch = 500
    spy_tag = True
    load_pretrained_path = ''
    save_pretrained_path = ''

    def __init__(self, config, pretrained_path):
        super(MethodGraphBertNodeClassification, self).__init__(config)
        # named `place` (as elsewhere in this file) so it cannot clash with the
        # read-only `device` property newer transformers versions define on
        # PreTrainedModel
        self.place = torch.device('cuda:0')
        self.config = config
        self.bert = MethodGraphBert(config).to(self.place)
        if pretrained_path is not None:
            print("Load pretrained model from {}".format(pretrained_path))
            # from_pretrained returns a new model; assign it rather than discard it
            self.bert = MethodGraphBert.from_pretrained(pretrained_path).to(self.place)
        self.res_h = torch.nn.Linear(config.x_size, config.hidden_size).to(self.place)
        self.res_y = torch.nn.Linear(config.x_size, config.y_size).to(self.place)
        self.cls_y = torch.nn.Linear(config.hidden_size, config.y_size).to(self.place)
        self.init_weights()

    def forward(self, raw_features, wl_role_ids, init_pos_ids, hop_dis_ids, idx=None):
        residual_h, residual_y = self.residual_term()
        if idx is not None:
            if residual_h is None:
                outputs = self.bert(raw_features[idx], wl_role_ids[idx],
                                    init_pos_ids[idx], hop_dis_ids[idx],
                                    residual_h=None)
            else:
                outputs = self.bert(raw_features[idx], wl_role_ids[idx],
                                    init_pos_ids[idx], hop_dis_ids[idx],
                                    residual_h=residual_h[idx])
                residual_y = residual_y[idx]
        else:
            if residual_h is None:
                outputs = self.bert(raw_features, wl_role_ids, init_pos_ids,
                                    hop_dis_ids, residual_h=None)
            else:
                outputs = self.bert(raw_features, wl_role_ids, init_pos_ids,
                                    hop_dis_ids, residual_h=residual_h)

        # average the embeddings of the target node and its k context nodes
        sequence_output = 0
        for i in range(self.config.k + 1):
            sequence_output += outputs[0][:, i, :]
        sequence_output /= float(self.config.k + 1)

        labels = self.cls_y(sequence_output)
        if residual_y is not None:
            labels += residual_y
        return F.log_softmax(labels, dim=1)

    def residual_term(self):
        if self.config.residual_type == 'none':
            return None, None
        elif self.config.residual_type == 'raw':
            return self.res_h(self.data['X']), self.res_y(self.data['X'])
        elif self.config.residual_type == 'graph_raw':
            return torch.spmm(self.data['A'], self.res_h(self.data['X'])), \
                   torch.spmm(self.data['A'], self.res_y(self.data['X']))

    def train_model(self, max_epoch):
        t_begin = time.time()
        optimizer = optim.Adam(self.parameters(), lr=self.lr,
                               weight_decay=self.weight_decay)
        accuracy = EvaluateAcc('', '')

        for name in ['raw_embeddings', 'wl_embedding', 'int_embeddings',
                     'hop_embeddings', 'idx_train', 'y', 'X', 'A']:
            self.data[name] = self.data[name].to(self.place)

        max_score = 0.0
        for epoch in range(max_epoch):
            t_epoch_begin = time.time()

            self.train()
            optimizer.zero_grad()
            output = self.forward(self.data['raw_embeddings'],
                                  self.data['wl_embedding'],
                                  self.data['int_embeddings'],
                                  self.data['hop_embeddings'],
                                  self.data['idx_train'])
            loss_train = F.cross_entropy(output, self.data['y'][self.data['idx_train']])
            accuracy.data = {'true_y': self.data['y'][self.data['idx_train']],
                             'pred_y': output.max(1)[1]}
            acc_train = accuracy.evaluate()
            loss_train.backward()
            optimizer.step()

            self.eval()
            output = self.forward(self.data['raw_embeddings'],
                                  self.data['wl_embedding'],
                                  self.data['int_embeddings'],
                                  self.data['hop_embeddings'],
                                  self.data['idx_val'])
            loss_val = F.cross_entropy(output, self.data['y'][self.data['idx_val']])
            accuracy.data = {'true_y': self.data['y'][self.data['idx_val']],
                             'pred_y': output.max(1)[1]}
            acc_val = accuracy.evaluate()

            # -------------------------
            # ---- keep records for drawing convergence plots ----
            output = self.forward(self.data['raw_embeddings'],
                                  self.data['wl_embedding'],
                                  self.data['int_embeddings'],
                                  self.data['hop_embeddings'],
                                  self.data['idx_test'])
            loss_test = F.cross_entropy(output, self.data['y'][self.data['idx_test']])
            accuracy.data = {'true_y': self.data['y'][self.data['idx_test']],
                             'pred_y': output.max(1)[1]}
            acc_test = accuracy.evaluate()

            self.learning_record_dict[epoch] = {
                'loss_train': loss_train.item(),
                'acc_train': acc_train.item(),
                'loss_val': loss_val.item(),
                'acc_val': acc_val.item(),
                'loss_test': loss_test.item(),
                'acc_test': acc_test.item(),
                'time': time.time() - t_epoch_begin
            }
            # -------------------------
            if epoch % 10 == 0:
                print('Epoch: {:04d}'.format(epoch + 1),
                      'loss_train: {:.4f}'.format(loss_train.item()),
                      'acc_train: {:.4f}'.format(acc_train.item()),
                      'loss_val: {:.4f}'.format(loss_val.item()),
                      'acc_val: {:.4f}'.format(acc_val.item()),
                      'loss_test: {:.4f}'.format(loss_test.item()),
                      'acc_test: {:.4f}'.format(acc_test.item()),
                      'time: {:.4f}s'.format(time.time() - t_epoch_begin))

        print("Optimization Finished!")
        print("Total time elapsed: {:.4f}s".format(time.time() - t_begin)
              + ', best testing performance {:.4f}'.format(
                  np.max([self.learning_record_dict[epoch]['acc_test']
                          for epoch in self.learning_record_dict]))
              + ', minimum loss {:.4f}'.format(
                  np.min([self.learning_record_dict[epoch]['loss_test']
                          for epoch in self.learning_record_dict])))
        return time.time() - t_begin, np.max([
            self.learning_record_dict[epoch]['acc_test']
            for epoch in self.learning_record_dict])

    def run(self):
        self.train_model(self.max_epoch)
        return self.learning_record_dict
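# --- Residual-term sketch (ours, not part of the class above) ---
# Under residual_type == 'graph_raw', the raw-feature residuals are smoothed
# one hop over the sparse adjacency A via torch.spmm. A self-contained
# illustration with made-up shapes:
def _graph_raw_residual_sketch():
    import torch
    N, D, H = 4, 8, 16
    A = torch.eye(N).to_sparse()        # stand-in sparse adjacency (N, N)
    X = torch.randn(N, D)               # raw node features
    res_h = torch.nn.Linear(D, H)
    return torch.spmm(A, res_h(X))      # (N, H) one-hop aggregated residual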
class MethodGraphBertGraphRecovery(BertPreTrainedModel):
    learning_record_dict = {}
    lr = 0.001
    weight_decay = 5e-4
    max_epoch = 500
    load_pretrained_path = ''
    save_pretrained_path = ''

    def __init__(self, config):
        super(MethodGraphBertGraphRecovery, self).__init__(config)
        self.config = config
        self.bert = MethodGraphBert(config)
        self.init_weights()

    def forward(self, raw_features, wl_role_ids, init_pos_ids, hop_dis_ids, idx=None):
        outputs = self.bert(raw_features, wl_role_ids, init_pos_ids, hop_dis_ids)

        sequence_output = 0
        for i in range(self.config.k + 1):
            sequence_output += outputs[0][:, i, :]
        sequence_output /= float(self.config.k + 1)

        x_hat = sequence_output
        x_norm = torch.norm(x_hat, p=2, dim=1)
        nume = torch.mm(x_hat, x_hat.t())
        deno = torch.ger(x_norm, x_norm)
        cosine_similarity = nume / deno
        return cosine_similarity

    def train_model(self, max_epoch):
        t_begin = time.time()
        optimizer = optim.Adam(self.parameters(), lr=self.lr,
                               weight_decay=self.weight_decay)

        for epoch in range(max_epoch):
            t_epoch_begin = time.time()
            # -------------------------
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.data['raw_embeddings'],
                                  self.data['wl_embedding'],
                                  self.data['int_embeddings'],
                                  self.data['hop_embeddings'])
            row_num, col_num = output.size()
            loss_train = torch.sum((output - self.data['A'].to_dense()) ** 2) / (row_num * col_num)
            loss_train.backward()
            optimizer.step()

            self.learning_record_dict[epoch] = {'loss_train': loss_train.item(),
                                                'time': time.time() - t_epoch_begin}
            # -------------------------
            if epoch % 50 == 0:
                print('Epoch: {:04d}'.format(epoch + 1),
                      'loss_train: {:.4f}'.format(loss_train.item()),
                      'time: {:.4f}s'.format(time.time() - t_epoch_begin))

        print("Optimization Finished!")
        print("Total time elapsed: {:.4f}s".format(time.time() - t_begin))
        return time.time() - t_begin

    def run(self):
        if self.load_pretrained_path != '':
            print('loading pretrained model from ' + self.load_pretrained_path + '...')
            self.bert = MethodGraphBert.from_pretrained(self.load_pretrained_path)
        self.train_model(self.max_epoch)
        if self.save_pretrained_path != '':
            print('saving pretrained model to ' + self.save_pretrained_path + '...')
            self.bert.save_pretrained(self.save_pretrained_path)
        return self.learning_record_dict
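# --- Cosine-similarity head as a standalone sketch (ours) ---
# The recovery head above scores every node pair by the cosine similarity of
# their averaged embeddings; torch.ger is the deprecated spelling of
# torch.outer. The same computation, isolated:
def _pairwise_cosine_sketch(x_hat):
    """Return the (N, N) cosine-similarity matrix of the rows of x_hat."""
    import torch
    x_norm = torch.norm(x_hat, p=2, dim=1)   # (N,) row norms
    nume = torch.mm(x_hat, x_hat.t())        # (N, N) pairwise dot products
    deno = torch.outer(x_norm, x_norm)       # (N, N) pairwise norm products
    return nume / deno                       # ones on the diagonal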
class MethodGraphBertNodeConstruct(BertPreTrainedModel):
    learning_record_dict = {}
    lr = 0.001
    weight_decay = 5e-4
    max_epoch = 500
    load_pretrained_path = ''
    save_pretrained_path = ''

    def __init__(self, config):
        super(MethodGraphBertNodeConstruct, self).__init__(config)
        self.config = config
        self.bert = MethodGraphBert(config)
        self.cls_y = torch.nn.Linear(config.hidden_size, config.x_size)
        self.init_weights()

    def forward(self, raw_features, wl_role_ids, init_pos_ids, hop_dis_ids, idx=None):
        outputs = self.bert(raw_features, wl_role_ids, init_pos_ids, hop_dis_ids)

        sequence_output = 0
        for i in range(self.config.k + 1):
            sequence_output += outputs[0][:, i, :]
        sequence_output /= float(self.config.k + 1)

        x_hat = self.cls_y(sequence_output)
        return x_hat

    def train_model(self, max_epoch):
        t_begin = time.time()
        optimizer = optim.Adam(self.parameters(), lr=self.lr,
                               weight_decay=self.weight_decay)

        for epoch in range(max_epoch):
            t_epoch_begin = time.time()
            # -------------------------
            self.train()
            optimizer.zero_grad()
            output = self.forward(self.data['raw_embeddings'],
                                  self.data['wl_embedding'],
                                  self.data['int_embeddings'],
                                  self.data['hop_embeddings'])
            loss_train = F.mse_loss(output, self.data['X'])
            loss_train.backward()
            optimizer.step()

            self.learning_record_dict[epoch] = {'loss_train': loss_train.item(),
                                                'time': time.time() - t_epoch_begin}
            # -------------------------
            if epoch % 50 == 0:
                print('Epoch: {:04d}'.format(epoch + 1),
                      'loss_train: {:.4f}'.format(loss_train.item()),
                      'time: {:.4f}s'.format(time.time() - t_epoch_begin))

        print("Optimization Finished!")
        print("Total time elapsed: {:.4f}s".format(time.time() - t_begin))
        return time.time() - t_begin

    def run(self):
        if self.load_pretrained_path != '':
            print('loading pretrained model from ' + self.load_pretrained_path + '...')
            self.bert = MethodGraphBert.from_pretrained(self.load_pretrained_path)
        self.train_model(self.max_epoch)
        if self.save_pretrained_path != '':
            print('saving pretrained model to ' + self.save_pretrained_path + '...')
            self.bert.save_pretrained(self.save_pretrained_path)
        return self.learning_record_dict
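# Note (ours): the readout used throughout this file (summing
# outputs[0][:, i, :] for i in range(k + 1), then dividing by k + 1) is just a
# mean over the sequence dimension. Assuming outputs[0] has shape (N, k+1, H),
# an equivalent one-liner would be:
#
#   sequence_output = outputs[0].mean(dim=1)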
class MethodGraphBertNodeConstruct(BertPreTrainedModel):
    learning_record_dict = {}
    lr = 0.001
    weight_decay = 5e-4
    max_epoch = 500
    load_pretrained_path = ''
    save_pretrained_path = ''

    def __init__(self, config):
        super(MethodGraphBertNodeConstruct, self).__init__(config)
        self.place = torch.device('cuda:0')
        self.config = config
        self.bert = MethodGraphBert(config).to(device=self.place)
        self.cls_y = torch.nn.Linear(config.hidden_size, config.x_size).to(device=self.place)
        self.init_weights()

    def forward(self, raw_features, wl_role_ids=None, init_pos_ids=None,
                hop_dis_ids=None, idx=None):
        # raw_features: (N, L, D)
        outputs = self.bert(raw_features, wl_role_ids, init_pos_ids, hop_dis_ids)

        sequence_output = 0
        for i in range(self.config.k + 1):
            sequence_output += outputs[0][:, i, :]
        sequence_output /= float(self.config.k + 1)

        x_hat = self.cls_y(sequence_output)               # (N, D)
        x_hat = torch.unsqueeze(x_hat, dim=1)             # (N, 1, D)
        scores = torch.sum(x_hat * raw_features, dim=-1)  # (N, L)
        return scores

    def train_epoch(self):
        self.train()
        total_loss = 0.0
        total_num = 0
        for x, x_context, x_wl, y in self.dataloader:
            length = x.shape[0]
            x_feat = x.to(self.place)
            x_context_feat = x_context.to(self.place)
            x_wl = x_wl.to(self.place)
            output = self.forward(x_context_feat, x_wl)
            _, y_pred = torch.max(output, dim=-1)
            # position 0 (the node itself) is treated as the positive class
            y_true = torch.zeros_like(y_pred, dtype=torch.int64)  # (N,)
            loss_train = F.cross_entropy(output, y_true, reduction='sum')
            self.optimizer.zero_grad()
            loss_train.backward()
            self.optimizer.step()
            with torch.no_grad():
                total_num += length
                total_loss += loss_train.item()
        return total_loss / total_num

    def train_model(self, max_epoch):
        t_begin = time.time()
        self.optimizer = optim.Adam(self.parameters(), lr=self.lr,
                                    weight_decay=self.weight_decay)
        self.dataloader = DataLoader(self.data, batch_size=512, shuffle=True,
                                     num_workers=2)
        for epoch in range(max_epoch):
            t_epoch_begin = time.time()
            # -------------------------
            loss_train = self.train_epoch()
            self.learning_record_dict[epoch] = {'loss_train': loss_train,
                                                'time': time.time() - t_epoch_begin}
            # -------------------------
            if epoch % 1 == 0:
                print('Epoch: {:04d}'.format(epoch + 1),
                      'loss_train: {:.4f}'.format(loss_train),
                      'time: {:.4f}s'.format(time.time() - t_epoch_begin))
        print("Optimization Finished!")
        print("Total time elapsed: {:.4f}s".format(time.time() - t_begin))
        return time.time() - t_begin

    def run(self):
        self.train_model(self.max_epoch)
        self.bert.save_pretrained(self.save_pretrained_path)
        print("Save pretrained model in {}".format(self.save_pretrained_path))
        return self.learning_record_dict
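# --- Context-scoring sketch (ours, with made-up shapes) ---
# The forward above scores each of the L context positions by a dot product
# with the reconstructed target embedding; train_epoch then treats position 0
# as the positive class. The scoring step in isolation:
def _context_scores_sketch():
    import torch
    N, L, D = 3, 5, 8
    raw_features = torch.randn(N, L, D)             # context embeddings
    x_hat = torch.randn(N, D).unsqueeze(1)          # reconstructed target, (N, 1, D)
    return torch.sum(x_hat * raw_features, dim=-1)  # (N, L) logits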
class MethodGraphBertNodeClassification(BertPreTrainedModel):
    learning_record_dict = {}
    lr = 0.001
    weight_decay = 5e-4
    max_epoch = 500
    load_pretrained_path = ''
    save_pretrained_path = ''

    def __init__(self, config, pretrained_path, dataset_name):
        super(MethodGraphBertNodeClassification, self).__init__(config)
        self.place = torch.device('cuda:0')
        self.config = config
        self.bert = MethodGraphBert(config).to(device=self.place)
        # load from pretrained model if necessary
        if pretrained_path is not None:
            self.bert = self.bert.from_pretrained(pretrained_path).to(device=self.place)
        self.res_h = torch.nn.Linear(config.x_size, config.hidden_size).to(device=self.place)
        self.res_y = torch.nn.Linear(config.x_size, config.y_size).to(device=self.place)
        self.cls_y = torch.nn.Linear(config.hidden_size, config.y_size).to(device=self.place)
        self.init_weights()
        self.evaluator = Evaluator(dataset_name)

    def forward(self, context_feat, wl_role_ids=None, node_feat=None,
                init_pos_ids=None, hop_dis_ids=None, idx=None):
        residual_h, residual_y = self.residual_term(node_feat)
        outputs = self.bert(context_feat, wl_role_ids, init_pos_ids,
                            hop_dis_ids, residual_h=residual_h)

        sequence_output = 0
        for i in range(self.config.k + 1):
            sequence_output += outputs[0][:, i, :]
        sequence_output /= float(self.config.k + 1)

        labels = self.cls_y(sequence_output)
        if residual_y is not None:
            labels += residual_y
        return F.log_softmax(labels, dim=1)

    def residual_term(self, node_feat):
        if self.config.residual_type == 'none':
            return None, None
        elif self.config.residual_type == 'raw':
            return self.res_h(node_feat), self.res_y(node_feat)
        elif self.config.residual_type == 'graph_raw':
            return torch.spmm(self.data['A'], self.res_h(self.data['X'])), \
                   torch.spmm(self.data['A'], self.res_y(self.data['X']))

    def train_epoch(self):
        self.train()
        total_loss, total_right, total_num = 0.0, 0.0, 0.0
        for x, x_context, x_wl, y in self.dataloader:
            length = x.shape[0]
            x_feat = x.to(self.place)
            x_context_feat = x_context.to(self.place)
            x_wl = x_wl.to(self.place)
            y = y.to(self.place)
            output = self.forward(x_context_feat, wl_role_ids=x_wl, node_feat=x_feat)
            loss_train = F.cross_entropy(output, y, reduction='sum')
            self.optimizer.zero_grad()
            loss_train.backward()
            self.optimizer.step()
            with torch.no_grad():
                total_num += length
                total_loss += loss_train.item()
                _, pred = torch.max(output, dim=1)
                total_right += torch.sum(pred == y).item()
        return total_loss / total_num, total_right / total_num

    def eval_epoch(self):
        self.eval()
        total_loss, total_right, total_num = 0.0, 0.0, 0.0
        with torch.no_grad():
            for x, x_context, x_wl, y in self.dataloader:
                length = x.shape[0]
                x_feat = x.to(self.place)
                x_context_feat = x_context.to(self.place)
                x_wl = x_wl.to(self.place)
                y = y.to(self.place)
                output = self.forward(x_context_feat, wl_role_ids=x_wl, node_feat=x_feat)
                loss_train = F.cross_entropy(output, y, reduction='sum')
                total_num += length
                total_loss += loss_train.item()
                _, pred = torch.max(output, dim=1)
                total_right += torch.sum(pred == y).item()
        return total_loss / total_num, total_right / total_num

    def train_model(self, max_epoch):
        t_begin = time.time()
        self.optimizer = optim.Adam(self.parameters(), lr=self.lr,
                                    weight_decay=self.weight_decay)
        self.dataloader = DataLoader(self.data, batch_size=512, shuffle=True,
                                     num_workers=2)
        # self.val_dataloader = DataLoader(self.data[self.data.split['valid']],
        #                                  batch_size=512, shuffle=False, num_workers=2)
        for epoch in range(max_epoch):
            t_epoch_begin = time.time()
            loss_train, acc_train = self.train_epoch()
            self.learning_record_dict[epoch] = {'loss_train': loss_train,
                                                'acc_train': acc_train,
                                                'time': time.time() - t_epoch_begin}
            if epoch % 1 == 0:
                print('| Epoch: {:04d} |'.format(epoch + 1),
                      'loss_train: {:.4f}'.format(loss_train),
                      'acc_train: {:.3f} |'.format(acc_train * 100),
                      'time: {:.4f}s |'.format(time.time() - t_epoch_begin))
        print("Optimization Finished!")
        print("Total time elapsed: {:.4f}s".format(time.time() - t_begin))
        return time.time() - t_begin

    def run(self):
        self.train_model(self.max_epoch)
        self.bert.save_pretrained(self.save_pretrained_path)
        print("Save Node Classification model in {}".format(self.save_pretrained_path))
        return self.learning_record_dict