def main():
    """Pre-train a GNN with the ``NegativeEdge`` transform and optionally save it.

    Command-line options are parsed, the random generators are seeded with 0,
    a ``MoleculeDataset`` is loaded from ``dataset/<name>``, and ``train`` is
    called once per epoch. When ``--output_model_file`` is non-empty the model
    ``state_dict`` is written to ``<output_model_file>.pth`` after training.
    """
    # Training settings: (flag, add_argument keyword arguments), registered in order.
    cli_options = (
        ('--device', dict(type=int, default=0,
                          help='which gpu to use if any (default: 0)')),
        ('--batch_size', dict(type=int, default=256,
                              help='input batch size for training (default: 256)')),
        ('--epochs', dict(type=int, default=100,
                          help='number of epochs to train (default: 100)')),
        ('--lr', dict(type=float, default=0.001,
                      help='learning rate (default: 0.001)')),
        ('--decay', dict(type=float, default=0,
                         help='weight decay (default: 0)')),
        ('--num_layer', dict(type=int, default=5,
                             help='number of GNN message passing layers (default: 5).')),
        ('--emb_dim', dict(type=int, default=300,
                           help='embedding dimensions (default: 300)')),
        ('--dropout_ratio', dict(type=float, default=0,
                                 help='dropout ratio (default: 0)')),
        ('--JK', dict(type=str, default="last",
                      help='how the node features across layers are combined. last, sum, max or concat')),
        ('--dataset', dict(type=str, default='zinc_standard_agent',
                           help='root directory of dataset. For now, only classification.')),
        ('--output_model_file', dict(type=str, default='',
                                     help='filename to output the pre-trained model')),
        ('--gnn_type', dict(type=str, default="gin")),
        ('--num_workers', dict(type=int, default=8,
                               help='number of workers for dataset loading')),
    )
    parser = argparse.ArgumentParser(
        description='PyTorch implementation of pre-training of graph neural networks')
    for flag, spec in cli_options:
        parser.add_argument(flag, **spec)
    args = parser.parse_args()

    # Fixed seeds so that repeated runs are comparable.
    torch.manual_seed(0)
    np.random.seed(0)
    if torch.cuda.is_available():
        device = torch.device(f"cuda:{args.device}")
        torch.cuda.manual_seed_all(0)
    else:
        device = torch.device("cpu")

    # Dataset and loader.
    dataset = MoleculeDataset(f"dataset/{args.dataset}",
                              dataset=args.dataset,
                              transform=NegativeEdge())
    print(dataset[0])
    loader = DataLoaderAE(dataset,
                          batch_size=args.batch_size,
                          shuffle=True,
                          num_workers=args.num_workers)

    # Model.
    model = GNN(args.num_layer,
                args.emb_dim,
                JK=args.JK,
                drop_ratio=args.dropout_ratio,
                gnn_type=args.gnn_type)
    model.to(device)

    # Optimizer.
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.decay)
    print(optimizer)

    last_epoch = args.epochs
    epoch = 1
    while epoch <= last_epoch:
        print(f"====epoch {epoch}")
        train_acc, train_loss = train(args, model, device, loader, optimizer)
        print(train_acc)
        print(train_loss)
        epoch += 1

    # An empty file name means "do not save".
    if args.output_model_file != "":
        torch.save(model.state_dict(), f"{args.output_model_file}.pth")
def get_accuracy(params):
    """Fit a GNN built from ``params`` and return the largest ``history.val.acc`` value.

    Relies on the module-level names ``dataset``, ``data``, ``device`` and
    ``args``; optimizer hyper-parameters are read from ``json/setting.json``
    under the key ``args.dataset``.
    """
    graph_spec = utils.build_gnn_graph(dataset, params)
    net = GNN(graph_spec)
    net = net.to(device)

    hparams = utils.from_json("json/setting.json")[args.dataset]
    adam = torch.optim.Adam(net.parameters(),
                            lr=hparams["learning_rate"],
                            weight_decay=hparams["weight_decay"])

    run_history = Fitter(net, data, adam).run(verbose=args.verbose)
    return max(run_history.val.acc)
def evaluate(params, dataset, device='cuda:0', val_test='test'):
    """Fit a GNN built from ``params`` on ``dataset`` and return ``max(history.val.acc)``.

    Args:
        params: architecture description handed to ``utils.build_gnn_graph``.
        dataset: dataset name; used to construct ``Dataset`` and as the key
            into ``json/setting.json``.
        device: device the model and the first data item are moved to.
        val_test: forwarded to ``Fitter.run`` as ``val_test``.
    """
    loaded = Dataset(dataset)
    graph_spec = utils.build_gnn_graph(loaded, params)
    net = GNN(graph_spec)
    net = net.to(device)

    hparams = utils.from_json("json/setting.json")[dataset]
    adam = torch.optim.Adam(net.parameters(),
                            lr=hparams["learning_rate"],
                            weight_decay=hparams["weight_decay"])

    first_item = loaded[0].to(device)
    run_history = Fitter(net, first_item, adam).run(val_test=val_test,
                                                    verbose=False)
    return max(run_history.val.acc)
# Script fragment: full-batch training of a GNN with an MSE objective.
# `parser` is created before this fragment (not visible here).

# Mandatory integer option selecting the CUDA device index.
parser.add_argument('-c', '--cuda', type=int, required=True)
args = parser.parse_args()

# Fixed hyper-parameters for this run.
embed_size = 64
dropout = 0.5
learning_rate = 0.01
num_epochs = 100

# Use the requested GPU when CUDA is available, otherwise fall back to CPU.
device = torch.device(
    f'cuda:{args.cuda}' if torch.cuda.is_available() else 'cpu')

# AMT is defined elsewhere; it yields a train and a test data object.
data_train, data_test = AMT(device)

# The entries of data_train.size are passed as the leading GNN arguments.
model = GNN(*data_train.size, embed_size, dropout).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    # One optimisation step per epoch over the whole training object.
    model.train()
    y_pred = model(data_train).reshape(-1)
    y = data_train.edge_type.float()
    loss = criterion(y_pred, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Square root of the MSE loss, i.e. the training RMSE.
    rmse_loss_train = loss.item()**0.5
    # NOTE(review): evaluation code presumably follows; it is outside this view.
    model.eval()
class Runner(object):
    """Train and evaluate a GNN on the 'mouse' and 'human' graphs in ``self.data``.

    Training (``train``) combines a label loss on the source species with a
    domain-classification loss over source and human batches; evaluation
    (``evaluate``) computes label accuracy and a classification report.
    """

    def __init__(self, params):
        """Load the data, build the model, the optimizer and the evaluation loaders.

        Args:
            params: namespace-like object; the attributes read here are
                ``gpu``, ``dense_dim``, ``hidden_dim``, ``n_layers``,
                ``dropout``, ``weighted``, ``lr``, ``weight_decay``,
                ``num_neighbors`` and (via ``get_dataloader``) ``batch_size``.
        """
        self.p = params
        # gpu == -1 selects the CPU, any other value is a CUDA device index.
        self.device = torch.device('cpu' if self.p.gpu ==
                                   -1 else f'cuda:{params.gpu}')
        self.data, self.num_classes, self.num_genes, self.id2label = load(
            self.p)
        self.model = GNN(in_feats=self.p.dense_dim,
                         shared_gene=self.data['shared_gene_tensor'],
                         human_gene=self.data['human_gene_tensor'],
                         mouse_gene=self.data['mouse_gene_tensor'],
                         n_hidden=self.p.hidden_dim,
                         n_class=self.num_classes,
                         n_layer=self.p.n_layers,
                         activation=F.relu,
                         dropout=self.p.dropout,
                         weighted=self.p.weighted,
                         device=self.device,
                         gene_num=self.num_genes).to(self.device)
        total_trainable_params = sum(p.numel()
                                     for p in self.model.parameters()
                                     if p.requires_grad)
        print(f'{total_trainable_params:,} training parameters.')
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=params.lr,
                                          weight_decay=self.p.weight_decay)
        self.domain_criterion = torch.nn.CrossEntropyLoss()
        # num_neighbors == 0 means "use the node count of the larger graph".
        if self.p.num_neighbors == 0:
            self.num_neighbors = max([
                self.data['mouse']['graph'].number_of_nodes(),
                self.data['human']['graph'].number_of_nodes()
            ])
        else:
            self.num_neighbors = self.p.num_neighbors
        self.data_loader = self.get_dataloader()

    def fit(self):
        """Run ``n_epochs`` of training, tracking the best human accuracy and its report.

        After each epoch the model is evaluated on 'mouse' (reported as
        "train") and 'human' (reported as "test"). The per-label report that
        goes with the best test accuracy is printed at the end.
        """
        max_test_acc = 0
        final_test_report = None
        for epoch in range(self.p.n_epochs):
            print("Epoch {}".format(epoch))
            start_time = time.time()
            loss = self.train(epoch)
            train_correct, train_total, _, _ = self.evaluate('mouse')
            test_correct, test_total, _, test_report = self.evaluate('human')
            train_acc = train_correct / train_total
            test_acc = test_correct / test_total
            # Also accept the first report when accuracy never rises above 0,
            # so the summary below always has something to print.
            if final_test_report is None or test_acc > max_test_acc:
                max_test_acc = test_acc
                final_test_report = test_report
            if epoch % 5 == 0:
                print(
                    f"E [{epoch}], loss: {loss:.5f}, train acc: {train_acc:.4f}, test acc: {test_acc:.4f}, cost: {time.time() - start_time:.2f}s"
                )
            # NOTE(review): the original indentation was lost; the scalars are
            # assumed to be logged every epoch — confirm against the author's intent.
            writer.add_scalar('Train/loss', loss, epoch)
            writer.add_scalar('Train/acc', train_acc, epoch)
            writer.add_scalar('Test/acc', test_acc, epoch)
        print(f"MAX TEST ACC: {max_test_acc:.5f}")
        if final_test_report is None:
            # No epoch was run (n_epochs == 0): there is no report to show.
            return
        for i, label in enumerate(self.id2label):
            print(
                f"#{i} [{label}] F1-score={final_test_report[label]['f1-score']:.4f}, precision={final_test_report[label]['precision']:.4f}, recall={final_test_report[label]['recall']:.4f}"
            )

    def train(self, epoch, species='mouse'):
        """Run one training epoch and return the mean of ``domain_loss + label_loss``.

        Args:
            epoch: zero-based epoch index, used to compute ``alpha``.
            species: key of the source species in ``self.data``; the target
                species is always 'human'.

        Returns:
            ``np.mean`` over the per-batch summed losses.
        """
        self.model.train()
        tmp_dataloader = self.get_tmp_dataloader()
        len_dataloader = len(tmp_dataloader['mouse'])
        losses = []
        paired_batches = zip(tmp_dataloader[species], tmp_dataloader['human'])
        for i, ((source_blocks, source_edges),
                (target_blocks, target_edges)) in enumerate(paired_batches):
            # p is the fraction of all training steps done so far; alpha grows
            # from 0 towards 1 with p and is handed to the model unchanged.
            p = float(i + epoch *
                      len_dataloader) / self.p.n_epochs / len_dataloader
            alpha = 2. / (1. + np.exp(-10 * p)) - 1

            # Source batch: input nodes of the first block, seeds of the last.
            source_input_nodes = source_blocks[0].srcdata[dgl.NID]
            source_seeds = source_blocks[-1].dstdata[dgl.NID]
            source_batch_input, source_batch_labels, _ = self.to_device(
                species, source_seeds, source_input_nodes)
            source_blocks = [b.to(self.device) for b in source_blocks]

            # Label pass (class_or_domain=True) on the source batch.
            class_or_domain = True
            source_class_output, _ = self.model(source_blocks,
                                                source_batch_input,
                                                self.data[species]['weight'],
                                                source_edges, class_or_domain,
                                                alpha)
            label_loss = self.model.cal_loss(source_class_output,
                                             source_batch_labels,
                                             self.p.lbl_smooth)
            self.optimizer.zero_grad()
            label_loss.backward(retain_graph=True)

            # Domain pass (class_or_domain=False) on source and target batches;
            # its gradients accumulate on top of the label gradients.
            class_or_domain = False
            _, source_domain_output = self.model(source_blocks,
                                                 source_batch_input,
                                                 self.data[species]['weight'],
                                                 source_edges,
                                                 class_or_domain, alpha)
            target_input_nodes = target_blocks[0].srcdata[dgl.NID]
            target_seeds = target_blocks[-1].dstdata[dgl.NID]
            target_batch_input, _, _ = self.to_device('human', target_seeds,
                                                      target_input_nodes)
            target_blocks = [b.to(self.device) for b in target_blocks]
            _, target_domain_output = self.model(target_blocks,
                                                 target_batch_input,
                                                 self.data['human']['weight'],
                                                 target_edges,
                                                 class_or_domain, alpha)
            # Domain labels: 0 for every source row, 1 for every target row.
            domain_label = torch.tensor(
                [0] * source_domain_output.shape[0] +
                [1] * target_domain_output.shape[0]).long().to(self.device)
            domain_loss = self.domain_criterion(
                torch.cat([source_domain_output, target_domain_output]),
                domain_label)
            domain_loss.backward()
            self.optimizer.step()

            loss = domain_loss + label_loss
            losses.append(loss.item())
        return np.mean(losses)

    def evaluate(self, species):
        """Evaluate label prediction on every batch of ``self.data_loader[species]``.

        Returns:
            A 4-tuple ``(total_correct, num_cell, pred_statistics, report)``:
            the number of correct predictions, ``self.data[species]['num_cell']``,
            a dict counting each predicted class, and the dict produced by
            ``classification_report(..., output_dict=True)``.
        """
        self.model.eval()
        total_correct = 0
        label, pred = [], []
        for blocks, edges in self.data_loader[species]:
            input_nodes = blocks[0].srcdata[dgl.NID]
            seeds = blocks[-1].dstdata[dgl.NID]
            batch_input, batch_labels, _ = self.to_device(
                species, seeds, input_nodes)
            blocks = [b.to(self.device) for b in blocks]
            class_or_domain = True
            with torch.no_grad():
                batch_pred, _ = self.model(blocks,
                                           batch_input,
                                           self.data[species]['weight'],
                                           edges,
                                           class_or_domain,
                                           alpha=1)
            indices = torch.argmax(batch_pred, dim=1)
            label.extend(batch_labels.tolist())
            pred.extend(indices.tolist())
            total_correct += torch.sum(indices == batch_labels).item()
        pred_statistics = dict(collections.Counter(pred))
        report = classification_report(y_true=label,
                                       y_pred=pred,
                                       target_names=self.id2label,
                                       output_dict=True)
        return total_correct, self.data[species][
            'num_cell'], pred_statistics, report

    def to_device(self, species, seeds, input_nodes):
        """Gather the rows needed for one batch and move them to ``self.device``.

        Args:
            species: key into ``self.data``.
            seeds: index tensor selecting labels and 'hot_mat' rows for the batch seeds.
            input_nodes: index tensor selecting 'hot_mat' rows for the input nodes.

        Returns:
            ``(batch_input, batch_labels, batch_seeds)``, i.e.
            ``hot_mat[input_nodes]``, ``label[seeds]`` and ``hot_mat[seeds]``.
        """
        batch_input = self.data[species]['hot_mat'][input_nodes].to(
            self.device)
        batch_labels = self.data[species]['label'][seeds].to(self.device)
        batch_seeds = self.data[species]['hot_mat'][seeds].to(self.device)
        return batch_input, batch_labels, batch_seeds

    def _build_loaders(self, seeds_by_species, shuffle):
        """Build one neighbor-sampling DataLoader per species over the given seed arrays."""
        fanouts = [self.num_neighbors] * self.p.n_layers
        # os.cpu_count() may return None; fall back to a single CPU in that case.
        num_workers = (os.cpu_count() or 1) // 2
        loaders = dict()
        for species in ['human', 'mouse']:
            sampler = NeighborSampler(self.data[species]['graph'], fanouts)
            loaders[species] = DataLoader(dataset=seeds_by_species[species],
                                          batch_size=self.p.batch_size,
                                          collate_fn=sampler.sample_blocks,
                                          shuffle=shuffle,
                                          num_workers=num_workers)
        return loaders

    def get_dataloader(self):
        """Return unshuffled loaders over all seed ids of each species (used for evaluation)."""
        seeds = {
            species: self.data[species]['seed_id'].numpy()
            for species in ['human', 'mouse']
        }
        return self._build_loaders(seeds, shuffle=False)

    def get_tmp_dataloader(self):
        """Return shuffled training loaders whose two seed sets have equal length.

        The species with more seeds is randomly subsampled (without
        replacement) to the size of the other one, so that zipping the two
        loaders pairs every batch.
        """
        human_seeds = self.data['human']['seed_id']
        mouse_seeds = self.data['mouse']['seed_id']
        seed_dict = dict()
        len_diff = len(human_seeds) - len(mouse_seeds)
        if len_diff > 0:
            # More human seeds: keep all mouse seeds, subsample human.
            seed_dict['mouse'] = mouse_seeds.numpy()
            seed_dict['human'] = np.random.choice(human_seeds.numpy(),
                                                  len(mouse_seeds),
                                                  replace=False)
        else:
            # Otherwise keep all human seeds and subsample mouse.
            seed_dict['human'] = human_seeds.numpy()
            seed_dict['mouse'] = np.random.choice(mouse_seeds.numpy(),
                                                  len(human_seeds),
                                                  replace=False)
        assert seed_dict['mouse'].shape == seed_dict['human'].shape
        return self._build_loaders(seed_dict, shuffle=True)
class DCN():
    """Split / Tsp / Merge model trio with its optimizers, training and test loops.

    NOTE(review): the class reads several module-level names that are not
    defined inside it (``dtype``, ``args``, ``CEL``, ``extract``, ``clip_grad``,
    ``beam_size``, ``template_train1``, ``template_train2``, ``info_train``) and
    uses legacy PyTorch calls (``Variable``, ``nn.init.uniform``,
    ``.max(0)[0][0]``); those are kept exactly as they were.
    """

    def __init__(self, batch_size, num_features, num_layers, J, dim_input,
                 clip_grad_norm, logger):
        """Create the three networks and one optimizer per network.

        Args:
            batch_size: batch size used by ``train`` and ``test_gen``.
            num_features, num_layers, dim_input: forwarded to the networks.
            J: operator count parameter; the networks are built with ``J + 2``.
            clip_grad_norm: gradient-norm bound applied to the Split network.
            logger: object used to load/save models and record losses/accuracy.
        """
        self.logger = logger
        self.clip_grad_norm = clip_grad_norm
        self.batch_size = batch_size
        self.J = J
        self.Split = Split_GNN(batch_size, num_features, num_layers, J+2,
                               dim_input=dim_input)
        self.Tsp = GNN(num_features, num_layers, J+2, dim_input=dim_input)
        self.Merge = GNN(num_features, num_layers, J+2, dim_input=dim_input)
        self.optimizer_split = optim.RMSprop(self.Split.parameters())
        self.optimizer_tsp = optim.Adamax(self.Tsp.parameters(), lr=1e-3)
        self.optimizer_merge = optim.Adamax(self.Merge.parameters(), lr=1e-3)
        self.test_gens = []
        self.test_gens_labels = []

    def load_split(self, path_load):
        """Load the Split network through the logger and rebuild its optimizer."""
        self.Split = self.logger.load_model(path_load, 'split')
        self.optimizer_split = optim.RMSprop(self.Split.parameters())

    def load_tsp(self, path_load):
        """Load the Tsp network through the logger and rebuild its optimizer."""
        self.Tsp = self.logger.load_model(path_load, 'tsp')
        self.optimizer_tsp = optim.Adamax(self.Tsp.parameters(), lr=1e-3)

    def load_merge(self, path_load):
        """Load the Merge network through the logger and rebuild its optimizer."""
        self.Merge = self.logger.load_model(path_load, 'merge')
        self.optimizer_merge = optim.Adamax(self.Merge.parameters(), lr=1e-3)

    def save_model(self, path_load, it=-1):
        """Save the three networks through the logger, tagged with iteration ``it``."""
        self.logger.save_model(path_load, self.Split, self.Tsp, self.Merge,
                               it=it)

    def set_dataset(self, path_dataset, num_examples_train, num_examples_test,
                    N_train, N_test):
        """Create the training generator, configure its sizes and load the dataset."""
        # NOTE(review): args.path_tsp comes from a module-level ``args``.
        self.gen = Generator(path_dataset, args.path_tsp)
        self.gen.num_examples_train = num_examples_train
        self.gen.num_examples_test = num_examples_test
        self.gen.N_train = N_train
        self.gen.N_test = N_test
        self.gen.load_dataset()

    def add_test_dataset(self, gen, label):
        """Register a test generator together with the label printed for it."""
        self.test_gens.append(gen)
        self.test_gens_labels.append(label)

    def sample_one(self, probs, mode='train'):
        """Draw a binary sample from ``probs`` and return it with its log-probability.

        In 'train' mode each entry is compared against a uniform random
        threshold; in any other mode the threshold is fixed at 0.5.

        Returns:
            ``(bin_sample.data, sample.data, log_probs_samples)`` where
            ``log_probs_samples`` is summed over dimension 1.
        """
        probs = 1e-4 + probs*(1 - 2e-4)  # to avoid log(0)
        if mode == 'train':
            rand = torch.zeros(*probs.size()).type(dtype)
            nn.init.uniform(rand)
        else:
            rand = torch.ones(*probs.size()).type(dtype) / 2
        bin_sample = probs > Variable(rand)
        sample = bin_sample.clone().type(dtype)
        log_probs_samples = (sample*torch.log(probs) +
                             (1-sample)*torch.log(1-probs)).sum(1)
        return bin_sample.data, sample.data, log_probs_samples

    def split_operator(self, W, sample, cities):
        """Extract, per batch item, the sub-matrix of ``W`` and rows of ``cities`` selected by ``sample``.

        Results are zero-padded to the largest selection size in the batch.

        Returns:
            ``(W1, cts)`` with shapes ``(bs, N1, N1)`` and ``(bs, N1, 2)``.
        """
        bs = sample.size(0)
        Ns1 = sample.long().sum(1)
        N1 = Ns1.max(0)[0][0]
        W1 = torch.zeros(bs, N1, N1).type(dtype)
        cts = torch.zeros(bs, N1, 2).type(dtype)
        for b in range(bs):
            inds = torch.nonzero(sample[b]).squeeze()
            n = Ns1[b]
            W1[b, :n, :n] = W[b].index_select(1, inds).index_select(0, inds)
            cts[b, :n, :] = cities[b].index_select(0, inds)
        return W1, cts

    def compute_other_operators(self, W, Ns, cts, J):
        """Stack ``J + 2`` operators built from ``W`` and assemble the node features.

        Slot 0 is the identity, slots 1..J are successive renormalised squarings
        of ``W``, slot J is then overwritten with the diagonal matrix of
        ``W.sum(1)``, and slot J+1 is an outer product of per-item masks scaled
        by ``1 / Ns``.

        Returns:
            ``(Variable(WW), Variable(x), Variable(WW[:, :, :, 1]))`` where
            ``x`` concatenates ``W.sum(1)`` with ``cts`` along dimension 2.
        """
        bs = W.size(0)
        N = W.size(-1)
        QQ = W.clone()
        WW = torch.zeros(bs, N, N, J + 2).type(dtype)
        eye = torch.eye(N).type(dtype).unsqueeze(0).expand(bs, N, N)
        WW[:, :, :, 0] = eye
        for j in range(J):
            WW[:, :, :, j+1] = QQ.clone()
            QQ = torch.bmm(QQ, QQ)
            # Rescale by the per-item maximum so repeated squaring stays bounded.
            mx = QQ.max(2)[0].max(1)[0].unsqueeze(1).unsqueeze(2).expand_as(QQ)
            QQ /= torch.clamp(mx, min=1e-6)
            QQ *= np.sqrt(2)
        d = W.sum(1)
        D = d.unsqueeze(1).expand_as(eye) * eye
        # NOTE(review): this overwrites the slot the loop filled on its last
        # iteration (j == J - 1) — confirm that is intended.
        WW[:, :, :, J] = D
        U = Ns.float().unsqueeze(1).expand(bs, N)
        U = torch.ge(U, torch.arange(1, N+1).type(dtype).unsqueeze(0).expand(bs, N))
        U = U.float() / Ns.float().unsqueeze(1).expand_as(U)
        U = torch.bmm(U.unsqueeze(2), U.unsqueeze(1))
        WW[:, :, :, J+1] = U
        x = torch.cat((d.unsqueeze(2), cts), 2)
        return Variable(WW), Variable(x), Variable(WW[:, :, :, 1])

    def compute_operators(self, W, sample, cities, J):
        """Build the operator tuples for both sides of the binary partition ``sample``.

        Returns:
            ``(op1, op2)``: the ``compute_other_operators`` result for the
            selected entries and for their complement.
        """
        Ns1 = sample.long().sum(1)
        Ns2 = (1-sample.long()).sum(1)
        W1, cts1 = self.split_operator(W, sample, cities)
        W2, cts2 = self.split_operator(W, 1-sample, cities)
        op1 = self.compute_other_operators(W1, Ns1, cts1, J)
        op2 = self.compute_other_operators(W2, Ns2, cts2, J)
        return op1, op2

    def join_preds(self, pred1, pred2, sample):
        """Scatter the two partial predictions back into one ``(bs, N, N)`` tensor.

        For each batch item a block-diagonal matrix is filled from ``pred1``
        and ``pred2`` and then re-indexed on both axes with the ordering
        derived from ``sample``.
        """
        bs = pred1.size(0)
        N = sample.size(1)
        pred = Variable(torch.ones(bs, N, N).type(dtype)*(-999))
        for b in range(bs):
            n1 = sample[b].long().sum(0)[0]
            n2 = (1-sample[b]).long().sum(0)[0]
            inds = torch.cat((torch.nonzero(sample[b]).type(dtype),
                              torch.nonzero(1-sample[b]).type(dtype)), 0).squeeze()
            inds = torch.topk(-inds, N)[1]
            M = Variable(torch.zeros(N, N).type(dtype))
            M[:n1, :n1] = pred1[b, :n1, :n1]
            M[n1:, n1:] = pred2[b, :n2, :n2]
            inds = Variable(inds, requires_grad=False)
            M = M.index_select(0, inds).index_select(1, inds)
            pred[b, :, :] = M
        return pred

    def forward(self, input, W, cities):
        """Split the instance, run Tsp on both parts, and merge the partial predictions.

        Returns:
            ``(probs, log_probs_samples, pred)``: the Split probabilities, the
            log-probability of the drawn partition and the Merge output.
        """
        scores, probs = self.Split(input)
        bin_sample, sample, log_probs_samples = self.sample_one(probs,
                                                                mode='train')
        op1, op2 = self.compute_operators(W.data, bin_sample, cities, self.J)
        pred1 = self.Tsp(op1)
        pred2 = self.Tsp(op2)
        partial_pred = self.join_preds(pred1, pred2, bin_sample)
        partial_pred = F.sigmoid(partial_pred)
        pred = self.Merge((input[0], input[1], partial_pred))
        return probs, log_probs_samples, pred

    def compute_loss(self, pred, target, logprobs):
        """Accumulate the merge loss and the log-probability-weighted split loss.

        ``CEL`` is evaluated for every batch item against every slice of
        ``target[1]`` along its last axis. The split loss multiplies the
        detached loss value by ``logprobs[j]``.

        Returns:
            ``(loss_merge, loss_split)``, each divided by ``pred.size(0)``.
        """
        loss_split = 0.0
        loss_merge = 0.0
        labels = target[1]
        for i in range(labels.size()[-1]):
            for j in range(labels.size()[0]):
                lab = labels[j, :, i].contiguous().view(-1)
                cel = CEL(pred[j], lab)
                loss_merge += cel
                loss_split += Variable(cel.data) * logprobs[j]
        return loss_merge/pred.size(0), loss_split/pred.size(0)

    def train(self, iterations, print_freq, test_freq, save_freq, path_model):
        """Run ``iterations`` training steps with periodic printing, testing and saving.

        Each step updates Split from ``loss_split`` and Tsp/Merge from
        ``loss_merge``, clipping gradient norms before each optimizer step.
        """
        for it in range(iterations):
            start = time.time()
            batch = self.gen.sample_batch(self.batch_size,
                                          cuda=torch.cuda.is_available())
            input, W, WTSP, labels, target, cities, perms, costs = extract(batch)
            probs, log_probs_samples, pred = self.forward(input, W, cities)
            loss_merge, loss_split = self.compute_loss(pred, target,
                                                       log_probs_samples)
            # Split network update.
            self.Split.zero_grad()
            loss_split.backward()
            nn.utils.clip_grad_norm(self.Split.parameters(),
                                    self.clip_grad_norm)
            self.optimizer_split.step()
            # Tsp and Merge networks update.
            self.Tsp.zero_grad()
            self.Merge.zero_grad()
            loss_merge.backward()
            # NOTE(review): ``clip_grad`` is a module-level name, unlike the
            # ``self.clip_grad_norm`` used for Split above — confirm both are
            # meant to hold the same bound.
            nn.utils.clip_grad_norm(self.Tsp.parameters(), clip_grad)
            nn.utils.clip_grad_norm(self.Merge.parameters(), clip_grad)
            self.optimizer_tsp.step()
            self.optimizer_merge.step()
            self.logger.add_train_loss(loss_split, loss_merge)
            self.logger.add_train_accuracy(pred, labels, W)
            elapsed = time.time() - start
            if it % print_freq == 0 and it > 0:
                loss_split = loss_split.data.cpu().numpy()[0]
                loss_merge = loss_merge.data.cpu().numpy()[0]
                out = ['---', it, loss_split, loss_merge,
                       self.logger.cost_train[-1],
                       self.logger.accuracy_train[-1], elapsed]
                print(template_train1.format(*info_train))
                print(template_train2.format(*out))
            if it % test_freq == 0 and it >= 0:
                self.test()
            if it % save_freq == 0 and it > 0:
                self.save_model(path_model, it)

    def test(self):
        """Run ``test_gen`` on every registered test generator, printing its label first."""
        for i, gen in enumerate(self.test_gens):
            print('Test: {}'.format(self.test_gens_labels[i]))
            self.test_gen(gen)

    def test_gen(self, gen):
        """Evaluate on ``gen`` for ``num_examples_test // batch_size`` batches and print the result."""
        iterations_test = int(gen.num_examples_test / self.batch_size)
        for it in range(iterations_test):
            start = time.time()
            batch = gen.sample_batch(self.batch_size, is_training=False, it=it,
                                     cuda=torch.cuda.is_available())
            input, W, WTSP, labels, target, cities, perms, costs = extract(batch)
            probs, log_probs_samples, pred = self.forward(input, W, cities)
            loss_merge, loss_split = self.compute_loss(pred, target,
                                                       log_probs_samples)
            # The logger is told when the final batch arrives.
            last = (it == iterations_test-1)
            self.logger.add_test_accuracy(pred, labels, perms, W, cities, costs,
                                          last=last, beam_size=beam_size)
            self.logger.add_test_loss(loss_split, loss_merge, last=last)
            elapsed = time.time() - start
        # NOTE(review): original indentation was lost; the summary is assumed
        # to be printed once, after all test batches.
        print('TEST COST: {} | TEST ACCURACY {}\n'
              .format(self.logger.cost_test[-1],
                      self.logger.accuracy_test[-1]))
def main():
    """Pre-train a substructure GNN and a context GNN on the unsupervised bio dataset.

    Parses the command line, seeds the random generators with 0, builds a
    ``BioDataset`` with the ``ExtractSubstructureContextPair`` transform and
    calls ``train`` once per epoch. When ``--model_file`` is non-empty the
    substructure model's ``state_dict`` is saved to ``<model_file>.pth``.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description=
        'PyTorch implementation of pre-training of graph neural networks')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    # The help text now states the actual default (it used to claim 256).
    parser.add_argument('--batch_size', type=int, default=2,
                        help='input batch size for training (default: 2)')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate (default: 0.001)')
    parser.add_argument('--decay', type=float, default=0,
                        help='weight decay (default: 0)')
    parser.add_argument(
        '--num_layer', type=int, default=5,
        help='number of GNN message passing layers (default: 5).')
    parser.add_argument('--l1', type=int, default=1, help='l1 (default: 1).')
    parser.add_argument('--center', type=int, default=0,
                        help='center (default: 0).')
    parser.add_argument('--emb_dim', type=int, default=300,
                        help='embedding dimensions (default: 300)')
    parser.add_argument('--dropout_ratio', type=float, default=0,
                        help='dropout ratio (default: 0)')
    parser.add_argument(
        '--neg_samples', type=int, default=1,
        help='number of negative contexts per positive context (default: 1)')
    parser.add_argument(
        '--JK', type=str, default="last",
        help=
        'how the node features are combined across layers. last, sum, max or concat'
    )
    parser.add_argument('--context_pooling', type=str, default="mean",
                        help='how the contexts are pooled (sum, mean, or max)')
    parser.add_argument('--gnn_type', type=str, default="gat")
    parser.add_argument('--mode', type=str, default="cbow",
                        help="cbow or skipgram")
    parser.add_argument('--model_file', type=str, default='',
                        help='filename to output the model')
    parser.add_argument('--num_workers', type=int, default=4,
                        help='number of workers for dataset loading')
    args = parser.parse_args()

    # Fixed seeds so that repeated runs are comparable.
    torch.manual_seed(0)
    np.random.seed(0)
    device = torch.device(
        "cuda:" +
        str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    print(args.mode)

    # set up dataset
    root_unsupervised = 'dataset/unsupervised'
    dataset = BioDataset(root_unsupervised,
                         data_type='unsupervised',
                         transform=ExtractSubstructureContextPair(
                             l1=args.l1, center=args.center))
    print(dataset[0], "\n", dataset[1], "\n", len(dataset))
    print("l1: " + str(args.l1))
    print("center: " + str(args.center))

    loader = DataLoaderSubstructContext(dataset,
                                        batch_size=args.batch_size,
                                        shuffle=True,
                                        num_workers=args.num_workers)

    # set up models, one for pre-training and one for context embeddings
    model_substruct = GNN(args.num_layer,
                          args.emb_dim,
                          JK=args.JK,
                          drop_ratio=args.dropout_ratio,
                          gnn_type=args.gnn_type).to(device)
    # The context model always uses 3 layers.
    model_context = GNN(3,
                        args.emb_dim,
                        JK=args.JK,
                        drop_ratio=args.dropout_ratio,
                        gnn_type=args.gnn_type).to(device)

    # set up optimizer for the two GNNs
    optimizer_substruct = optim.Adam(model_substruct.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.decay)
    optimizer_context = optim.Adam(model_context.parameters(),
                                   lr=args.lr,
                                   weight_decay=args.decay)

    for epoch in range(1, args.epochs + 1):
        print("====epoch " + str(epoch))
        train_loss, train_acc = train(args, model_substruct, model_context,
                                      loader, optimizer_substruct,
                                      optimizer_context, device)
        print(train_loss, train_acc)

    # An empty file name means "do not save".
    if args.model_file:
        torch.save(model_substruct.state_dict(), args.model_file + ".pth")
def main():
    """Pre-train a GNN with atom (and optionally bond) masking on a molecule dataset.

    Parses the command line, seeds the random generators with ``--seed``,
    builds a ``MoleculeDataset`` with the ``MaskAtom`` transform and calls
    ``train`` once per epoch. When ``--output_model_file`` is non-empty the
    GNN's ``state_dict`` is saved to ``<output_model_file>.pth``.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description=
        'PyTorch implementation of pre-training of graph neural networks')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--batch_size', type=int, default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate (default: 0.001)')
    parser.add_argument('--decay', type=float, default=0,
                        help='weight decay (default: 0)')
    parser.add_argument(
        '--num_layer', type=int, default=5,
        help='number of GNN message passing layers (default: 5).')
    parser.add_argument('--emb_dim', type=int, default=300,
                        help='embedding dimensions (default: 300)')
    parser.add_argument('--dropout_ratio', type=float, default=0,
                        help='dropout ratio (default: 0)')
    # Help text used to be a copy of the dropout option's.
    parser.add_argument('--mask_rate', type=float, default=0.15,
                        help='mask rate (default: 0.15)')
    parser.add_argument(
        '--mask_edge', type=int, default=0,
        help='whether to mask edges or not together with atoms')
    parser.add_argument(
        '--JK', type=str, default="last",
        help=
        'how the node features are combined across layers. last, sum, max or concat'
    )
    parser.add_argument('--dataset', type=str, default='zinc_standard_agent',
                        help='root directory of dataset for pretraining')
    parser.add_argument('--output_model_file', type=str, default='',
                        help='filename to output the model')
    parser.add_argument('--gnn_type', type=str, default="gin")
    parser.add_argument('--seed', type=int, default=0,
                        help='seed for the torch and numpy random generators (default: 0)')
    parser.add_argument('--num_workers', type=int, default=8,
                        help='number of workers for dataset loading')
    args = parser.parse_args()

    # Honour --seed; it used to be parsed and then ignored in favour of a
    # hard-coded 0 (which is still the default).
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    device = torch.device(
        "cuda:" +
        str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)

    print("num layer: %d mask rate: %f mask edge: %d" %
          (args.num_layer, args.mask_rate, args.mask_edge))

    # set up dataset and transform function.
    dataset = MoleculeDataset("dataset/" + args.dataset,
                              dataset=args.dataset,
                              transform=MaskAtom(num_atom_type=119,
                                                 num_edge_type=5,
                                                 mask_rate=args.mask_rate,
                                                 mask_edge=args.mask_edge))

    loader = DataLoaderMasking(dataset,
                               batch_size=args.batch_size,
                               shuffle=True,
                               num_workers=args.num_workers)

    # set up the GNN and the two linear prediction heads
    model = GNN(args.num_layer,
                args.emb_dim,
                JK=args.JK,
                drop_ratio=args.dropout_ratio,
                gnn_type=args.gnn_type).to(device)
    linear_pred_atoms = torch.nn.Linear(args.emb_dim, 119).to(device)
    linear_pred_bonds = torch.nn.Linear(args.emb_dim, 4).to(device)

    model_list = [model, linear_pred_atoms, linear_pred_bonds]

    # set up optimizers, one per module in model_list (same order)
    optimizer_model = optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.decay)
    optimizer_linear_pred_atoms = optim.Adam(linear_pred_atoms.parameters(),
                                             lr=args.lr,
                                             weight_decay=args.decay)
    optimizer_linear_pred_bonds = optim.Adam(linear_pred_bonds.parameters(),
                                             lr=args.lr,
                                             weight_decay=args.decay)

    optimizer_list = [
        optimizer_model, optimizer_linear_pred_atoms,
        optimizer_linear_pred_bonds
    ]

    for epoch in range(1, args.epochs + 1):
        print("====epoch " + str(epoch))
        train_loss, train_acc_atom, train_acc_bond = train(
            args, model_list, loader, optimizer_list, device)
        print(train_loss, train_acc_atom, train_acc_bond)

    # An empty file name means "do not save"; only the GNN is saved.
    if args.output_model_file:
        torch.save(model.state_dict(), args.output_model_file + ".pth")
def main():
    """Pre-train a GNN with edge masking on the unsupervised bio dataset.

    Parses the command line, seeds the random generators with ``--seed``,
    builds a ``BioDataset`` with the ``MaskEdge`` transform and calls ``train``
    once per epoch. When ``--model_file`` is non-empty the GNN's
    ``state_dict`` is saved to ``<model_file>.pth``.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description=
        'PyTorch implementation of pre-training of graph neural networks')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    # The help text now states the actual default (it used to claim 256).
    parser.add_argument('--batch_size', type=int, default=32,
                        help='input batch size for training (default: 32)')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate (default: 0.001)')
    parser.add_argument('--decay', type=float, default=0,
                        help='weight decay (default: 0)')
    parser.add_argument(
        '--num_layer', type=int, default=5,
        help='number of GNN message passing layers (default: 5).')
    parser.add_argument('--emb_dim', type=int, default=300,
                        help='embedding dimensions (default: 300)')
    parser.add_argument('--dropout_ratio', type=float, default=0,
                        help='dropout ratio (default: 0)')
    # Help text used to be a copy of the dropout option's.
    parser.add_argument('--mask_rate', type=float, default=0.15,
                        help='mask rate (default: 0.15)')
    parser.add_argument(
        '--JK', type=str, default="last",
        help=
        'how the node features are combined across layers. last, sum, max or concat'
    )
    parser.add_argument('--gnn_type', type=str, default="gsan")
    parser.add_argument('--model_file', type=str, default='',
                        help='filename to output the model')
    parser.add_argument('--seed', type=int, default=0,
                        help='seed for the torch and numpy random generators (default: 0)')
    parser.add_argument('--num_workers', type=int, default=8,
                        help='number of workers for dataset loading')
    args = parser.parse_args()

    # Honour --seed; it used to be parsed and then ignored in favour of a
    # hard-coded 0 (which is still the default).
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    device = torch.device(
        "cuda:" +
        str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)

    print("num layer: %d mask rate: %f" % (args.num_layer, args.mask_rate))

    # set up dataset
    root_unsupervised = 'dataset/unsupervised'
    dataset = BioDataset(root_unsupervised,
                         data_type='unsupervised',
                         transform=MaskEdge(mask_rate=args.mask_rate))
    print(dataset)

    loader = DataLoaderMasking(dataset,
                               batch_size=args.batch_size,
                               shuffle=True,
                               num_workers=args.num_workers)

    # set up the GNN
    model = GNN(args.num_layer,
                args.emb_dim,
                JK=args.JK,
                drop_ratio=args.dropout_ratio,
                gnn_type=args.gnn_type).to(device)
    # Linear layer for classifying different edge types
    linear_pred_edges = torch.nn.Linear(args.emb_dim, 7).to(device)

    model_list = [model, linear_pred_edges]

    # set up optimizers, one per module in model_list (same order)
    optimizer_model = optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.decay)
    optimizer_linear_pred_edges = optim.Adam(linear_pred_edges.parameters(),
                                             lr=args.lr,
                                             weight_decay=args.decay)

    optimizer_list = [optimizer_model, optimizer_linear_pred_edges]

    for epoch in range(1, args.epochs + 1):
        print("====epoch " + str(epoch))
        train_loss, train_acc = train(args, model_list, loader,
                                      optimizer_list, device)
        print("loss :", train_loss, "accuracy :", train_acc)

    # An empty file name means "do not save"; only the GNN is saved.
    if args.model_file:
        torch.save(model.state_dict(), args.model_file + ".pth")
# NOTE(review): these two summary prints read k, list_acc and list_kt, none of
# which is defined in this fragment — presumably they are the tail of an
# evaluation routine whose header lies outside this view; confirm their scope.
print(
    f"Top {k}-Hit Accuracy: {np.mean(np.array(list_acc))} and std: {np.std(np.array(list_acc))}"
)
print(
    f"Kendall-Tau scores is: {np.mean(np.array(list_kt))} and std: {np.std(np.array(list_kt))}"
)

# Model parameters
hidden = 20

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device selected: {device}")
model = GNN(ninput=model_size, nhid=hidden, dropout=0.6)
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
num_epoch = 10

print("\nStaring Training")
print(f"Total Number of epoches: {num_epoch}")
for e in range(num_epoch):
    # The epoch counter used to reference the undefined name `num_epochs`.
    print(f"Epoch number: {e+1}/{num_epoch}")
    train(list_adj_train, list_adj_t_train, list_num_node_train, bc_mat_train)

    # Evaluate on the test graphs after every epoch, without tracking gradients.
    with torch.no_grad():
        test(list_adj_test, list_adj_t_test, list_num_node_test, bc_mat_test,
             k=10)
def main():
    """Train (or load) the Sudoku GNN, then evaluate it on the test set.

    Command-line options are read from the module-level ``parser``; this
    function uses ``cuda``, ``batch_size``, ``skip_training``,
    ``learning_rate`` and ``n_epochs``.

    Unless ``--skip_training`` is given, the network is trained with Adam and
    a cross-entropy loss applied to the output of every graph iteration, and
    the weights are saved to ``7_gnn.pth``.  Otherwise the weights are loaded
    from that file.  Afterwards one test mini-batch is solved and printed,
    the per-iteration solutions of one puzzle are drawn, and the fraction of
    solved test puzzles is reported.
    """
    args = parser.parse_args()
    device = torch.device("cuda:0" if args.cuda else "cpu")

    data_dir = tools.select_data_dir()
    trainset = Sudoku(data_dir, train=True)
    testset = Sudoku(data_dir, train=False)
    trainloader = DataLoader(trainset, batch_size=args.batch_size, collate_fn=collate)
    testloader = DataLoader(testset, batch_size=args.batch_size, collate_fn=collate)

    # Create network
    gnn = GNN(device)

    if not args.skip_training:
        optimizer = torch.optim.Adam(gnn.parameters(), lr=args.learning_rate)
        loss_method = nn.CrossEntropyLoss(reduction="mean")

        for epoch in range(args.n_epochs):
            for inputs, targets, src_ids, dst_ids in trainloader:
                inputs, targets = inputs.to(device), targets.to(device)
                src_ids, dst_ids = src_ids.to(device), dst_ids.to(device)

                # The optimizer owns all of gnn's parameters, so this clears
                # every gradient of the network.
                optimizer.zero_grad()

                # Call the module (not .forward) so registered hooks run.
                output = gnn(inputs, src_ids, dst_ids).to(device)

                # The loss is applied to every graph iteration, so the targets
                # are tiled once per iteration.  The count is taken from the
                # output itself instead of a hard-coded 7.
                n_iters = output.shape[0]
                output = output.view(-1, output.shape[2])
                targets = targets.repeat(n_iters, 1).view(-1)

                loss = loss_method(output, targets)
                loss.backward()
                optimizer.step()

            # `loss` here is the loss of the last mini-batch of the epoch.
            fraction = fraction_of_solved_puzzles(gnn, testloader, device)
            print("Train Epoch {}: Loss: {:.6f} Fraction: {}".format(epoch + 1, loss.item(), fraction))

        tools.save_model(gnn, "7_gnn.pth")
    else:
        # The network was already built above; only the weights are needed.
        tools.load_model(gnn, "7_gnn.pth", device)

    # Evaluate the trained model
    # Get graph iterations for some test puzzles
    with torch.no_grad():
        # Fixed: iterators have no .next() method on Python 3; use next().
        inputs, targets, src_ids, dst_ids = next(iter(testloader))
        inputs, targets = inputs.to(device), targets.to(device)
        src_ids, dst_ids = src_ids.to(device), dst_ids.to(device)

        # Each puzzle contributes 81 nodes (9 x 9 cells) to the batch.
        batch_size = inputs.size(0) // 81

        outputs = gnn(inputs, src_ids, dst_ids).to(device)  # [n_iters, n_nodes, 9]
        solution = outputs.view(gnn.n_iters, batch_size, 9, 9, 9).to(device)

        # The prediction is the arg-max digit of the last graph iteration.
        final_solution = solution[-1].argmax(dim=3).to(device)
        print("Solved puzzles in the current mini-batch:")
        print((final_solution.view(-1, 81) == targets.view(batch_size, 81)).all(dim=1))

    # Visualize graph iteration for one of the puzzles
    ix = 0
    for i in range(gnn.n_iters):
        tools.draw_sudoku(solution[i, ix], logits=True)

    fraction_solved = fraction_of_solved_puzzles(gnn, testloader, device)
    print(f"Accuracy {fraction_solved}")
def main():
    """Pre-train a GNN with the substructure/context prediction objective.

    Parses the command-line options, builds the molecule dataset with the
    ``ExtractSubstructureContextPair`` transform, creates one GNN for the
    substructure and one for the context, and trains both for ``--epochs``
    epochs with separate Adam optimizers.  If ``--output_model_file`` is
    non-empty, the substructure model's ``state_dict`` is written to
    ``<output_model_file>.pth``.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description="PyTorch implementation of pre-training of graph neural networks"
    )
    parser.add_argument("--device", type=int, default=0,
                        help="which gpu to use if any (default: 0)")
    parser.add_argument("--batch_size", type=int, default=256,
                        help="input batch size for training (default: 256)")
    parser.add_argument("--epochs", type=int, default=100,
                        help="number of epochs to train (default: 100)")
    parser.add_argument("--lr", type=float, default=0.001,
                        help="learning rate (default: 0.001)")
    parser.add_argument("--decay", type=float, default=0,
                        help="weight decay (default: 0)")
    parser.add_argument("--num_layer", type=int, default=5,
                        help="number of GNN message passing layers (default: 5).")
    parser.add_argument("--csize", type=int, default=3,
                        help="context size (default: 3).")
    parser.add_argument("--emb_dim", type=int, default=300,
                        help="embedding dimensions (default: 300)")
    parser.add_argument("--dropout_ratio", type=float, default=0,
                        help="dropout ratio (default: 0)")
    parser.add_argument(
        "--neg_samples", type=int, default=1,
        help="number of negative contexts per positive context (default: 1)",
    )
    parser.add_argument(
        "--JK", type=str, default="last",
        # Fixed: the two literals were concatenated without a separating
        # space, rendering as "...across layers.last, sum, ...".
        help="how the node features are combined across layers. "
        "last, sum, max or concat",
    )
    parser.add_argument("--context_pooling", type=str, default="mean",
                        help="how the contexts are pooled (sum, mean, or max)")
    parser.add_argument("--mode", type=str, default="cbow",
                        help="cbow or skipgram")
    parser.add_argument(
        "--dataset", type=str,
        default="contextPred/chem/dataset/zinc_standard_agent",
        help="root directory of dataset for pretraining",
    )
    parser.add_argument("--output_model_file", type=str, default="",
                        help="filename to output the model")
    parser.add_argument("--gnn_type", type=str, default="gin")
    # NOTE(review): --seed is parsed but never read in this function; the RNG
    # seeds below are fixed at 0.  Confirm whether that is intended.
    parser.add_argument("--seed", type=int, default=0,
                        help="Seed for splitting dataset.")
    parser.add_argument("--num_workers", type=int, default=8,
                        help="number of workers for dataset loading")
    args = parser.parse_args()

    torch.manual_seed(0)
    np.random.seed(0)
    device = (torch.device("cuda:" + str(args.device))
              if torch.cuda.is_available() else torch.device("cpu"))
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    # l1 and l2 are handed to the context-pair transform; the context GNN
    # below gets l2 - l1 (= csize) layers.
    l1 = args.num_layer - 1
    l2 = l1 + args.csize

    print(args.mode)
    print("num layer: %d l1: %d l2: %d" % (args.num_layer, l1, l2))

    # set up dataset and transform function.
    dataset = MoleculeDataset(
        args.dataset,
        dataset=os.path.basename(args.dataset),
        transform=ExtractSubstructureContextPair(args.num_layer, l1, l2),
    )
    loader = DataLoaderSubstructContext(dataset,
                                        batch_size=args.batch_size,
                                        shuffle=True,
                                        num_workers=args.num_workers)

    # set up models, one for pre-training and one for context embeddings
    model_substruct = GNN(
        args.num_layer,
        args.emb_dim,
        JK=args.JK,
        drop_ratio=args.dropout_ratio,
        gnn_type=args.gnn_type,
    ).to(device)
    model_context = GNN(
        int(l2 - l1),
        args.emb_dim,
        JK=args.JK,
        drop_ratio=args.dropout_ratio,
        gnn_type=args.gnn_type,
    ).to(device)

    # set up optimizer for the two GNNs
    optimizer_substruct = optim.Adam(model_substruct.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.decay)
    optimizer_context = optim.Adam(model_context.parameters(),
                                   lr=args.lr,
                                   weight_decay=args.decay)

    for epoch in range(1, args.epochs + 1):
        print("====epoch " + str(epoch))
        train_loss, train_acc = train(
            args,
            model_substruct,
            model_context,
            loader,
            optimizer_substruct,
            optimizer_context,
            device,
        )
        print(train_loss, train_acc)

    # Only the substructure model is saved; the context model is not.
    if args.output_model_file:
        torch.save(model_substruct.state_dict(),
                   args.output_model_file + ".pth")
gen.load_dataset() clip_grad = 40.0 iterations = 5000 batch_size = 20 num_features = 10 num_layers = 5 J = 4 rf = 10.0 # regularization factor Split = Split_GNN(batch_size, num_features, num_layers, J + 2, dim_input=3) Tsp = GNN(num_features, num_layers, J + 2, dim_input=3) Merge = GNN(num_features, num_layers, J + 2, dim_input=3) optimizer_split = optim.RMSprop(Split.parameters()) optimizer_tsp = optim.Adamax(Tsp.parameters(), lr=1e-3) optimizer_merge = optim.Adamax(Merge.parameters(), lr=1e-3) for it in range(iterations): sample = gen.sample_batch(batch_size, cuda=torch.cuda.is_available()) input, W, WTSP, labels, target, cities, perms, costs = extract(sample) scores, probs = Split(input) variance = compute_variance(probs) sample, log_probs_samples = sample_one(probs, mode='train') WW, x, Phi = compute_operators(W.data, sample, J) x = torch.cat((x.unsqueeze(2), cities), 2) y = WW[:, :, :, 1] WW = Variable(WW).type(dtype) x = Variable(x).type(dtype) y = Variable(y).type(dtype) #print(WW, x, y)