def data_to_loader_ttbar(full_dataset, n_train, n_valid, batch_size):
    # https://pytorch-geometric.readthedocs.io/en/latest/notes/introduction.html
    train_dataset = torch.utils.data.Subset(full_dataset, np.arange(start=0, stop=n_train))
    valid_dataset = torch.utils.data.Subset(full_dataset, np.arange(start=n_train, stop=n_train + n_valid))

    # flatten the train_dataset so that correct batches of events are passed to the GNN
    train_data = []
    for i in range(len(train_dataset)):
        train_data = train_data + train_dataset[i]

    # flatten the valid_dataset so that correct batches of events are passed to the GNN
    valid_data = []
    for i in range(len(valid_dataset)):
        valid_data = valid_data + valid_dataset[i]

    if not multi_gpu:
        train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
        valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=True)
    else:
        # https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/nn/data_parallel.html
        train_loader = DataListLoader(train_data, batch_size=batch_size, shuffle=True)
        valid_loader = DataListLoader(valid_data, batch_size=batch_size, shuffle=True)

    return train_loader, valid_loader
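# Hedged sketch (not from the snippets above): why these functions switch loader
# types for multi-GPU runs. A DataLoader collates each mini-batch into a single
# merged Batch object, while a DataListLoader yields a plain Python list of Data
# objects that torch_geometric.nn.DataParallel later scatters across devices.
# The toy graphs are assumptions for illustration; newer PyG versions expose
# both loaders under torch_geometric.loader instead of torch_geometric.data.
import torch
from torch_geometric.data import Data, DataLoader, DataListLoader

toy_graphs = [Data(x=torch.randn(4, 3),
                   edge_index=torch.tensor([[0, 1, 2], [1, 2, 3]]))
              for _ in range(8)]

for batch in DataLoader(toy_graphs, batch_size=4):
    print(batch.num_graphs)  # 4: graphs merged into one Batch with a `batch` vector
for data_list in DataListLoader(toy_graphs, batch_size=4):
    print(len(data_list))    # 4: a plain Python list of Data objects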
def create_loaders(self) -> Tuple[DataLoader, DataLoader]:
    train_loader = DataListLoader(self.train_dataset, batch_size=self.train_batch_size, shuffle=False)
    test_loader = DataListLoader(self.test_dataset, batch_size=1, shuffle=False)
    return train_loader, test_loader
def create_energy_dataset():
    class LoadDataset(InMemoryDataset):
        def __init__(self, root):
            super(LoadDataset, self).__init__(root)
            self.data, self.slices = torch.load(root + '/processed/processed')

        @property
        def processed_file_names(self):
            return os.listdir('C:/Users/jv97/Desktop/github/Neutrino-Machine-Learning/copy_dataset/processed')

        def process(self):
            pass

    print('Loads data')
    dataset = LoadDataset(root='C:/Users/jv97/Desktop/github/Neutrino-Machine-Learning/copy_dataset')
    # keep every 8th target (one per event) and rebuild the slice index to match
    dataset.data.y = dataset.data.y[::8]
    dataset.slices['y'] = torch.tensor(np.arange(300000 + 1))
    dataset = dataset.shuffle()

    train_dataset = dataset[:200000]
    print(len(train_dataset))
    test_dataset = dataset[200000:]

    # a single full-size batch turns each split into one plain list of Data objects
    for train_list in DataListLoader(train_dataset, batch_size=len(train_dataset)):
        pass
    for test_list in DataListLoader(test_dataset, batch_size=len(test_dataset)):
        pass

    class MakeDataset(InMemoryDataset):
        def __init__(self, root, data_list, name):
            super(MakeDataset, self).__init__(root)
            self.data, self.slices = self.collate(data_list)
            torch.save((self.data, self.slices), root + '/' + name)

        @property
        def processed_file_names(self):
            return os.listdir('C:/Users/jv97/Desktop/github/Neutrino-Machine-Learning/train_test_datasets')

        def process(self):
            pass

    MakeDataset('C:/Users/jv97/Desktop/github/Neutrino-Machine-Learning/train_test_datasets',
                train_list, 'train_energy')
    MakeDataset('C:/Users/jv97/Desktop/github/Neutrino-Machine-Learning/train_test_datasets',
                test_list, 'test_energy')
def get_data():
    gdata = GraphDataset(root='/anomalyvol/data/gnn_node_global_merge', bb=0)
    fulllen = len(gdata)
    tv_frac = 0.10
    tv_num = math.ceil(fulllen * tv_frac)
    torch.manual_seed(0)
    train_dataset, valid_dataset, test_dataset = random_split(
        gdata, [fulllen - 2 * tv_num, tv_num, tv_num])

    train_loader = DataListLoader(train_dataset, batch_size=batch_size, pin_memory=True, shuffle=True)
    train_loader.collate_fn = collate
    valid_loader = DataListLoader(valid_dataset, batch_size=batch_size, pin_memory=True, shuffle=False)
    valid_loader.collate_fn = collate
    test_loader = DataListLoader(test_dataset, batch_size=batch_size, pin_memory=True, shuffle=False)
    test_loader.collate_fn = collate

    train_samples = len(train_dataset)
    valid_samples = len(valid_dataset)
    test_samples = len(test_dataset)
    return train_loader, valid_loader, test_loader, train_samples, valid_samples, test_samples
def evaluate_RGNN(model, te_dataset, label_type):
    assert label_type in label_types
    model.eval()
    y_pred = []
    y_true = []
    te_loader = DataListLoader(te_dataset)
    with torch.no_grad():
        for te_data_list in te_loader:
            # output shape: (len(te_data_list), 5 or 1)
            output, _ = model(te_data_list)
            y = torch.cat([data.y for data in te_data_list]).to(output.device)
            y_true.extend(y.detach().cpu().numpy())
            if label_type == "hard" or label_type == "soft":
                pred = torch.argmax(output, dim=1)
            else:
                # regression: snap each prediction to the nearest anchor in {-2, ..., 2}
                # (the flattened source compared the anchors against y here; decoding
                # the model output instead is assumed to be the intent)
                sep = torch.Tensor([-2, -1, 0, 1, 2])
                sep = sep.repeat(len(te_data_list), 1)
                diff = torch.abs(sep.t() - output.view(-1).cpu())
                pred = torch.argmin(diff, dim=0)
            y_pred.extend(pred.detach().cpu().numpy())
    macro_f1_score = sklearn.metrics.f1_score(y_true, y_pred, average='macro')
    # micro_f1_score = sklearn.metrics.f1_score(y_true, y_pred, average='micro')
    accuracy = sklearn.metrics.accuracy_score(y_true, y_pred)
    return accuracy, macro_f1_score
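# Hedged illustration of the nearest-anchor decoding used above: a scalar
# regression output is mapped to the closest of the five label values
# {-2, -1, 0, 1, 2}, whose index then serves as the predicted class. The toy
# tensors below are assumptions for demonstration, not from the original code.
import torch

y_hat = torch.tensor([-1.8, 0.3, 1.6])        # example regression outputs
sep = torch.tensor([-2., -1., 0., 1., 2.])    # class anchor values
diff = torch.abs(sep.unsqueeze(1) - y_hat)    # (5, 3): distance anchor x sample
pred = torch.argmin(diff, dim=0)              # nearest anchor index per sample
print(pred)                                   # tensor([0, 2, 4])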
def main():
    args = arg_parse()
    assert args.pretrained_ckpt is not None, '--pretrained_ckpt is required.'
    assert args.json_output_dir is not None, '--json_output_dir is required.'
    args.devices = [int(device_id) for device_id in args.devices.split(',')]
    args.json_output_dir = os.path.join(args.json_output_dir, args.inference_method)
    if not os.path.exists(args.json_output_dir):
        os.makedirs(args.json_output_dir)

    test_set = HAMPerFile(data_root=args.data_root,
                          cycle_feat=args.use_cycle_feat,
                          degree_feat=args.use_degree_feat,
                          automorphism=args.automorphism)
    test_dataloader = DataListLoader(test_set, batch_size=1,
                                     num_workers=args.num_workers, pin_memory=True)

    model = DSGPM(args.input_dim, args.hidden_dim, args.output_dim, args=args).cuda()
    ckpt = torch.load(args.pretrained_ckpt)
    model.load_state_dict(ckpt)

    with torch.no_grad():
        eval(test_dataloader, model, args)
def test_enzymes():
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = TUDataset(root, 'ENZYMES')

    assert len(dataset) == 600
    assert dataset.num_features == 3
    assert dataset.num_classes == 6
    assert dataset.__repr__() == 'ENZYMES(600)'

    assert len(dataset[0]) == 3
    assert len(dataset.shuffle()) == 600
    assert len(dataset.shuffle(return_perm=True)) == 2
    assert len(dataset[:100]) == 100
    assert len(dataset[torch.arange(100, dtype=torch.long)]) == 100
    mask = torch.zeros(600, dtype=torch.bool)
    mask[:100] = 1
    assert len(dataset[mask]) == 100

    loader = DataLoader(dataset, batch_size=len(dataset))
    for data in loader:
        assert data.num_graphs == 600

        avg_num_nodes = data.num_nodes / data.num_graphs
        assert pytest.approx(avg_num_nodes, abs=1e-2) == 32.63

        avg_num_edges = data.num_edges / (2 * data.num_graphs)
        assert pytest.approx(avg_num_edges, abs=1e-2) == 62.14

        assert len(data) == 5
        assert list(data.x.size()) == [data.num_nodes, 3]
        assert list(data.y.size()) == [data.num_graphs]
        assert data.y.max() + 1 == 6
        assert list(data.batch.size()) == [data.num_nodes]
        assert data.ptr.numel() == data.num_graphs + 1

        assert data.contains_isolated_nodes()
        assert not data.contains_self_loops()
        assert data.is_undirected()

    loader = DataListLoader(dataset, batch_size=len(dataset))
    for data_list in loader:
        assert len(data_list) == 600

    dataset.transform = ToDense(num_nodes=126)
    loader = DenseDataLoader(dataset, batch_size=len(dataset))
    for data in loader:
        assert len(data) == 4
        assert list(data.x.size()) == [600, 126, 3]
        assert list(data.adj.size()) == [600, 126, 126]
        assert list(data.mask.size()) == [600, 126]
        assert list(data.y.size()) == [600, 1]

    dataset = TUDataset(root, 'ENZYMES', use_node_attr=True)
    assert dataset.num_node_features == 21
    assert dataset.num_features == 21
    assert dataset.num_edge_features == 0

    shutil.rmtree(root)
def __init__(self, option, model, train_dataset, valid_dataset, test_dataset=None,
             weight=[[1.0, 1.0]], tasks_num=17):
    # most important variables
    self.option = option
    self.device = torch.device("cuda:{}".format(option['gpu'][0])
                               if torch.cuda.is_available() else "cpu")
    self.model = DataParallel(model).to(self.device) if option['parallel'] else model.to(self.device)

    # set up the train, valid and test data loaders
    if self.option['parallel']:
        self.train_dataloader = DataListLoader(train_dataset, batch_size=self.option['batch_size'], shuffle=True)
        self.valid_dataloader = DataListLoader(valid_dataset, batch_size=self.option['batch_size'])
        if test_dataset:
            self.test_dataloader = DataListLoader(test_dataset, batch_size=self.option['batch_size'])
    else:
        self.train_dataloader = DataLoader(train_dataset, batch_size=self.option['batch_size'], shuffle=True)
        self.valid_dataloader = DataLoader(valid_dataset, batch_size=self.option['batch_size'])
        if test_dataset:
            self.test_dataloader = DataLoader(test_dataset, batch_size=self.option['batch_size'])
    self.save_path = self.option['exp_path']

    # loss criterion
    if option['focalloss']:
        self.log('Using FocalLoss')
        self.criterion = [FocalLoss(alpha=1 / w[0]) for w in weight]  # alpha 0.965
    else:
        self.criterion = [torch.nn.CrossEntropyLoss(torch.Tensor(w).to(self.device), reduction='mean')
                          for w in weight]

    # Adam optimizer with hyper-parameters and LR scheduler
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.option['lr'],
                                      weight_decay=option['weight_decay'])
    self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        self.optimizer, mode='min', factor=0.7,
        patience=self.option['lr_scheduler_patience'], min_lr=1e-6
    )

    # other bookkeeping
    self.start = time.time()
    self.tasks_num = tasks_num
    self.records = {'trn_record': [], 'val_record': [], 'val_losses': [],
                    'best_ckpt': None, 'val_roc': [], 'val_prc': []}
    self.log(msgs=['\t{}:{}\n'.format(k, v) for k, v in self.option.items()], show=False)
    self.log('train set num:{} valid set num:{} test set num: {}'.format(
        len(train_dataset), len(valid_dataset), len(test_dataset)))
    self.log("total parameters:" + str(sum([p.nelement() for p in self.model.parameters()])))
    self.log(msgs=str(model).split('\n'), show=False)
def train(self):
    tqdm_bar = tqdm(range(self.config.epochs))
    for epoch_idx in tqdm_bar:  # iterate through epochs
        acc_loss_train = 0
        self.sequence_loader = DataListLoader(self.training_data, batch_size=self.config.batch_size)
        for data_list in self.sequence_loader:  # iterate through scenegraphs
            self.model.train()
            self.optimizer.zero_grad()
            labels = torch.empty(0).long().to(self.config.device)
            outputs = torch.empty(0, 2).to(self.config.device)
            for sequence in data_list:  # iterate through sequences
                data, label = sequence['sequence'], sequence['label']
                # each data entry is a sequence consisting of several graphs
                graph_list = [Data(x=g['node_features'], edge_index=g['edge_index'],
                                   edge_attr=g['edge_attr']) for g in data]
                self.train_loader = DataLoader(graph_list, batch_size=len(graph_list))
                sequence = next(iter(self.train_loader)).to(self.config.device)
                output, _ = self.model.forward(sequence.x, sequence.edge_index,
                                               sequence.edge_attr, sequence.batch)
                outputs = torch.cat([outputs, output.view(-1, 2)], dim=0)
                labels = torch.cat([labels, torch.LongTensor([label]).to(self.config.device)], dim=0)
            loss_train = self.loss_func(outputs, labels)
            loss_train.backward()
            acc_loss_train += loss_train.detach().cpu().item() * len(data_list)
            self.optimizer.step()

        acc_loss_train /= len(self.training_data)
        tqdm_bar.set_description('Epoch: {:04d}, loss_train: {:.4f}'.format(epoch_idx, acc_loss_train))

        if epoch_idx % self.config.test_step == 0:
            _, _, metrics, _ = self.evaluate(epoch_idx)
            self.summary_writer.add_scalar('Acc_Loss/train', metrics['train']['loss'], epoch_idx)
            self.summary_writer.add_scalar('Acc_Loss/train_acc', metrics['train']['acc'], epoch_idx)
            self.summary_writer.add_scalar('F1/train', metrics['train']['f1'], epoch_idx)
            # self.summary_writer.add_scalar('Confusion/train', metrics['train']['confusion'], epoch_idx)
            self.summary_writer.add_scalar('Precision/train', metrics['train']['precision'], epoch_idx)
            self.summary_writer.add_scalar('Recall/train', metrics['train']['recall'], epoch_idx)
            self.summary_writer.add_scalar('Auc/train', metrics['train']['auc'], epoch_idx)
            self.summary_writer.add_scalar('Acc_Loss/test', metrics['test']['loss'], epoch_idx)
            self.summary_writer.add_scalar('Acc_Loss/test_acc', metrics['test']['acc'], epoch_idx)
            self.summary_writer.add_scalar('F1/test', metrics['test']['f1'], epoch_idx)
            # self.summary_writer.add_scalar('Confusion/test', metrics['test']['confusion'], epoch_idx)
            self.summary_writer.add_scalar('Precision/test', metrics['test']['precision'], epoch_idx)
            self.summary_writer.add_scalar('Recall/test', metrics['test']['recall'], epoch_idx)
            self.summary_writer.add_scalar('Auc/test', metrics['test']['auc'], epoch_idx)
def main():
    opt = OptInit().initialize()

    opt.printer.info('===> Creating dataloader ...')
    train_dataset = GeoData.S3DIS(opt.train_path, 5, True, pre_transform=T.NormalizeScale())
    if opt.multi_gpus:
        train_loader = DataListLoader(train_dataset, batch_size=opt.batch_size,
                                      shuffle=True, num_workers=4)
    else:
        train_loader = DataLoader(train_dataset, batch_size=opt.batch_size,
                                  shuffle=True, num_workers=4)
    opt.n_classes = train_loader.dataset.num_classes

    opt.printer.info('===> Loading the network ...')
    model = SparseDeepGCN(opt).to(opt.device)
    if opt.multi_gpus:
        model = DataParallel(SparseDeepGCN(opt)).to(opt.device)

    opt.printer.info('===> loading pre-trained ...')
    model, opt.best_value, opt.epoch = load_pretrained_models(model, opt.pretrained_model, opt.phase)

    opt.printer.info('===> Init the optimizer ...')
    criterion = torch.nn.CrossEntropyLoss().to(opt.device)
    if opt.optim.lower() == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    elif opt.optim.lower() == 'radam':
        optimizer = optim.RAdam(model.parameters(), lr=opt.lr)
    else:
        raise NotImplementedError('opt.optim is not supported')
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, opt.lr_adjust_freq, opt.lr_decay_rate)
    optimizer, scheduler, opt.lr = load_pretrained_optimizer(opt.pretrained_model, optimizer, scheduler, opt.lr)

    opt.printer.info('===> Init Metric ...')
    opt.losses = AverageMeter()
    # opt.test_metric = miou
    # opt.test_values = AverageMeter()
    opt.test_value = 0.

    opt.printer.info('===> start training ...')
    for _ in range(opt.total_epochs):
        opt.epoch += 1
        train(model, train_loader, optimizer, scheduler, criterion, opt)
        # test_value = test(model, test_loader, test_metric, opt)
        scheduler.step()
    opt.printer.info('Saving the final model. Finished!')
def data_to_loader_qcd(full_dataset, n_test, batch_size):
    test_dataset = torch.utils.data.Subset(full_dataset, np.arange(start=0, stop=n_test))

    # flatten the test_dataset so that correct batches of events are passed to the GNN
    test_data = []
    for i in range(len(test_dataset)):
        test_data = test_data + test_dataset[i]

    if not multi_gpu:
        test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)
    else:
        # https://pytorch-geometric.readthedocs.io/en/latest/_modules/torch_geometric/nn/data_parallel.html
        test_loader = DataListLoader(test_data, batch_size=batch_size, shuffle=True)

    return test_loader
def main():
    args = arg_parse()
    assert args.vis_root is not None, '--vis_root is required.'
    args.devices = [int(device_id) for device_id in args.devices]

    # loading data
    test_set = HAMPerFile(data_root=args.data_root,
                          cycle_feat=args.use_cycle_feat,
                          degree_feat=args.use_degree_feat,
                          automorphism=False)
    test_dataloader = DataListLoader(test_set, batch_size=1, num_workers=0, pin_memory=True)

    args.vis_path = os.path.join(args.vis_root, args.title)
    if not args.debug:
        if os.path.exists(args.vis_path):
            shutil.rmtree(args.vis_path)
        os.makedirs(args.vis_path)

    gen_vis(test_dataloader, args)
def main():
    opt = OptInit().initialize()

    print('===> Creating dataloader ...')
    train_dataset = GeoData.S3DIS(opt.train_path, 5, True, pre_transform=T.NormalizeScale())
    if opt.multi_gpus:
        train_loader = DataListLoader(train_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=4)
    else:
        train_loader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=4)
    opt.n_classes = train_loader.dataset.num_classes

    print('===> Loading the network ...')
    opt.model = getattr(models, opt.model_name)(opt).to(opt.device)
    if opt.multi_gpus:
        opt.model = DataParallel(getattr(models, opt.model_name)(opt)).to(opt.device)

    print('===> loading pre-trained ...')
    load_pretrained_models(opt)

    print('===> Init the optimizer ...')
    opt.criterion = torch.nn.CrossEntropyLoss().to(opt.device)
    opt.valid_metric = miou
    opt.optimizer = torch.optim.Adam(opt.model.parameters(), lr=opt.lr)
    opt.scheduler = torch.optim.lr_scheduler.StepLR(opt.optimizer, opt.lr_adjust_freq, 0.5)
    load_pretrained_optimizer(opt)

    print('===> start training ...')
    for _ in range(opt.total_epochs):
        opt.epoch += 1
        train(train_loader, opt)
        # valid(train_loader, opt)
        opt.scheduler.step()
    print('Saving the final model. Finished!')
def train(i, epoch):
    if args.multi_gpus:
        data_loader = DataListLoader(train_data_list, batch_size=args.batch_size,
                                     shuffle=True, num_workers=16)
    else:
        data_loader = DataLoader(train_data_list, batch_size=args.batch_size,
                                 shuffle=True, num_workers=16, pin_memory=True)
    train_model1.train()
    train_model2.train()
    optimizer.zero_grad()

    # accumulate embeddings and labels over all batches before computing the loss
    for batch_idx, Batch_data in enumerate(data_loader):
        batch_x = train_model1(Batch_data)
        gt = torch.cat([data_batch.y for data_batch in Batch_data], 0).to(dev)
        if batch_idx == 0:
            group_x = batch_x
            group_y = gt
        else:
            group_x = torch.cat((group_x, batch_x), 0)
            group_y = torch.cat((group_y, gt), 0)

    group_y = group_y.squeeze(1) if group_y.dim() == 2 else group_y
    out_labels = train_model2(group_x)
    if args.multi_gpus:
        mu = train_model1.module.conv2.mu
    else:
        mu = train_model1.conv2.mu
    loss = total_loss(group_x, group_y, mu)
    loss.backward()
    optimizer.step()

    _, pred = out_labels.max(dim=-1)
    correct = pred.eq(group_y).sum().item()
    train_acc = correct / len(group_y)
    train_score = out_labels[:, 1]
    train_auc = roc_auc_score(group_y.cpu(), train_score.cpu().detach().numpy())
    train_pre = precision_score(group_y.cpu(), pred.cpu())
    print("Iter: {:03d} | Epoch: {:05d} | Train_Loss: {:.4f}| Train_ACC: {:.4f} | "
          "Train_AUC: {:.4f}| Train_Pre: {:.4f}".format(i, epoch, loss.item(),
                                                        train_acc, train_auc, train_pre))
def from_data_to_loader(full_dataset, n_train, n_val, batch_size):
    train_dataset = torch.utils.data.Subset(full_dataset, np.arange(start=0, stop=n_train))
    valid_dataset = torch.utils.data.Subset(full_dataset, np.arange(start=n_train, stop=n_train + n_val))

    # flatten the train_dataset so that correct batches of events are passed to the GNN
    train_dataset_batched = []
    for i in range(len(train_dataset)):
        train_dataset_batched += train_dataset[i]
    train_dataset_batched = [[i] for i in train_dataset_batched]

    # flatten the valid_dataset so that correct batches of events are passed to the GNN
    valid_dataset_batched = []
    for i in range(len(valid_dataset)):
        valid_dataset_batched += valid_dataset[i]
    valid_dataset_batched = [[i] for i in valid_dataset_batched]

    # hack for multi-gpu training
    if not multi_gpu:
        def collate(items):
            l = sum(items, [])
            return Batch.from_data_list(l)
    else:
        def collate(items):
            l = sum(items, [])
            return l

    train_loader = DataListLoader(train_dataset_batched, batch_size, pin_memory=True, shuffle=True)
    train_loader.collate_fn = collate
    valid_loader = DataListLoader(valid_dataset_batched, batch_size, pin_memory=True, shuffle=False)
    valid_loader.collate_fn = collate

    return train_loader, valid_loader
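# Hedged illustration of the collate_fn override above (toy data, not from the
# original sources): each dataset item is itself a list of Data objects, so the
# loader's collate function receives a list of lists; sum(items, []) flattens it,
# and in the single-GPU branch Batch.from_data_list merges the graphs into one Batch.
import torch
from torch_geometric.data import Data, Batch

items = [[Data(x=torch.ones(2, 1))], [Data(x=torch.zeros(3, 1))]]  # list of per-event lists
flat = sum(items, [])                # -> [Data(2 nodes), Data(3 nodes)]
merged = Batch.from_data_list(flat)  # -> one Batch holding 5 nodes
print(merged.num_graphs)             # 2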
def model_training(data_list_train, data_list_test, epochs, acc_epoch, acc_epoch2,
                   save_model_epochs, validation_epoch, batchsize, logfilename,
                   load_checkpoint=None):
    # logging
    logging.basicConfig(level=logging.DEBUG, filename='./logfiles/' + logfilename,
                        filemode="w+", format="%(message)s")

    trainloader = DataListLoader(data_list_train, batch_size=batchsize, shuffle=True)
    testloader = DataListLoader(data_list_test, batch_size=batchsize, shuffle=True)

    device = torch.device('cuda')
    complete_net = completeNet()
    complete_net = DataParallel(complete_net)
    complete_net = complete_net.to(device)

    # train parameters
    weights = [10, 1]
    optimizer = torch.optim.Adam(complete_net.parameters(), lr=0.001, weight_decay=0.001)

    # resume training
    initial_epoch = 1
    if load_checkpoint is not None:
        checkpoint = torch.load(load_checkpoint)
        complete_net.load_state_dict(checkpoint['model_state_dict'], strict=False)
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        initial_epoch = checkpoint['epoch'] + 1
        loss = checkpoint['loss']

    complete_net.train()
    for epoch in range(initial_epoch, epochs + 1):
        epoch_total = 0
        epoch_total_ones = 0
        epoch_total_zeros = 0
        epoch_correct = 0
        epoch_correct_ones = 0
        epoch_correct_zeros = 0
        running_loss = 0
        batches_num = 0

        for batch in trainloader:
            batch_total = 0
            batch_total_ones = 0
            batch_total_zeros = 0
            batch_correct = 0
            batch_correct_ones = 0
            batch_correct_zeros = 0
            batches_num += 1

            # forward pass and backpropagation
            output, output2, ground_truth, ground_truth2, det_num, tracklet_num = complete_net(batch)
            optimizer.zero_grad()
            loss = weighted_binary_cross_entropy(output, ground_truth, weights)
            loss.backward()
            optimizer.step()

            # accuracy
            if epoch % acc_epoch == 0 and epoch != 0:
                # Hungarian method, clean up
                cleaned_output = hungarian(output2, ground_truth2, det_num, tracklet_num)
                batch_total += cleaned_output.size(0)
                ones = torch.tensor([1 for x in cleaned_output]).to(device)
                zeros = torch.tensor([0 for x in cleaned_output]).to(device)
                batch_total_ones += (cleaned_output == ones).sum().item()
                batch_total_zeros += (cleaned_output == zeros).sum().item()
                batch_correct += (cleaned_output == ground_truth2).sum().item()
                temp1 = (cleaned_output == ground_truth2)
                temp2 = (cleaned_output == ones)
                batch_correct_ones += (temp1 & temp2).sum().item()
                temp3 = (cleaned_output == zeros)
                batch_correct_zeros += (temp1 & temp3).sum().item()
                epoch_total += batch_total
                epoch_total_ones += batch_total_ones
                epoch_total_zeros += batch_total_zeros
                epoch_correct += batch_correct
                epoch_correct_ones += batch_correct_ones
                epoch_correct_zeros += batch_correct_zeros

            if loss.item() != loss.item():  # NaN check
                print("Error")
                break

            if batch_total_ones != 0 and batch_total_zeros != 0 and epoch % acc_epoch == 0 and epoch != 0:
                print('Epoch: [%d] | Batch: [%d] | Training_Loss: %.3f | Total_Accuracy: %.3f | '
                      'Ones_Accuracy: %.3f | Zeros_Accuracy: %.3f |'
                      % (epoch, batches_num, loss.item(),
                         100 * batch_correct / batch_total,
                         100 * batch_correct_ones / batch_total_ones,
                         100 * batch_correct_zeros / batch_total_zeros))
                logging.info('Epoch: [%d] | Batch: [%d] | Training_Loss: %.3f | Total_Accuracy: %.3f | '
                             'Ones_Accuracy: %.3f | Zeros_Accuracy: %.3f |'
                             % (epoch, batches_num, loss.item(),
                                100 * batch_correct / batch_total,
                                100 * batch_correct_ones / batch_total_ones,
                                100 * batch_correct_zeros / batch_total_zeros))
            else:
                print('Epoch: [%d] | Batch: [%d] | Training_Loss: %.3f |' % (epoch, batches_num, loss.item()))
                logging.info('Epoch: [%d] | Batch: [%d] | Training_Loss: %.3f |' % (epoch, batches_num, loss.item()))

            running_loss += loss.item()
            if loss.item() != loss.item():  # NaN check
                print("Error")
                break

        if epoch_total_ones != 0 and epoch_total_zeros != 0 and epoch % acc_epoch == 0 and epoch != 0:
            print('Epoch: [%d] | Training_Loss: %.3f | Total_Accuracy: %.3f | '
                  'Ones_Accuracy: %.3f | Zeros_Accuracy: %.3f |'
                  % (epoch, running_loss / batches_num,
                     100 * epoch_correct / epoch_total,
                     100 * epoch_correct_ones / epoch_total_ones,
                     100 * epoch_correct_zeros / epoch_total_zeros))
            logging.info('Epoch: [%d] | Training_Loss: %.3f | Total_Accuracy: %.3f | '
                         'Ones_Accuracy: %.3f | Zeros_Accuracy: %.3f |'
                         % (epoch, running_loss / batches_num,
                            100 * epoch_correct / epoch_total,
                            100 * epoch_correct_ones / epoch_total_ones,
                            100 * epoch_correct_zeros / epoch_total_zeros))
        else:
            print('Epoch: [%d] | Training_Loss: %.3f |' % (epoch, running_loss / batches_num))
            logging.info('Epoch: [%d] | Training_Loss: %.3f |' % (epoch, running_loss / batches_num))

        # save model
        if epoch % save_model_epochs == 0 and epoch != 0:
            torch.save({
                'epoch': epoch,
                'model_state_dict': complete_net.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': running_loss,
            }, './models/epoch_' + str(epoch) + '.pth')

        # validation
        if epoch % validation_epoch == 0 and epoch != 0:
            with torch.no_grad():
                epoch_total = 0
                epoch_total_ones = 0
                epoch_total_zeros = 0
                epoch_correct = 0
                epoch_correct_ones = 0
                epoch_correct_zeros = 0
                running_loss = 0
                batches_num = 0
                for batch in testloader:
                    batch_total = 0
                    batch_total_ones = 0
                    batch_total_zeros = 0
                    batch_correct = 0
                    batch_correct_ones = 0
                    batch_correct_zeros = 0
                    batches_num += 1
                    output, output2, ground_truth, ground_truth2, det_num, tracklet_num = complete_net(batch)
                    loss = weighted_binary_cross_entropy(output, ground_truth, weights)
                    running_loss += loss.item()

                    # accuracy
                    if epoch % acc_epoch2 == 0 and epoch != 0:
                        # Hungarian method, clean up
                        cleaned_output = hungarian(output2, ground_truth2, det_num, tracklet_num)
                        batch_total += cleaned_output.size(0)
                        ones = torch.tensor([1 for x in cleaned_output]).to(device)
                        zeros = torch.tensor([0 for x in cleaned_output]).to(device)
                        batch_total_ones += (cleaned_output == ones).sum().item()
                        batch_total_zeros += (cleaned_output == zeros).sum().item()
                        batch_correct += (cleaned_output == ground_truth2).sum().item()
                        temp1 = (cleaned_output == ground_truth2)
                        temp2 = (cleaned_output == ones)
                        batch_correct_ones += (temp1 & temp2).sum().item()
                        temp3 = (cleaned_output == zeros)
                        batch_correct_zeros += (temp1 & temp3).sum().item()
                        epoch_total += batch_total
                        epoch_total_ones += batch_total_ones
                        epoch_total_zeros += batch_total_zeros
                        epoch_correct += batch_correct
                        epoch_correct_ones += batch_correct_ones
                        epoch_correct_zeros += batch_correct_zeros

                if epoch_total_ones != 0 and epoch_total_zeros != 0 and epoch % acc_epoch2 == 0 and epoch != 0:
                    print('Epoch: [%d] | Validation_Loss: %.3f | Total_Accuracy: %.3f | '
                          'Ones_Accuracy: %.3f | Zeros_Accuracy: %.3f |'
                          % (epoch, running_loss / batches_num,
                             100 * epoch_correct / epoch_total,
                             100 * epoch_correct_ones / epoch_total_ones,
                             100 * epoch_correct_zeros / epoch_total_zeros))
                    logging.info('Epoch: [%d] | Validation_Loss: %.3f | Total_Accuracy: %.3f | '
                                 'Ones_Accuracy: %.3f | Zeros_Accuracy: %.3f |'
                                 % (epoch, running_loss / batches_num,
                                    100 * epoch_correct / epoch_total,
                                    100 * epoch_correct_ones / epoch_total_ones,
                                    100 * epoch_correct_zeros / epoch_total_zeros))
                else:
                    print('Epoch: [%d] | Validation_Loss: %.3f |' % (epoch, running_loss / batches_num))
                    logging.info('Epoch: [%d] | Validation_Loss: %.3f |' % (epoch, running_loss / batches_num))
if torch.cuda.device_count() <= 1 or C["single_gpu"] or C["debug"]:
    # prepare data for a single GPU or CPU
    DL_tr = DataLoader(train_dataset, batch_size=C["batch_size"], shuffle=C["shuffle"], pin_memory=True)
    DL_vl = DataLoader(valid_dataset, batch_size=1, shuffle=False, pin_memory=True)
else:
    # prepare data for parallelization over multiple GPUs
    DL_tr = DataListLoader(train_dataset,
                           batch_size=torch.cuda.device_count() * C["batch_size"],
                           shuffle=C["shuffle"], pin_memory=True)
    DL_vl = DataListLoader(valid_dataset, batch_size=1, shuffle=False, pin_memory=True)

####################################################################################################
# Create the model we'll be training.
nF = train_info['num_features']
if C["model"]["name"] == "NetConvPool":
    model = NetConvPool(nF, C['nc'], **C["model"]["kwargs"])
elif C["model"]["name"] == "PointNetPP":
    model = PointNetPP(nF, C['nc'], **C["model"]["kwargs"])
elif C["model"]["name"] == "MultiBranchNet":
def create_models(features, spectators, labels, nfeatures, nspectators, nlabels,
                  ntracks, train_files, test_files, val_files, batch_size,
                  remove_mass_pt_window, remove_unlabeled, max_entry):
    # imports
    from tensorflow.keras.models import Model
    from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
    from tensorflow.keras.layers import Input, Dense, BatchNormalization, Conv1D, Flatten, Lambda

    # DATA GENERATORS FOR USE IN MODEL TRAINING AND TESTING
    train_generator = DataGenerator(train_files, features, labels, spectators,
                                    batch_size=batch_size, n_dim=ntracks,
                                    remove_mass_pt_window=remove_mass_pt_window,
                                    remove_unlabeled=remove_unlabeled, max_entry=max_entry)
    val_generator = DataGenerator(val_files, features, labels, spectators,
                                  batch_size=batch_size, n_dim=ntracks,
                                  remove_mass_pt_window=remove_mass_pt_window,
                                  remove_unlabeled=remove_unlabeled, max_entry=max_entry)
    test_generator = DataGenerator(test_files, features, labels, spectators,
                                   batch_size=batch_size, n_dim=ntracks,
                                   remove_mass_pt_window=remove_mass_pt_window,
                                   remove_unlabeled=remove_unlabeled, max_entry=max_entry)

    # class weights for training
    training_weights = {0: 3.479, 1: 4.002, 2: 3.246, 3: 2.173, 4: 0.253, 5: 1.360}

    # FULLY CONNECTED NEURAL NET CLASSIFIER
    # define dense keras model
    inputs = Input(shape=(ntracks, nfeatures,), name='input')
    x = BatchNormalization(name='bn_1')(inputs)
    x = Flatten(name='flatten_1')(x)
    x = Dense(64, name='dense_1', activation='relu')(x)
    x = Dense(32, name='dense_2', activation='relu')(x)
    x = Dense(32, name='dense_3', activation='relu')(x)
    outputs = Dense(nlabels, name='output', activation='softmax')(x)
    keras_model_dense = Model(inputs=inputs, outputs=outputs)
    keras_model_dense.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    print(keras_model_dense.summary())

    # define callbacks
    early_stopping = EarlyStopping(monitor='val_loss', patience=35)
    reduce_lr = ReduceLROnPlateau(patience=5, factor=0.5)
    model_checkpoint = ModelCheckpoint('keras_model_dense_best.h5', monitor='val_loss', save_best_only=True)
    callbacks = [early_stopping, model_checkpoint, reduce_lr]

    # fit keras model
    history_dense = keras_model_dense.fit_generator(train_generator,
                                                    validation_data=val_generator,
                                                    steps_per_epoch=len(train_generator),
                                                    validation_steps=len(val_generator),
                                                    max_queue_size=5,
                                                    epochs=50,
                                                    class_weight=training_weights,
                                                    shuffle=False,
                                                    callbacks=callbacks,
                                                    verbose=0)

    # reload best weights
    keras_model_dense.load_weights('keras_model_dense_best.h5')
    visualize_loss(history_dense)
    visualize('fcnn_loss.png')

    from tensorflow.keras.models import Model
    from tensorflow.keras.layers import (Input, Dense, BatchNormalization, Conv1D,
                                         Flatten, Lambda, GlobalAveragePooling1D)
    import tensorflow.keras.backend as K

    # define Deep Sets model with Conv1D Keras layers
    inputs = Input(shape=(ntracks, nfeatures,), name='input')
    x = BatchNormalization(name='bn_1')(inputs)
    x = Conv1D(64, 1, strides=1, padding='same', name='conv1d_1', activation='relu')(x)
    x = Conv1D(32, 1, strides=1, padding='same', name='conv1d_2', activation='relu')(x)
    x = Conv1D(32, 1, strides=1, padding='same', name='conv1d_3', activation='relu')(x)
    # average over tracks
    x = GlobalAveragePooling1D(name='pool_1')(x)
    x = Dense(100, name='dense_1', activation='relu')(x)
    outputs = Dense(nlabels, name='output', activation='softmax')(x)
    keras_model_conv1d = Model(inputs=inputs, outputs=outputs)
    keras_model_conv1d.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    print(keras_model_conv1d.summary())

    # define callbacks
    from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
    early_stopping = EarlyStopping(monitor='val_loss', patience=35)

    # define a learning-rate decay schedule
    num_epochs = 100
    initial_learning_rate = 0.01
    decay = initial_learning_rate / num_epochs
    learn_rate_decay = lambda epoch, lr: lr * 1 / (1 + decay * epoch)
    reduce_lr2 = ReduceLROnPlateau(patience=5, factor=0.5)
    # reduce_lr = ReduceLROnPlateau(patience=5, factor=0.5)
    reduce_lr = LearningRateScheduler(learn_rate_decay)
    model_checkpoint = ModelCheckpoint('keras_model_conv1d_best.h5', monitor='val_loss', save_best_only=True)
    # callbacks = [early_stopping, model_checkpoint, reduce_lr]
    callbacks = [early_stopping, model_checkpoint, reduce_lr2]

    # class weights for training
    training_weights = {0: 3.479, 1: 4.002, 2: 3.246, 3: 2.173, 4: 0.253, 5: 1.360}

    # fit keras model
    history_conv1d = keras_model_conv1d.fit_generator(train_generator,
                                                      validation_data=val_generator,
                                                      steps_per_epoch=len(train_generator),
                                                      validation_steps=len(val_generator),
                                                      max_queue_size=5,
                                                      epochs=num_epochs,
                                                      class_weight=training_weights,
                                                      shuffle=False,
                                                      callbacks=callbacks,
                                                      verbose=0)

    # reload best weights
    keras_model_conv1d.load_weights('keras_model_conv1d_best.h5')
    visualize_loss(history_conv1d)
    visualize('conv1d_loss.png')

    # GNN START
    # load data
    graph_dataset = GraphDataset('gdata_train', features, labels, spectators,
                                 n_events=1000, n_events_merge=1, file_names=train_files)
    graph_dataset.process()

    # train/validation split
    from torch_geometric.data import Data, DataListLoader, Batch
    from torch.utils.data import random_split

    torch.manual_seed(0)
    valid_frac = 0.20
    full_length = len(graph_dataset)
    valid_num = int(valid_frac * full_length)
    batch_size = 32
    train_dataset, valid_dataset = random_split(graph_dataset, [full_length - valid_num, valid_num])

    # note: the train loader is built from the full graph_dataset here, as in the source
    train_loader = DataListLoader(graph_dataset, batch_size=batch_size, pin_memory=True, shuffle=True)
    train_loader.collate_fn = collate
    valid_loader = DataListLoader(valid_dataset, batch_size=batch_size, pin_memory=True, shuffle=False)
    valid_loader.collate_fn = collate
    train_samples = len(train_dataset)
    valid_samples = len(valid_dataset)

    # create gnn model
    import torch.nn as nn
    import torch.nn.functional as F
    import torch_geometric.transforms as T
    from torch_geometric.nn import EdgeConv, global_mean_pool
    from torch.nn import Sequential as Seq, Linear as Lin, ReLU, BatchNorm1d
    from torch_scatter import scatter_mean
    from torch_geometric.nn import MetaLayer

    model = InteractionNetwork().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    import os.path as osp
    n_epochs = 20
    stale_epochs = 0
    best_valid_loss = 99999
    patience = 5
    t = tqdm(range(0, n_epochs))

    # calculate class weights by summing one-hot labels over the dataset
    s = None
    for data in graph_dataset:
        d = data[0].y[0]
        if s is None:
            s = d
        else:
            s += d
    weights = []
    for w in s:
        # wi = (# jets) / (# classes * # jets in class)
        den = w.item() * 6
        num = sum(s).item()
        weights += [num / den]

    for epoch in t:
        loss = train(model, optimizer, train_loader, train_samples, batch_size,
                     leave=bool(epoch == n_epochs - 1), weights=weights)
        valid_loss = test(model, valid_loader, valid_samples, batch_size,
                          leave=bool(epoch == n_epochs - 1))
        print('Epoch: {:02d}, Training Loss: {:.4f}'.format(epoch, loss))
        print(' Validation Loss: {:.4f}'.format(valid_loss))

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            modpath = osp.join('interactionnetwork_best.pth')
            print('New best model saved to:', modpath)
            torch.save(model.state_dict(), modpath)
            stale_epochs = 0
        else:
            print('Stale epoch')
            stale_epochs += 1
        if stale_epochs >= patience:
            print('Early stopping after %i stale epochs' % patience)
            break

    # load test data
    test_dataset = GraphDataset('data', features, labels, spectators,
                                n_events=1000, n_events_merge=1, file_names=test_files)
    test_dataset.process()
    test_loader = DataListLoader(test_dataset, batch_size=batch_size, pin_memory=True, shuffle=False)
    test_loader.collate_fn = collate
    test_samples = len(test_dataset)

    # model evaluation
    model.eval()
    t = tqdm(enumerate(test_loader), total=test_samples / batch_size)
    y_test = []
    y_predict = []
    for i, data in t:
        data = data.to(device)
        batch_output = model(data.x, data.edge_index, data.batch)
        y_predict.append(batch_output.detach().cpu().numpy())
        y_test.append(data.y.cpu().numpy())
    y_test = np.concatenate(y_test)
    y_predict = np.concatenate(y_predict)
    # GNN END

    # COMPARING MODELS
    predict_array_dnn = []
    predict_array_cnn = []
    label_array_test = []
    for t in test_generator:
        label_array_test.append(t[1])
        predict_array_dnn.append(keras_model_dense.predict(t[0]))
        predict_array_cnn.append(keras_model_conv1d.predict(t[0]))
    predict_array_dnn = np.concatenate(predict_array_dnn, axis=0)
    predict_array_cnn = np.concatenate(predict_array_cnn, axis=0)
    label_array_test = np.concatenate(label_array_test, axis=0)

    fpr_dnn, tpr_dnn = [], []
    fpr_cnn, tpr_cnn = [], []
    fpr_gnn, tpr_gnn = [], []

    # create ROC curves for each class
    for i in range(nlabels):
        t_fpr_d, t_tpr_d, thresh_d = roc_curve(label_array_test[:, i], predict_array_dnn[:, i])
        t_fpr_c, t_tpr_c, thresh_c = roc_curve(label_array_test[:, i], predict_array_cnn[:, i])
        t_fpr_g, t_tpr_g, thresh_g = roc_curve(y_test[:, i], y_predict[:, i])
        fpr_dnn.append(t_fpr_d)
        tpr_dnn.append(t_tpr_d)
        fpr_cnn.append(t_fpr_c)
        tpr_cnn.append(t_tpr_c)
        fpr_gnn.append(t_fpr_g)
        tpr_gnn.append(t_tpr_g)

    # plot ROC curves
    visualize_roc(fpr_cnn, tpr_cnn, fpr_dnn, tpr_dnn, fpr_gnn, tpr_gnn)
    visualize('fnn_vs_conv1d.pdf')
def train_RGNN(tr_dataset, te_dataset, n_epochs, batch_size, lr, z_dim, K, dropout,
               adj_type, learn_edge, lambda1, lambda2, domain_adaptation, lambda_dat,
               label_type, ckpt_save_name=None, ckpt_load=None):
    # log hyper-parameters
    logger.critical('batch_size {}, lr {}, z_dim {}, K {}, dropout {}, adj_type {}, learn_edge {}, lambda1 {}, '
                    'lambda2 {}, domain_adaptation {}, lambda_dat {}, label_type {}'
                    .format(batch_size, lr, z_dim, K, dropout, adj_type, learn_edge, lambda1,
                            lambda2, domain_adaptation, lambda_dat, label_type))

    # parameter sanity checks
    if label_type not in label_types:
        raise Exception("undefined label_type")
    if adj_type not in adj_types:
        raise Exception("undefined adj_type")

    # construct the model
    edge_weight = initial_adjacency_matrix(adj_type)
    model = SymSimGCNNet(n_channels, learn_edge, edge_weight, n_bands, [z_dim],
                         n_classes[label_type], K, dropout, domain_adaptation)
    last_epoch = 0
    if ckpt_load is not None:
        ckpt = torch.load(ckpt_load)
        last_epoch = ckpt["epoch"]  # read from the loaded checkpoint dict, not the path
        if last_epoch >= n_epochs:
            raise Exception("loaded model has already trained >= n_epochs")
        state_dict = ckpt["state_dict"]
        model.load_state_dict(state_dict)

    # use multiple GPUs
    model = DataParallel(model, device_ids=device_ids).to(device)
    logger.info(model)

    # prepare dataloader
    logger.info("tr_dataset: {}".format(tr_dataset))
    logger.info("te_dataset: {}".format(te_dataset))
    logger.info("training start from epoch {}".format(last_epoch))
    tr_loader = DataListLoader(tr_dataset, batch_size, True)

    # prepare the optimizer: no weight decay on edge weights and biases
    param_list1 = []
    param_list2 = []
    for name, param in model.named_parameters():
        if name in ['module.edge_weight', 'module.conv1.lin.bias', 'module.fc.bias']:
            param_list1.append(param)
        else:
            param_list2.append(param)
    optimizer = torch.optim.Adam([
        {'params': param_list1, 'weight_decay': 0},
        {'params': param_list2, 'weight_decay': lambda2}
    ], lr=lr)

    # iterate over all epochs
    eval_acc_list = []
    macro_f1_list = []
    for ep in range(last_epoch + 1, n_epochs + 1):
        model.train()
        loss_all = 0
        reverse_scale = 2 / (1 + math.exp(-10 * ep / n_epochs)) - 1
        if domain_adaptation == 'RevGrad':
            model.module.alpha = reverse_scale

        # iterate over all graphs
        for tr_data_list in tr_loader:
            # output shape: (len(tr_data_list), 5 or 1)
            output, domain_output = model(tr_data_list)

            # classification loss; y shape: (len(tr_data_list),)
            y = torch.cat([data.y for data in tr_data_list]).to(output.device)
            if label_type == "hard":
                loss = F.cross_entropy(output, y)
            elif label_type == "soft":
                loss = - distribution_label(y) * F.log_softmax(output, dim=1)
                loss = torch.mean(torch.sum(loss, dim=1))
            else:
                loss = F.mse_loss(output, y - 2)

            # l1 regularization loss
            if learn_edge:
                loss += lambda1 * torch.sum(torch.abs(model.module.edge_weight))

            # domain adaptation loss
            if domain_adaptation:
                # tr_data.x: [num_graph * n_channels, feature_dim]
                n_nodes = domain_output.size(0)
                # domain targets as LongTensors for cross_entropy (assumed fix; the
                # source passed float zeros/ones and a non-tuple randint size)
                loss += lambda_dat * F.cross_entropy(domain_output,
                                                     torch.zeros(n_nodes, dtype=torch.long).cuda())
                te_indices = torch.randint(0, len(te_dataset), (len(tr_data_list),))
                te_data = te_dataset[te_indices]
                _, te_domain_output = model(te_data)
                loss += lambda_dat * F.cross_entropy(te_domain_output,
                                                     torch.ones(n_nodes, dtype=torch.long).cuda())

            loss_all += loss.item() * len(tr_data_list)

            # optimize the model
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # evaluate the model
        accuracy, macro_f1_score = evaluate_RGNN(model, te_dataset, label_type)
        eval_acc_list.append(accuracy)
        macro_f1_list.append(macro_f1_score)
        train_acc, _ = evaluate_RGNN(model, tr_dataset, label_type)
        logger.info('epoch: {:4d}; loss: {:9.5f}; train acc: {:9.5f}; eval acc: {:9.5f}; '
                    'macro f1: {:9.5f};'.format(ep, loss_all / len(tr_dataset), train_acc,
                                                accuracy, macro_f1_score))

    # save model checkpoint
    logger.info(list(model.parameters()))
    logger.info(format_list(model.module.edge_weight.detach().cpu().numpy().flatten()))
    if ckpt_save_name is not None:
        checkpoint = {"epoch": n_epochs, "state_dict": model.state_dict()}
        torch.save(checkpoint, ckpt_dir + '/' + ckpt_save_name)

    return eval_acc_list, macro_f1_list
def reload(self):
    # one pass with batch_size == len(self) leaves data_list bound to the whole dataset
    for data_list in DataListLoader(self, batch_size=self.__len__()):
        pass
    return LoadDataset(name=None, reload_data=self.collate(data_list))
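# Hedged sketch of the single-pass idiom used by reload() above (and by the
# energy-dataset snippet earlier): setting batch_size to len(dataset) makes the
# loop body run exactly once, leaving `data_list` bound to the entire dataset as
# a plain list that can be re-collated. The toy graphs are an assumption.
import torch
from torch_geometric.data import Data, DataListLoader

graphs = [Data(x=torch.randn(3, 2)) for _ in range(10)]
for data_list in DataListLoader(graphs, batch_size=len(graphs)):
    pass  # single iteration
print(len(data_list))  # 10: the whole dataset in one list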
import os.path as osp

import torch
import torch.nn.functional as F
from torch_geometric.datasets import MNISTSuperpixels
from torch_geometric.data import DataListLoader
import torch_geometric.transforms as T
from torch_geometric.nn import SplineConv, global_mean_pool, DataParallel

path = osp.join(osp.dirname(osp.realpath(__file__)), '../../data', 'MNIST')
dataset = MNISTSuperpixels(path, transform=T.Cartesian()).shuffle()
loader = DataListLoader(dataset, batch_size=1024, shuffle=True)


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = SplineConv(dataset.num_features, 32, dim=2, kernel_size=5)
        self.conv2 = SplineConv(32, 64, dim=2, kernel_size=5)
        self.lin1 = torch.nn.Linear(64, 128)
        self.lin2 = torch.nn.Linear(128, dataset.num_classes)

    def forward(self, data):
        print('Inside Model: num graphs: {}, device: {}'.format(
            data.num_graphs, data.batch.device))
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        x = F.elu(self.conv1(x, edge_index, edge_attr))
        x = F.elu(self.conv2(x, edge_index, edge_attr))
        x = global_mean_pool(x, data.batch)
        x = F.elu(self.lin1(x))
        # assumed completion of the truncated snippet: final classifier layer
        return F.log_softmax(self.lin2(x), dim=1)
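# Hedged continuation sketch: how a DataParallel-wrapped model typically consumes
# the lists produced by the DataListLoader above. The optimizer choice and the
# target concatenation are assumptions in the spirit of the PyG multi-GPU example,
# not a verbatim continuation of the snippet.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = DataParallel(Net()).to(device)  # scatters each data_list across GPUs
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()
for data_list in loader:
    optimizer.zero_grad()
    output = model(data_list)  # DataParallel re-batches per device internally
    y = torch.cat([data.y for data in data_list]).to(output.device)
    loss = F.nll_loss(output, y)
    loss.backward()
    optimizer.step()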
def prepare_train_val_loader(args):
    setting = CrossValidSetting()
    sampler_type = None

    train_dataset_loader = DataListLoader(
        NucleiDatasetBatchOutput(root=setting.root,
                                 feature_type=args.feature_type,
                                 split='train',
                                 sampling_time=setting.sample_time,
                                 sampling_ratio=args.sample_ratio,
                                 normalize=args.normalize,
                                 dynamic_graph=args.dynamic_graph,
                                 sampling_method=args.sampling_method,
                                 datasetting=setting,
                                 neighbour=args.neighbour,
                                 graph_sampler=args.graph_sampler,
                                 crossval=args.cross_val),
        sampler=sampler_type,
        batch_size=args.batch_size,
        shuffle=True if sampler_type is None else False,
        num_workers=args.num_workers,
    )

    validset = NucleiDatasetBatchOutput(root=setting.root,
                                        feature_type=args.feature_type,
                                        split='valid',
                                        sampling_time=setting.sample_time,
                                        sampling_ratio=args.sample_ratio,
                                        normalize=args.normalize,
                                        dynamic_graph=args.dynamic_graph,
                                        sampling_method=args.sampling_method,
                                        datasetting=setting,
                                        neighbour=args.neighbour,
                                        graph_sampler=args.graph_sampler,
                                        crossval=args.cross_val)
    val_dataset_loader = DataListLoader(
        validset,
        batch_size=args.batch_size,  # setting.batch_size
        shuffle=False,
        num_workers=args.num_workers,
        pin_memory=True)

    if not args.visualization:
        test_dataset_loader = val_dataset_loader
    else:
        # this is the visualization test set
        testset = NucleiDatasetTest(root=setting.root,
                                    feature_type=args.feature_type,
                                    split='valid',
                                    normalize=args.normalize,
                                    sampling_method=args.sampling_method,
                                    datasetting=setting,
                                    neighbour=args.neighbour,
                                    graph_sampler=args.graph_sampler,
                                    crossval=args.cross_val)
        test_dataset_loader = torch.utils.data.DataLoader(
            testset,
            batch_size=1,  # setting.batch_size
            shuffle=False,
            num_workers=args.num_workers,
            pin_memory=False)

    return train_dataset_loader, val_dataset_loader, test_dataset_loader
# assumed opener (the snippet begins mid-call; mirrored from the test_dataset construction below)
train_dataset = ModelNet(
    root=data_path,
    name="40",
    train=True,
    pre_transform=transform(samplePoints=samplePoints),
)
test_dataset = ModelNet(
    root=data_path,
    name="40",
    train=False,
    pre_transform=transform(samplePoints=samplePoints),
)
if parallel:
    train_loader = DataListLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=False,
        drop_last=False,
        num_workers=16,
        pin_memory=True,
    )
    test_loader = DataListLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=16,
        pin_memory=True,
    )
else:
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    train_dataset = TUDataset(os.path.join('data', args.dataset), name=args.dataset,
                              use_node_attr=True, use_edge_attr=False)
    args.num_features = train_dataset.num_features
    print(args.dataset, len(train_dataset))
    args.num_classes = train_dataset.num_classes
    args.nhid = args.moco_dim

    # create model
    print("=> creating model with HGP-SL")
    HGP_SL = Model_joint(args)
    model = moco.builder.MoCo(HGP_SL, args.moco_dim, args.moco_k, args.moco_m, args.moco_t)
    for name, param in model.named_parameters():
        # print(name, param.requires_grad)
        param.requires_grad = True
    print(model)

    if args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    # optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # map the model to be loaded to the specified single gpu
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    train_sampler = None
    num_training = int(len(train_dataset) * 0.8)
    num_val = int(len(train_dataset) * 0.1)
    num_test = len(train_dataset) - (num_training + num_val)
    print('num_training,num_val,num_test', num_training, num_val, num_test)
    training_set, validation_set, test_set = random_split(train_dataset, [num_training, num_val, num_test])

    train_loader = DataListLoader(training_set, batch_size=args.batch_size,
                                  shuffle=(train_sampler is None), num_workers=args.workers,
                                  pin_memory=True, sampler=train_sampler, drop_last=True)
    val_loader = DataLoader(validation_set, batch_size=args.batch_size, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=args.batch_size, shuffle=False)

    min_loss = 1e10
    val_loss_values = []
    patience_cnt = 0  # initialize so the else-branch below cannot hit an unbound name
    print('begin training')
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        train(train_loader, val_loader, val_loss_values, model, criterion, optimizer, epoch, args)
        # print(optimizer)
        if val_loss_values[-1] < min_loss:
            min_loss = val_loss_values[-1]
            best_model = epoch
            patience_cnt = 0
        else:
            patience_cnt += 1
        print('patience', patience_cnt)

        if not os.path.exists('./results/CSSL-Reg/' + args.dataset + '/' + str(args.batch_size)):
            os.makedirs('./results/CSSL-Reg/' + args.dataset + '/' + str(args.batch_size))
        if (epoch + 1) % 1 == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': 'HGP-SL',
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, is_best=False,
                filename='./results/CSSL-Reg/' + args.dataset + str(args.batch_size)
                + '/checkpoint_{:05d}.pth.tar'.format(epoch))

        if patience_cnt == args.patience or epoch == args.epochs - 1:
            model.load_state_dict(
                torch.load('./results/CSSL-Reg/' + args.dataset + str(args.batch_size)
                           + '/checkpoint_{:05d}.pth.tar'.format(best_model))['state_dict'])
            test_acc, test_loss = compute_test(model.encoder_q, test_loader, args)
            print('Test set results, loss = {:.6f}, accuracy = {:.6f}'.format(test_loss, test_acc))
            break
torch.cuda.manual_seed(args.random_seed)

if __name__ == '__main__':
    tokenizer = BertTokenizer.from_pretrained("bert-base-chinese")
    train_dataset = QAGraphDataset(root='./data', file='./data/train_new.csv',
                                   tokenizer=tokenizer, is_test=False)
    test_dataset = QAGraphDataset(root='./data', file='./data/test_new.csv',
                                  tokenizer=tokenizer, is_test=True)

    train_loader = DataListLoader(train_dataset, batch_size=1, shuffle=False)
    test_loader = DataListLoader(test_dataset, batch_size=1, shuffle=False)

    logging.info(f"train data all steps: {len(train_loader)}, "
                 f"test data all steps : {len(test_loader)}")

    model = HGCNForQAClassification(num_class=2, dropout=args.dropout,
                                    pretrained_weight=None, edge_mask=[0, 1, 1, 1, 1],
                                    use_bert=True, finetune_bert=args.finetune_bert)
    model = BertMatch()  # note: this immediately replaces the HGCN model defined above
    model = model.cuda(args.device)
model_name = params['model_name'] + '_step_' + str(step)
BATCH_SIZE_PER_GPU = params['batch_size']

dataset = WeiboGraphDataset('/sdd/yujunshuai/data/weibo/', w2id=w2id,
                            max_comment_num=params['max_comment_num'],
                            restart_prob=params['restart_prob'],
                            delay=params['delay'], tokenizer=tokenizer, step=step)
# dataset = WeiboGraphDataset('/home/tanghengzhu/yjs/data/weibo/', w2id=w2id,
#                             max_comment_num=params['max_comment_num'],
#                             restart_prob=params['restart_prob'], delay=params['delay'])

train_size = int(0.675 * len(dataset))
dev_size = int(0.225 * len(dataset))
test_size = len(dataset) - train_size - dev_size

train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, dev_size, test_size])

train_loader = DataListLoader(train_dataset, batch_size=BATCH_SIZE_PER_GPU * GPU_COUNT, shuffle=True)
val_loader = DataListLoader(val_dataset, batch_size=int(BATCH_SIZE_PER_GPU * GPU_COUNT), shuffle=True)
test_loader = DataListLoader(test_dataset, batch_size=int(BATCH_SIZE_PER_GPU * GPU_COUNT), shuffle=True)

logging.info(f"train data all steps: {len(train_loader)}, "
             f"validate data all steps : {len(val_loader)}, "
             f"test data all steps : {len(test_loader)}")

model = IARNetForWeiboClassification(num_class=2, dropout=0.3, pretrained_weight=weight,
                                     use_image=False, edge_mask=params['edge_mask'],
                                     use_bert=params.get('use_bert', False),
                                     bert_path=BERT_PATH,
                                     finetune_bert=params.get('fine_tune', False),
                                     layer=params.get('layer', 1))
# model = load_parallel_save_model('/sdd/yujunshuai/save_model/gear/best-validate-model.pt', model)
model = DataParallel(model)
model = model.cuda(0)
model = models.EdgeNet(input_dim=input_dim, big_dim=big_dim, hidden_dim=hidden_dim).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)


def collate(items):
    # collate function for data loaders (flattens a list of lists into one list)
    l = sum(items, [])
    return Batch.from_data_list(l)


# train, valid, test split
torch.manual_seed(0)  # lock seed for random_split
train_dataset, valid_dataset, test_dataset = random_split(gdata, [fulllen - 2 * tv_num, tv_num, tv_num])

train_loader = -1
valid_loader = -1
test_loader = -1
if use_sparseloss == False and use_vae == False:
    train_loader = DataListLoader(train_dataset, batch_size=batch_size, pin_memory=True, shuffle=True)
    train_loader.collate_fn = collate
    valid_loader = DataListLoader(valid_dataset, batch_size=batch_size, pin_memory=True, shuffle=False)
    valid_loader.collate_fn = collate
    test_loader = DataListLoader(test_dataset, batch_size=batch_size, pin_memory=True, shuffle=False)
    test_loader.collate_fn = collate
else:
    train_loader = DataLoader(train_dataset, batch_size=batch_size, pin_memory=True, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, pin_memory=True, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, pin_memory=True, shuffle=False)

train_samples = len(train_dataset)
valid_samples = len(valid_dataset)
test_samples = len(test_dataset)

# load in model
return df

# read in dataset
bb_name = ["bb0", "bb1", "bb2", "bb3", "rnd"][box_num]
print("Plotting %s" % bb_name)
save_dir = osp.join(model_fname, bb_name)
save_path = osp.join(output_dir, save_dir)
Path(save_path).mkdir(exist_ok=True)  # make a subfolder

if not osp.isfile(osp.join(output_dir, model_fname, bb_name, 'df.pkl')) or overwrite:
    print("Processing jet losses")
    gdata = GraphDataset('/anomalyvol/data/lead_2/%s/' % bb_name, bb=box_num)
    bb_loader = DataListLoader(gdata)
    proc_jets, input_fts, reco_fts = process(bb_loader, num_events, model_fname,
                                             model, loss_ftn_obj, latent_dim, no_E)
    df = get_df(proc_jets)
    df.to_pickle(osp.join(output_dir, model_fname, bb_name, 'df.pkl'))
    torch.save(input_fts, osp.join(output_dir, model_fname, bb_name, 'input_fts.pt'))
    torch.save(reco_fts, osp.join(output_dir, model_fname, bb_name, 'reco_fts.pt'))
else:
    print("Using preprocessed dictionary")
    df = pd.read_pickle(osp.join(output_dir, model_fname, bb_name, 'df.pkl'))
    input_fts = torch.load(
train_dataset = torch.utils.data.Subset(full_dataset, np.arange(start=0, stop=args.n_train))
val_dataset = torch.utils.data.Subset(full_dataset, np.arange(start=args.n_train, stop=args.n_train + args.n_val))
print("train_dataset", len(train_dataset))
print("val_dataset", len(val_dataset))

# hack for multi-gpu training
if not multi_gpu:
    def collate(items):
        l = sum(items, [])
        return Batch.from_data_list(l)
else:
    def collate(items):
        l = sum(items, [])
        return l

train_loader = DataListLoader(train_dataset, batch_size=args.batch_size, pin_memory=False, shuffle=False)
train_loader.collate_fn = collate
val_loader = DataListLoader(val_dataset, batch_size=args.batch_size, pin_memory=False, shuffle=False)
val_loader.collate_fn = collate

model_class = model_classes[args.model]
model_kwargs = {'input_dim': input_dim,
                'hidden_dim': args.hidden_dim,
                'encoding_dim': args.encoding_dim,
                'output_dim_id': output_dim_id,
                'output_dim_p4': output_dim_p4,
                'dropout_rate': args.dropout,
                'convlayer': args.convlayer,
                'convlayer2': args.convlayer2,
                'radius': args.radius,
                'space_dim': args.space_dim,
import random


def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


set_seed(1)

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'MNIST')
train_dataset = GNNBenchmarkDataset(path, "MNIST").shuffle()
test_dataset = GNNBenchmarkDataset(path, "MNIST", "test").shuffle()

train_loader = DataListLoader(train_dataset, batch_size=512, shuffle=True)
test_loader = DataListLoader(test_dataset, batch_size=512, shuffle=True)


def accuracy(scores, targets):
    scores = scores.detach().argmax(dim=1)
    acc = (scores == targets).float().sum().item()
    return acc


def train_pyg(model_name, model, train_loader, device, optimizer):
    global epoch_load_time, epoch_forward_time, epoch_backward_time, epoch_batch_time
    t10 = time.time()
    model.train()
    n_data = 0
    loss_all = 0
    epoch_train_acc = 0
    t4 = time.time()