def __init__(self, config):
    super().__init__(config)
    self.dynamics = VehicleTransitionModel().cuda()
    self.optim = torch.optim.Adam(self.dynamics.parameters())
    self.earlystopping = EarlyStopping(patience=self._c.early_stop_patience)
    self.set_epoch_length()
def __init__(self, args, train_dataset=None, dev_dataset=None, test_dataset=None):
    self.args = args
    self.train_dataset = train_dataset
    self.dev_dataset = dev_dataset
    self.test_dataset = test_dataset
    self.early_stopping = EarlyStopping(patience=10, verbose=True)

    self.config_class, self.model_class, _ = MODEL_CLASSES[self.args.model_type]
    # self.config = self.config_class.from_pretrained(self.args.model_name_or_path, num_labels=1, output_hidden_states=True, output_attentions=True)
    self.model = ParagraphSelector.from_pretrained(self.args.model_name_or_path, num_labels=1)

    # GPU or CPU
    self.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
    self.model.to(self.device)
    print("***************** Config & Pretrained Model load complete **********************")
def create_network(npochs):
    l_in = layers.InputLayer((None, 1, 200, 250))
    l_conv1 = layers.Conv2DLayer(l_in, num_filters=32, filter_size=(3, 3))
    l_pool1 = layers.MaxPool2DLayer(l_conv1, pool_size=(2, 2))
    l_drop1 = layers.DropoutLayer(l_pool1, p=0.1)
    l_conv2 = layers.Conv2DLayer(l_drop1, num_filters=64, filter_size=(2, 2))
    l_pool2 = layers.MaxPool2DLayer(l_conv2, pool_size=(2, 2))
    l_drop2 = layers.DropoutLayer(l_pool2, p=0.2)
    l_conv3 = layers.Conv2DLayer(l_drop2, num_filters=128, filter_size=(2, 2))
    l_pool3 = layers.MaxPool2DLayer(l_conv3, pool_size=(2, 2))
    l_drop3 = layers.DropoutLayer(l_pool3, p=0.3)
    l_den1 = layers.DenseLayer(l_drop3, num_units=1000)
    l_drop4 = layers.DropoutLayer(l_den1, p=0.5)
    l_den2 = layers.DenseLayer(l_drop4, num_units=1000)
    l_output = layers.DenseLayer(l_den2, num_units=8, nonlinearity=None)

    net = NeuralNet(
        layers=l_output,
        # learning parameters
        update=nesterov_momentum,
        update_learning_rate=theano.shared(np.float32(0.03)),
        update_momentum=theano.shared(np.float32(0.9)),
        regression=True,
        on_epoch_finished=[
            AdjustVariable('update_learning_rate', start=0.03, stop=0.0001),
            AdjustVariable('update_momentum', start=0.9, stop=0.9999),
            EarlyStopping(),
        ],
        max_epochs=npochs,  # maximum number of epochs
        train_split=TrainSplit(eval_size=0.2),
        verbose=1,
    )
    return net
def prepare_optimisers(args, logger, policy_parameters, environment_parameters):
    if args.optimizer == "adam":
        optimizer_class = torch.optim.Adam
    elif args.optimizer == "adadelta":
        optimizer_class = torch.optim.Adadelta
    else:
        optimizer_class = torch.optim.SGD

    optimizer = {
        "policy": optimizer_class(params=policy_parameters, lr=args.pol_lr,
                                  weight_decay=args.l2_weight),
        "env": optimizer_class(params=environment_parameters, lr=args.env_lr,
                               weight_decay=args.l2_weight)
    }
    lr_scheduler = {
        "policy": get_lr_scheduler(logger, optimizer["policy"],
                                   patience=args.lr_scheduler_patience),
        "env": get_lr_scheduler(logger, optimizer["env"],
                                patience=args.lr_scheduler_patience)
    }
    es = EarlyStopping(mode="max", patience=args.es_patience,
                       threshold=args.es_threshold)
    return optimizer, lr_scheduler, es
def __init__(self, config: dict, model: nn.Module):
    super().__init__()
    self.config = config

    # model hparams
    self.device = "cuda" if torch.cuda.is_available() else "cpu"
    self.root = config["root"]
    self.model = model.to(self.device)
    if torch.cuda.is_available():
        self.model = nn.DataParallel(self.model)
    self.num_classes = config["num_classes"]
    self.criterion = nn.CrossEntropyLoss()

    # training hparams
    self.epochs = config["epoch"]
    self.lr = config["lr"]
    self.optimizer = self.get_optimizer(config["optimizer"])
    self.lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        self.optimizer, mode="min", factor=0.1, patience=10)
    self.train_batch_size = config["train_batch_size"]
    self.eval_batch_size = config["eval_batch_size"]
    self.train_loader, self.val_loader, self.test_loader = get_dataloaders(
        self.root, self.train_batch_size, self.eval_batch_size)  # TODO: val, test loader

    # model saving hparams
    self.save_path = config["ckpt_path"]
    if not os.path.exists(self.save_path):
        os.mkdir(self.save_path)
    self.writer = SummaryWriter(self.save_path)
    self.global_step = 0
    self.eval_step = config["eval_step"]
    # note: os.path.join with a single argument returns it unchanged; a filename was likely intended
    self.earlystopping = EarlyStopping(verbose=True, path=os.path.join(self.save_path))
def main():
    # Validation
    best_recall = -1
    early_stopping = EarlyStopping(patience=5, verbose=True)

    for epoch in range(1, EPOCHS + 1):
        print('Epoch {}/{} '.format(epoch, EPOCHS))
        train(model, trn_loader, optimizer, epoch)
        val_loss, val_recall = evaluate(model, vid_loader)

        early_stopping(val_recall, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break

        scheduler.step(val_loss)

        if val_recall > best_recall:
            if not os.path.isdir("checkpoint"):
                os.makedirs("checkpoint")
            print('###### Model Save, Validation Recall is : {:.4f}, Loss is : {:.4f}'.format(val_recall, val_loss))
            torch.save(model.state_dict(), './checkpoint/augmix2_seresnent50_saved_weights.pth')
            best_recall = val_recall

    print('Time to train model: {} mins'.format(round((time.time() - start_time) / 60, 2)))
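# The snippets above don't define EarlyStopping itself. As a reference, here is
# a minimal sketch of the pytorchtools-style helper they appear to assume:
# called as early_stopping(score, model), it checkpoints on improvement and
# raises the early_stop flag after `patience` epochs without one. The
# checkpoint path and the "higher score is better" convention (matching the
# recall metric above) are assumptions; loss-based callers would flip the
# comparison.
import torch


class EarlyStopping:
    def __init__(self, patience=5, verbose=False, path='checkpoint.pt'):
        self.patience = patience
        self.verbose = verbose
        self.path = path
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, score, model):
        if self.best_score is None or score > self.best_score:
            self.best_score = score
            self.counter = 0
            torch.save(model.state_dict(), self.path)  # keep the best weights
            if self.verbose:
                print('Score improved to {:.4f}; checkpoint saved.'.format(score))
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True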
def __init__(self, config):
    super().__init__(config)
    if config.type == 'conv':
        # self.dynamics = ConvTransitionModel2().cuda()
        self.dynamics = ConvTransitionModel2_2().cuda()  # uses corrected action
        self.get_dataset_sample = self.get_dataset_sample_no_speed
        self.criterion = F.mse_loss
    elif config.type == 'conv_speed':
        self.dynamics = ConvTransitionModel3().cuda()
        self.get_dataset_sample = self.get_dataset_sample_with_speed
        self.criterion = F.mse_loss
    elif config.type == 'class':
        # self.dynamics = ClassificationModel().cuda()
        # self.dynamics = ClassificationModel2().cuda()  # uses corrected phase action
        self.dynamics = ClassificationModel3().cuda()  # uses limited phase history
        self.get_dataset_sample = self.get_dataset_sample_for_classification
        self.criterion = torch.nn.BCELoss()
    elif config.type == 'latent_fc':
        self.dynamics = LatentFCTransitionModel().cuda()
        self.get_dataset_sample = self.get_dataset_sample_for_latent_fc
        self.criterion = F.mse_loss
    else:
        raise NotImplementedError
    self.optim = torch.optim.Adam(self.dynamics.parameters())
    self.earlystopping = EarlyStopping(patience=self._c.early_stop_patience)
    self.set_epoch_length()
    self.writer = SummaryWriter(log_dir=config.logdir, purge_step=0)
def __init__(self, config, preprocessing=False):
    super().__init__(config)
    self.dynamics = ConvTransitionModel2().cuda()
    self.optim = torch.optim.Adam(self.dynamics.parameters())
    self.earlystopping = EarlyStopping(patience=self._c.early_stop_patience)
    self.set_epoch_length()
    self.cutoff = None
    self.get_dataset_sample = self.get_dataset_sample2 if preprocessing else self.get_dataset_sample1
def __init__(self, model: Any, model_name: str = None):
    super().__init__(model)
    self.model_name = model_name
    self.setup_device()  # was a bare attribute access, which is a no-op unless setup_device is a property
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-2)
    self.criterion = nn.CrossEntropyLoss()
    self.early_stopping = EarlyStopping(patience=5, verbose=True)
    self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        self.optimizer, mode="max", patience=5, factor=0.3, verbose=True)
def main(config_path='./configs/config.yaml'):
    config = load_config(config_path)
    init_experiment(config)
    set_random_seed(config.seed)

    train_dataset = getattr(data, config.train.dataset.type)(
        config.data_root, **vars(config.train.dataset.params))
    train_loader = getattr(data, config.train.loader.type)(
        train_dataset, **vars(config.train.loader.params))
    val_dataset = getattr(data, config.val.dataset.type)(
        config.data_root, **vars(config.val.dataset.params))
    val_loader = getattr(data, config.val.loader.type)(
        val_dataset, **vars(config.val.loader.params))

    device = torch.device(config.device)
    model = getattr(models, config.model.type)(**vars(config.model.params)).to(device)
    optimizer = getattr(optims, config.optim.type)(model.parameters(), **vars(config.optim.params))
    scheduler = None
    loss_f = getattr(losses, config.loss.type)(**vars(config.loss.params))
    early_stopping = EarlyStopping(save=config.model.save,
                                   path=config.model.save_path,
                                   **vars(config.stopper.params))

    train_writer = SummaryWriter(log_dir=os.path.join(config.tb_dir, 'train'))
    val_writer = SummaryWriter(log_dir=os.path.join(config.tb_dir, 'val'))

    for epoch in range(1, config.epochs + 1):
        print(f'Epoch {epoch}')
        train_metrics = train(model, optimizer, train_loader, loss_f, device)
        print_metrics('Train', train_metrics)
        write_metrics(epoch, train_metrics, train_writer)

        val_metrics = val(model, val_loader, loss_f, device)
        print_metrics('Val', val_metrics)
        write_metrics(epoch, val_metrics, val_writer)

        early_stopping(val_metrics['avg_weighted_loss'], model)  # will save the best model to disk
        if early_stopping.early_stop:
            print(f'Early stopping after {epoch} epochs.')
            break

        if scheduler:
            scheduler.step()

    train_writer.close()
    val_writer.close()

    if config.model.save:
        torch.save(model.state_dict(),
                   config.model.save_path.replace('checkpoint', 'last_checkpoint'))
def __init__(self, sess, k, configs, tr_x, tr_y, val_x, val_y, te_x, te_y,
             num_items, init_way, logger):
    self.sess = sess
    self.configs = configs
    self.tr_x = tr_x
    self.tr_y = tr_y
    self.val_x = val_x
    self.val_y = val_y
    self.te_x = te_x
    self.te_y = te_y
    self.num_items = num_items
    self.logger = logger

    self.rnn_hidden_size = configs.rnn_hidden_size
    self.batch_size = configs.batch_size
    self.num_layers = configs.num_layers

    # Initialize the optimizer
    self.optimizer_type = configs.optimizer_type
    self.weight_decay = configs.weight_decay
    self.momentum = configs.momentum
    self.lr = configs.lr
    self.eps = configs.eps
    self.clip_grad = configs.clip_grad
    self.clip_grad_threshold = configs.clip_grad_threshold
    self.lr_decay_step = configs.lr_decay_step
    self.lr_decay = configs.lr_decay
    self.lr_decay_rate = configs.lr_decay_rate
    self.drop_prob_ho = configs.drop_prob_ho
    self.drop_prob_input = configs.drop_prob_input
    self.drop_prob_recurrent = configs.drop_prob_recurrent

    # etc
    self.k = k
    self.time_sort = configs.time_sort
    self.loss_type = configs.loss_type
    self.n_epochs = configs.n_epochs
    self.is_shuffle = configs.is_shuffle
    self.embedding_size = configs.embedding_size
    self.num_topics = configs.num_topics
    self.early_stop = EarlyStopping(configs.max_patience)

    # batch iterator
    self.tr_sess_idx = np.arange(len(self.tr_y))
    self.val_sess_idx = np.arange(len(self.val_y))
    self.te_sess_idx = np.arange(len(self.te_y))

    # record best epoch
    self.max_val_recall = [0 for _ in range(len(self.k))]
    self.max_te_recall = [0 for _ in range(len(self.k))]
    self.best_epoch = 0

    tr_lengths = [len(s) for s in self.tr_x]
    val_lengths = [len(s) for s in self.val_x]
    te_lengths = [len(s) for s in self.te_x]
    tr_maxlen = np.max(tr_lengths)
    val_maxlen = np.max(val_lengths)
    te_maxlen = np.max(te_lengths)
    self.maxlen = np.max([tr_maxlen, val_maxlen, te_maxlen])
    self.maxlen = None  # note: the computed maxlen is immediately discarded here

    self.embed_init, self.weight_init, self.bias_init, self.gate_bias_init, self.kern_init = init_way
def main(args): args['device'] = "cuda" if torch.cuda.is_available() else "cpu" set_random_seed() # Interchangeable with other datasets if args['dataset'] == 'Tox21': from dgl.data.chem import Tox21 dataset = Tox21() trainset, valset, testset = split_dataset(dataset, args['train_val_test_split']) train_loader = DataLoader(trainset, batch_size=args['batch_size'], collate_fn=collate_molgraphs_for_classification) val_loader = DataLoader(valset, batch_size=args['batch_size'], collate_fn=collate_molgraphs_for_classification) test_loader = DataLoader(testset, batch_size=args['batch_size'], collate_fn=collate_molgraphs_for_classification) if args['pre_trained']: args['num_epochs'] = 0 model = model_zoo.chem.load_pretrained(args['exp']) else: # Interchangeable with other models if args['model'] == 'GCN': model = model_zoo.chem.GCNClassifier(in_feats=args['in_feats'], gcn_hidden_feats=args['gcn_hidden_feats'], classifier_hidden_feats=args['classifier_hidden_feats'], n_tasks=dataset.n_tasks) elif args['model'] == 'GAT': model = model_zoo.chem.GATClassifier(in_feats=args['in_feats'], gat_hidden_feats=args['gat_hidden_feats'], num_heads=args['num_heads'], classifier_hidden_feats=args['classifier_hidden_feats'], n_tasks=dataset.n_tasks) loss_criterion = BCEWithLogitsLoss(pos_weight=dataset.task_pos_weights.to(args['device']), reduction='none') optimizer = Adam(model.parameters(), lr=args['lr']) stopper = EarlyStopping(patience=args['patience']) model.to(args['device']) for epoch in range(args['num_epochs']): # Train run_a_train_epoch(args, epoch, model, train_loader, loss_criterion, optimizer) # Validation and early stop val_roc_auc = run_an_eval_epoch(args, model, val_loader) early_stop = stopper.step(val_roc_auc, model) print('epoch {:d}/{:d}, validation roc-auc score {:.4f}, best validation roc-auc score {:.4f}'.format( epoch + 1, args['num_epochs'], val_roc_auc, stopper.best_score)) if early_stop: break if not args['pre_trained']: stopper.load_checkpoint(model) test_roc_auc = run_an_eval_epoch(args, model, test_loader) print('test roc-auc score {:.4f}'.format(test_roc_auc))
def __init__(self, model: Any, model_name: str = None):
    super().__init__(model)
    self.model_name = model_name
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.optimizer = transformers.AdamW(self.model.parameters(), lr=1e-4)
    self.criterion = nn.BCEWithLogitsLoss()
    self.early_stopping = EarlyStopping(patience=5, verbose=True)
    self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        self.optimizer, mode="max", patience=5, factor=0.3, verbose=True)
def train(model_object, model, train_loader, test_loader=None):
    device = model_object.device
    if model_object.device.lower() != 'cpu':
        model_object.device = 'cuda:0'

    if model_object.log_path and model_object.verbose > 0:
        log_status(model_object.log_path,
                   'model_params: {}'.format(str(model_object.params)),
                   init=False)

    model = model.to(model_object.device)
    model_object.optimizer = optim.Adam(model.parameters(),
                                        lr=model_object.lr,
                                        weight_decay=model_object.weight_decay)
    kwargs = {
        'pos_weight': torch.Tensor([model_object.pos_weight]).to(model_object.device),
        'reduction': 'mean'
    }
    model_object.criterion = LossGenie('BCEWithLogitsLoss', **kwargs)

    # assisting modules
    model_object.lr_scheduler = optim.lr_scheduler.StepLR(
        model_object.optimizer, step_size=model_object.lr_decay_freq, gamma=0.5)
    model_object.early_stopper = EarlyStopping(model_object.model_dir,
                                               patience=model_object.patience,
                                               verbose=True)
    model_object.train_metrics = metrics.BinaryClfMetrics()
    model_object.test_metrics = metrics.BinaryClfMetrics()

    for ep in range(1, model_object.epoch + 1):
        # train for an epoch
        train_epoch(ep, model_object, model, train_loader, test_loader)

        # validation for an epoch
        if test_loader is not None:
            valid_epoch(ep, model_object, model, test_loader)

        # return if early stop
        if model_object.early_stopper.early_stop:
            return model

        model_object.lr_scheduler.step()
        if model_object.log_path and model_object.verbose > 0:
            log_status(model_object.log_path,
                       'current lr: {}'.format(model_object.lr_scheduler.get_lr()),
                       init=False)

    return model
def __init__(self, args, model, pre_data):
    self.args = args
    self.model = model
    self.device = args.device
    self.max_epoch = self.args.max_epoch
    self.set_seed()
    self.dataset = args.dataset
    self.predata = pre_data
    self.earlystopping = EarlyStopping(args.early_stopping)
def __init__(self, model: Any, model_name: str = None,
             num_training_steps: int = 10000 // 8 * 12):  # integer division to match the int annotation
    super().__init__(model)
    self.model_name = model_name
    self.num_training_steps = num_training_steps
    self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    self.criterion = nn.BCEWithLogitsLoss()
    self.early_stopping = EarlyStopping(patience=5, verbose=True)
    self.setup_optimizer_and_scheduler()  # was a bare attribute access, which never runs the setup
def main(args):
    args['device'] = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    set_random_seed(args['random_seed'])

    train_set, val_set, test_set = load_dataset_for_regression(args)
    train_loader = DataLoader(dataset=train_set, batch_size=args['batch_size'],
                              shuffle=True, collate_fn=collate_molgraphs)
    val_loader = DataLoader(dataset=val_set, batch_size=args['batch_size'],
                            shuffle=True, collate_fn=collate_molgraphs)
    if test_set is not None:
        test_loader = DataLoader(dataset=test_set, batch_size=args['batch_size'],
                                 collate_fn=collate_molgraphs)

    if args['pre_trained']:
        args['num_epochs'] = 0
        model = model_zoo.chem.load_pretrained(args['exp'])
    else:
        model = load_model(args)
        if args['model'] in ['SCHNET', 'MGCN']:
            model.set_mean_std(train_set.mean, train_set.std, args['device'])
    loss_fn = nn.MSELoss(reduction='none')
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    stopper = EarlyStopping(mode='lower', patience=args['patience'])
    model.to(args['device'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_fn, optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print('epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.format(
            epoch + 1, args['num_epochs'], args['metric_name'], val_score,
            args['metric_name'], stopper.best_score))
        if early_stop:
            break

    if test_set is not None:
        if not args['pre_trained']:
            stopper.load_checkpoint(model)
        test_score = run_an_eval_epoch(args, model, test_loader)
        print('test {} {:.4f}'.format(args['metric_name'], test_score))
def main(config):
    use_cuda = True
    dataset_classes = config.num_classes
    model, _ = load_models.load_model(config.model_type, dataset_classes, config.pretrained)
    device = torch.device("cuda" if use_cuda else "cpu")
    log_dir = config.save_dir.format(**config)
    writer = SummaryWriter(log_dir=log_dir)
    train_loader, val_loader, test_loader = get_datasets(config)
    # optim = get_optimizer(config, model)
    optim = torch.optim.SGD(model.parameters(), config.lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, 5)
    early_stopping = EarlyStopping()
    model.cuda()
    initial = test(model, test_loader, device, writer, config, 1, True)
    # /shared/rsaas/michal5/classes/498_dl/cs498_finalproject/outputs/dataset_name=airplane/pretrained=True,self_train=False/opt=sgd,bs=64,lr=0.01/model/acc:0.71875.pt
    # model = load_model('/shared/rsaas/michal5/classes/498_dl/cs498_finalproject/outputs/dataset_name=airplane/pretrained=True,self_train=True/opt=sgd,bs=64,lr=0.01' + '/model/' + 'acc:0.7185.pt', model)
    epoch_reached = 0
    # model = load_model(config.save_dir.format(**config) + '/model/' + 'acc:0.71875.pt', model)
    # test_acc = test(model, test_loader, device, writer, config, 1, True)
    if config.train:
        for epoch in range(config.epochs):
            train(train_loader, model, optim, epoch, device, writer, config)
            val_acc = test(model, test_loader, device, writer, config, epoch, test=False)
            early_stopping(val_acc)
            if early_stopping.early_stop:
                epoch_reached = epoch
                break
            scheduler.step()
    test_acc = test(model, test_loader, device, writer, config, epoch_reached, test=True)
    fname = os.path.join(config.save_dir.format(**config), 'best_accuracy' + '.json')
    os.makedirs(os.path.dirname(fname), exist_ok=True)
    with open(fname, 'w') as f:
        json.dump({'test_accuracy': test_acc}, f)
    tqdm.write(f'Saved accuracy results to {fname}')
    print('test accuracy:', test_acc)
def main(args): args['device'] = "cuda" if torch.cuda.is_available() else "cpu" set_random_seed() # Interchangeable with other datasets train_set, val_set, test_set = load_dataset_for_regression(args) train_loader = DataLoader(dataset=train_set, batch_size=args['batch_size'], collate_fn=collate_molgraphs) val_loader = DataLoader(dataset=val_set, batch_size=args['batch_size'], collate_fn=collate_molgraphs) if test_set is not None: test_loader = DataLoader(dataset=test_set, batch_size=args['batch_size'], collate_fn=collate_molgraphs) if args['model'] == 'MPNN': model = model_zoo.chem.MPNNModel(node_input_dim=args['node_in_feats'], edge_input_dim=args['edge_in_feats'], output_dim=args['output_dim']) elif args['model'] == 'SCHNET': model = model_zoo.chem.SchNet(norm=args['norm'], output_dim=args['output_dim']) model.set_mean_std(train_set.mean, train_set.std, args['device']) elif args['model'] == 'MGCN': model = model_zoo.chem.MGCNModel(norm=args['norm'], output_dim=args['output_dim']) model.set_mean_std(train_set.mean, train_set.std, args['device']) model.to(args['device']) loss_fn = nn.MSELoss(reduction='none') optimizer = torch.optim.Adam(model.parameters(), lr=args['lr']) stopper = EarlyStopping(mode='lower', patience=args['patience']) for epoch in range(args['num_epochs']): # Train run_a_train_epoch(args, epoch, model, train_loader, loss_fn, optimizer) # Validation and early stop val_score = run_an_eval_epoch(args, model, val_loader) early_stop = stopper.step(val_score, model) print( 'epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'. format(epoch + 1, args['num_epochs'], args['metric_name'], val_score, args['metric_name'], stopper.best_score)) if early_stop: break if test_set is not None: stopper.load_checkpoint(model) test_score = run_an_eval_epoch(args, model, test_loader) print('test {} {:.4f}'.format(args['metric_name'], test_score))
def main(args):
    g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, \
        val_mask, test_mask = load_data(args['dataset'])

    dev = torch.device("cuda:0" if args['gpu'] >= 0 else "cpu")
    features = features.to(dev)
    labels = labels.to(dev)
    train_mask = train_mask.to(dev)
    val_mask = val_mask.to(dev)
    test_mask = test_mask.to(dev)

    model = HAN(meta_paths=[['pa', 'ap'], ['pf', 'fp']],
                in_size=features.shape[1],
                hidden_size=args['hidden_units'],
                out_size=num_classes,
                num_heads=args['num_heads'],
                dropout=args['dropout']).to(dev)

    stopper = EarlyStopping(patience=args['patience'])
    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])

    for epoch in range(args['num_epochs']):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc, train_micro_f1, train_macro_f1 = score(logits[train_mask], labels[train_mask])
        val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate(
            model, g, features, labels, val_mask, loss_fcn)
        early_stop = stopper.step(val_loss.data.item(), val_acc, model)

        print('Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | '
              'Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}'.format(
                  epoch + 1, loss.item(), train_micro_f1, train_macro_f1,
                  val_loss.item(), val_micro_f1, val_macro_f1))

        if early_stop:
            break

    stopper.load_checkpoint(model)
    test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate(
        model, g, features, labels, test_mask, loss_fcn)
    print('Test loss {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}'.format(
        test_loss.item(), test_micro_f1, test_macro_f1))
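# Unlike the single-metric stoppers elsewhere in this section, the HAN example
# above calls stopper.step(loss, acc, model) with two metrics. A sketch of a
# dual-criterion stopper in the spirit of DGL's HAN utility: a round counts
# towards patience only when both validation loss and accuracy worsen. The
# checkpoint filename and the exact tie handling are assumptions.
import torch


class EarlyStopping:
    def __init__(self, patience=10, filename='early_stop.pth'):
        self.patience = patience
        self.filename = filename
        self.counter = 0
        self.best_loss = None
        self.best_acc = None
        self.early_stop = False

    def step(self, loss, acc, model):
        if self.best_loss is None:
            self.best_loss, self.best_acc = loss, acc
            torch.save(model.state_dict(), self.filename)
        elif loss > self.best_loss and acc < self.best_acc:
            # both metrics got worse: count towards patience
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # at least one metric improved; checkpoint only if neither got worse
            if loss <= self.best_loss and acc >= self.best_acc:
                torch.save(model.state_dict(), self.filename)
            self.best_loss = min(loss, self.best_loss)
            self.best_acc = max(acc, self.best_acc)
            self.counter = 0
        return self.early_stop

    def load_checkpoint(self, model):
        model.load_state_dict(torch.load(self.filename))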
def train(self, model, train_loader, valid_loader, workers):
    criterion = BCELoss()  # binary cross-entropy

    # for RMSprop in PySyft each worker needs its own optimizer
    worker_ids = [worker.id for worker in workers]
    optims = Optims(worker_ids,
                    optim=RMSprop(model.parameters(), lr=self.model_config.learning_rate))

    early_stopping = EarlyStopping(patience=self.model_config.early_stopping_patience)

    epochs_finished = 0
    for _ in range(self.model_config.epochs):
        model.train()
        for data, target in train_loader:
            # At least two samples are needed for training.
            # This may cause losing up to N examples in training, where N is the number of workers.
            if len(data) < 2:
                continue
            model.send(data.location)
            opt = optims.get_optim(data.location.id)
            opt.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            opt.step()
            model.get()

        model.eval()
        valid_losses = []
        for data, target in valid_loader:
            model.send(data.location)
            output = model(data)
            loss = criterion(output, target)
            valid_losses.append(loss.get().item())
            model.get()
        valid_loss = np.average(valid_losses)

        epochs_finished += 1
        if early_stopping.should_early_stop(valid_loss, model):
            break

    model.load_state_dict(early_stopping.best_model_state)
    return model, epochs_finished
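# The PySyft trainer above expects should_early_stop(valid_loss, model) and a
# best_model_state attribute holding the best weights in memory, which avoids
# checkpointing to disk while tensors may live on remote workers. A minimal
# sketch under those assumptions.
import copy


class EarlyStopping:
    def __init__(self, patience=5):
        self.patience = patience
        self.counter = 0
        self.best_loss = None
        self.best_model_state = None

    def should_early_stop(self, valid_loss, model):
        if self.best_loss is None or valid_loss < self.best_loss:
            self.best_loss = valid_loss
            self.counter = 0
            # keep a detached in-memory copy of the best weights
            self.best_model_state = copy.deepcopy(model.state_dict())
            return False
        self.counter += 1
        return self.counter >= self.patience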
def train_epochs(train_dataloader, val_dataloader, model, loss_fn, num_epochs, save_path):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    optimizer = torch.optim.Adam(model.parameters())
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max', verbose=True)
    stopper = EarlyStopping(verbose=True,
                            path=os.path.join(save_path, 'unet_model_best.pth'),
                            patience=15, mode='max')
    steps = len(train_dataloader.dataset) // train_dataloader.batch_size
    best_model = model = model.to(device)  # note: best_model aliases model; the best weights live in the checkpoint file
    start = time.time()

    train_losses = []
    train_ious = []
    val_losses = []
    val_ious = []
    for epoch in range(1, num_epochs + 1):
        print('-' * 10)
        print('Epoch {}/{}'.format(epoch, num_epochs))
        running_iou = []
        running_loss = []
        for step, (x, y) in enumerate(train_dataloader):
            loss, iou = train(model, x, y, loss_fn, optimizer, device)
            running_iou.append(iou)
            running_loss.append(loss)
            print('\r{:6.1f} %\tloss {:8.4f}\tIoU {:8.4f}'.format(
                100 * (step + 1) / steps, loss, iou), end="")
        print('\r{:6.1f} %\tloss {:8.4f}\tIoU {:8.4f}\t{}'.format(
            100 * (step + 1) / steps, np.mean(running_loss),
            np.mean(running_iou), timeSince(start)))

        print('running validation...', end='\r')
        val_loss, val_iou = validate(val_dataloader, model, loss_fn, device)
        print('Validation: \tloss {:8.4f} \tIoU {:8.4f}'.format(val_loss, val_iou))

        scheduler.step(np.mean(running_iou))
        train_losses.append(loss)
        train_ious.append(iou)
        val_losses.append(val_loss)
        val_ious.append(val_iou)

        stopper(val_iou, model)
        if stopper.early_stop:
            break

    return (train_losses, val_losses), (train_ious, val_ious), best_model
def main(args):
    args['device'] = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    set_random_seed(args['random_seed'])

    # Interchangeable with other datasets
    dataset, train_set, val_set, test_set = load_dataset_for_classification(args)
    train_loader = DataLoader(train_set, batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs)
    val_loader = DataLoader(val_set, batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs)
    test_loader = DataLoader(test_set, batch_size=args['batch_size'],
                             collate_fn=collate_molgraphs)

    if args['pre_trained']:
        args['num_epochs'] = 0
        model = model_zoo.chem.load_pretrained(args['exp'])
    else:
        args['n_tasks'] = dataset.n_tasks
        model = load_model(args)
    loss_criterion = BCEWithLogitsLoss(pos_weight=dataset.task_pos_weights.to(args['device']),
                                       reduction='none')
    optimizer = Adam(model.parameters(), lr=args['lr'])
    stopper = EarlyStopping(patience=args['patience'])
    model.to(args['device'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_criterion, optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print('epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.format(
            epoch + 1, args['num_epochs'], args['metric_name'], val_score,
            args['metric_name'], stopper.best_score))
        if early_stop:
            break

    if not args['pre_trained']:
        stopper.load_checkpoint(model)
    test_score = run_an_eval_epoch(args, model, test_loader)
    print('test {} {:.4f}'.format(args['metric_name'], test_score))
def __init__(self, args, subdir='', worker=None):
    self.args = args
    self.worker = worker
    self.loss_func = F.cross_entropy if self.worker.multi_label == 1 \
        else F.binary_cross_entropy_with_logits
    self.mode = self.worker.mode
    self.dataset = self.worker.dataset
    self.subdir = subdir
    self.gcnt_train = self.gcnt_valid = 0
    if self.args.early:
        self.early_stopping = EarlyStopping(patience=self.args.patience)
    if subdir:
        self.init_all_logging(subdir)
def train(model, train_data, valid_data, epochs, batch_size, patience):
    writer = tbx.SummaryWriter()
    optimizer = torch.optim.Adam(model.parameters())
    early_stopping = EarlyStopping(patience=patience)

    for epoch in range(epochs):
        start = time()

        # training phase
        model.train()
        train_losses = []
        for i, minibatch in batch_generator(train_data, batch_size):
            model.zero_grad()
            loss, _, _ = model(minibatch)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

        # validation phase
        model.eval()
        valid_losses = []
        for i, minibatch in batch_generator(valid_data, batch_size):
            with torch.no_grad():
                loss, _, _ = model(minibatch)
            valid_losses.append(loss.item())

        end = time()
        train_loss = np.mean(train_losses)
        valid_loss = np.mean(valid_losses)
        writer.add_scalar('train_loss', train_loss, global_step=(epoch + 1))
        writer.add_scalar('valid_loss', valid_loss, global_step=(epoch + 1))
        print("Epoch {0} \t train loss: {1} \t valid loss: {2} \t exec time: {3}s"
              .format((epoch + 1), train_loss, valid_loss, end - start))

        if patience is not None:
            early_stopping(model, valid_loss)
            if early_stopping.is_stop():
                print("Early stopping.")
                model.load_state_dict(torch.load('checkpoint.pt'))
                break

    writer.close()
    return model
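# This trainer calls early_stopping(model, valid_loss) (note the argument order)
# and polls is_stop(); on stopping it reloads 'checkpoint.pt', so the helper
# must write the best weights there. A minimal sketch matching that contract;
# the default patience is an assumption.
import torch


class EarlyStopping:
    def __init__(self, patience=10, path='checkpoint.pt'):
        self.patience = patience
        self.path = path
        self.counter = 0
        self.best_loss = None

    def __call__(self, model, valid_loss):
        if self.best_loss is None or valid_loss < self.best_loss:
            self.best_loss = valid_loss
            self.counter = 0
            torch.save(model.state_dict(), self.path)  # best weights so far
        else:
            self.counter += 1

    def is_stop(self):
        return self.counter >= self.patience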
def pre_train():
    patience = args.patience
    epochs = args.epochs
    if args.model_choice == 'gcn_rand':
        model = GCN_RAND(nfeat=features.shape[1],
                         nhid=args.hidden,
                         nclass=labels.max().item() + 1,
                         nnode=labels.shape[0],
                         labels=labels,
                         args=args,
                         predictions=None,
                         adj=adj)
        optimizer = optim.Adam(model.parameters(), lr=args.lr,
                               weight_decay=args.weight_decay)
    elif args.model_choice == 'tk_rand':
        model = snowball_rand(args=args, adj=adj, labels=labels,
                              nnode=labels.shape[0], nfeat=features.shape[1],
                              nlayers=args.layers, nhid=args.hidden,
                              nclass=labels.max().item() + 1,
                              dropout=args.dropout, activation=activation,
                              predictions=None)
        class_optimizer = getattr(optim, args.optimizer)  # safer equivalent of eval('optim.%s' % args.optimizer)
        args.lr = 0.05
        optimizer = class_optimizer(model.parameters(), lr=args.lr,
                                    weight_decay=args.weight_decay)
    elif args.model_choice == 'gs_rand':
        model = SupervisedGraphSage_Rand(labels.max().item() + 1, args,
                                         features, adj, None, 25, 12, device)
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                               lr=args.lr)
    if args.cuda:
        model.to(device)

    t_total = time.time()
    early_stopping = EarlyStopping(patience=patience, verbose=False)
    best_val, best_test, best_prob = 0, 0, None
    for epoch_num in range(epochs):
        acc_train, acc_val, acc_test, last_output_prob = train(model, optimizer, epoch_num, features)
        if acc_val >= best_val:
            best_val, best_test = acc_val, acc_test
            best_prob = last_output_prob
        early_stopping(-acc_val, model)  # negate so that higher accuracy counts as lower "loss"
        if early_stopping.early_stop:
            # print("Early stopping")
            break
    print("GNN: Val acc = {:.4f}, Test acc = {:.4f} ".format(best_val, best_test))
    return best_prob, best_test
def main(args):
    args['device'] = "cuda" if torch.cuda.is_available() else "cpu"
    set_random_seed()

    # Interchangeable with other datasets
    if args['dataset'] == 'Alchemy':
        from dgl.data.chem import TencentAlchemyDataset
        train_set = TencentAlchemyDataset(mode='dev')
        val_set = TencentAlchemyDataset(mode='valid')

    train_loader = DataLoader(dataset=train_set, batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs_for_regression)
    val_loader = DataLoader(dataset=val_set, batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs_for_regression)

    if args['model'] == 'MPNN':
        model = model_zoo.chem.MPNNModel(output_dim=args['output_dim'])
    elif args['model'] == 'SCHNET':
        model = model_zoo.chem.SchNet(norm=args['norm'], output_dim=args['output_dim'])
        model.set_mean_std(train_set.mean, train_set.std, args['device'])
    elif args['model'] == 'MGCN':
        model = model_zoo.chem.MGCNModel(norm=args['norm'], output_dim=args['output_dim'])
        model.set_mean_std(train_set.mean, train_set.std, args['device'])
    model.to(args['device'])

    loss_fn = nn.MSELoss()
    score_fn = nn.L1Loss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
    stopper = EarlyStopping(mode='lower', patience=args['patience'])

    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_fn, score_fn, optimizer)

        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader, score_fn)
        early_stop = stopper.step(val_score, model)
        print('epoch {:d}/{:d}, validation score {:.4f}, best validation score {:.4f}'.format(
            epoch + 1, args['num_epochs'], val_score, stopper.best_score))
        if early_stop:
            break
def train(self, obs_data, lr=0.005, n_iter=5000):
    params = self.F
    optimizer = Adam(params, lr=lr, betas=(0.95, 0.999))
    early_stop = EarlyStopping(delta=0.000001, patience=5)

    for i in range(20):
        for _ in range(n_iter):
            optimizer.zero_grad()
            obs_probs = self.get_neg_llh(obs_data)
            obs_probs.backward()
            optimizer.step()
        # check for convergence once per block of n_iter steps
        curr_loss = obs_probs.item()
        early_stop(curr_loss)
        if early_stop.early_stop:
            break

    mu_obs = self.llh_model(obs_data)
    u_obs = mu_obs - obs_data
    self.diag_std = torch.from_numpy(
        np.std(u_obs.detach().numpy(), axis=1).astype(np.float32))
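# Here the stopper is driven by the loss alone (no model argument) and takes a
# `delta` minimum-improvement threshold. A minimal sketch under those
# assumptions: a check counts as an improvement only if the loss drops by more
# than delta below the best value seen so far.
class EarlyStopping:
    def __init__(self, delta=0.0, patience=5):
        self.delta = delta
        self.patience = patience
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, loss):
        if self.best_loss is None or loss < self.best_loss - self.delta:
            self.best_loss = loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True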
def prepare_optimiser(args, logger, parameters):
    if args.optimizer == "adam":
        optimizer_class = torch.optim.Adam
    elif args.optimizer == "amsgrad":
        optimizer_class = partial(torch.optim.Adam, amsgrad=True)
    elif args.optimizer == "adadelta":
        optimizer_class = torch.optim.Adadelta
    else:
        optimizer_class = torch.optim.SGD

    optimizer = optimizer_class(params=parameters, lr=args.lr,
                                weight_decay=args.l2_weight)
    lr_scheduler = get_lr_scheduler(logger, optimizer,
                                    patience=args.lr_scheduler_patience,
                                    threshold=args.lr_scheduler_threshold)
    es = EarlyStopping(mode="max", patience=args.es_patience,
                       threshold=args.es_threshold)
    return optimizer, lr_scheduler, es
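# prepare_optimisers/prepare_optimiser only construct
# EarlyStopping(mode, patience, threshold); the update method is never shown in
# these snippets, so this sketch guesses a step(metric) -> bool API with
# ReduceLROnPlateau-style semantics: an improvement must beat the best value by
# at least `threshold` to reset the patience counter.
class EarlyStopping:
    def __init__(self, mode='max', patience=10, threshold=1e-4):
        assert mode in ('min', 'max')
        self.mode = mode
        self.patience = patience
        self.threshold = threshold
        self.num_bad_steps = 0
        self.best = None

    def step(self, metric):
        if self.best is None:
            improved = True
        elif self.mode == 'max':
            improved = metric > self.best + self.threshold
        else:
            improved = metric < self.best - self.threshold
        if improved:
            self.best = metric
            self.num_bad_steps = 0
        else:
            self.num_bad_steps += 1
        return self.num_bad_steps > self.patience  # True -> stop training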
def build_model1(nlayers, epochs, frozen=False):
    net3 = NeuralNet(
        layers=nlayers,
        # learning parameters
        update=lasagne.updates.nesterov_momentum,
        update_learning_rate=theano.shared(np.float32(0.01)),
        update_momentum=theano.shared(np.float32(0.9)),
        regression=True,
        on_epoch_finished=[
            AdjustVariable('update_learning_rate', start=0.01, stop=0.00001),
            AdjustVariable('update_momentum', start=0.9, stop=0.9999),
            EarlyStopping(1000),
        ],
        max_epochs=epochs,  # maximum number of epochs
        train_split=TrainSplit(eval_size=0.4),
        verbose=1,
    )
    if frozen:
        # NOTE: `frozenlayers` is not defined in this function; it is assumed
        # to come from the enclosing module scope.
        for layer in net3.layers[:frozenlayers]:
            layer.trainable = False
    return net3
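# create_network and build_model1 both pass EarlyStopping as a nolearn
# on_epoch_finished handler. nolearn calls such handlers as
# handler(nn, train_history) and stops fitting when one raises StopIteration;
# the sketch below follows the well-known nolearn tutorial implementation,
# restoring the best weights before stopping.
import numpy as np


class EarlyStopping(object):
    def __init__(self, patience=100):
        self.patience = patience
        self.best_valid = np.inf
        self.best_valid_epoch = 0
        self.best_weights = None

    def __call__(self, nn, train_history):
        current_valid = train_history[-1]['valid_loss']
        current_epoch = train_history[-1]['epoch']
        if current_valid < self.best_valid:
            self.best_valid = current_valid
            self.best_valid_epoch = current_epoch
            self.best_weights = nn.get_all_params_values()
        elif self.best_valid_epoch + self.patience < current_epoch:
            print('Early stopping.')
            print('Best valid loss was {:.6f} at epoch {}.'.format(
                self.best_valid, self.best_valid_epoch))
            nn.load_params_from(self.best_weights)
            raise StopIteration()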