def learn(self):
    """Run the epsilon-greedy search loop over the configured schedule.

    For each (epsilon, M) pair in ``self.epsilon_strategy``, samples M
    previously-unseen network sequences, trains each one, records its
    validation metrics in replay memory, and replays memory samples to
    update the Q-values. State is persisted after each epsilon stage.
    """
    for epsilon, M in self.epsilon_strategy:
        episode = 1
        while episode <= M:
            print('Epsilon:', epsilon, 'Episode:', episode, '/', M)
            # Resample until the architecture hash is not already in memory.
            sequence = self.sample_network(epsilon)
            sequence_hashed = SequenceGenerator.hash(sequence)
            while sequence_hashed in self.memory:
                sequence = self.sample_network(epsilon)
                sequence_hashed = SequenceGenerator.hash(sequence)
            trainer = Trainer(sequence)
            history = trainer.train(self.data['X_train'], self.data['y_train'],
                                    self.data['X_val'], self.data['y_val'])
            # Keep only the final-epoch validation metrics.
            evaluation = {'val_loss': history.history['val_loss'][-1]}
            for metric in METRICS:
                evaluation[f'val_{metric}'] = history.history[f'val_{metric}'][-1]
            print('Network:', '--'.join(sequence_hashed.split('\n')))
            print('Evaluation:', evaluation)
            print()
            self.memory.add(sequence_hashed, evaluation, epsilon)
            # Experience replay: refresh Q-values from sampled memories.
            for sample in self.memory.sample():
                self._update_q_values_sequence(sample[0], REWARD_FUNCTION(sample[1]))
            episode += 1
        # Persist Q-values and memory once per epsilon stage.
        self.q_values.save(epsilon)
        self.memory.save(epsilon)
def test_calc_vector_EVIDENCE():
    """Tests the Trainer.calc_vector_EVIDENCE function.

    Writes the resulting vector to a text file under DEFAULT_TEST_PATH
    for manual inspection.
    """
    # Consistency fix: the sibling tests (calc_vector_MLE, calc_matrix_MAP)
    # announce themselves on stdout; this one silently ran.
    print("\tTesting Trainer.calc_vector_EVIDENCE()...")
    t = Trainer()
    EVIDENCE_vec = t.calc_vector_EVIDENCE()
    fname = DEFAULT_TEST_PATH + "test_calc_vector_EVIDENCE.txt"
    np.savetxt(fname, EVIDENCE_vec)
def run_mlp_conv_compare_experiment(model_dict_conv, model_dict_mlp, train_dict, out_dir, test_data):
    """Train an MLP and a ConvNet under the same training config and collect results.

    Returns a dict of parallel lists (one entry per model) and saves them
    to *out_dir*; test accuracy is computed as a side effect.
    """
    np.random.seed(12345)  # reproducible runs
    results = defaultdict(list)
    candidates = [
        (MlpNet(**model_dict_mlp), model_dict_mlp),
        (ConvNet(**model_dict_conv), model_dict_conv),
    ]
    for net, net_dict in candidates:
        label = f'model={net.name}'
        print(f'{label}')
        # Point the dump/best-model callbacks at this model's own folder.
        run_dir = os.path.join(out_dir, label)
        train_dict['callbacks'][1] = ModelDump(output_dir=run_dir)
        train_dict['callbacks'][2] = SaveBestModel(output_dir=run_dir)
        trainer = Trainer(net, **train_dict)
        started = time()
        trainer.train_loop()
        elapsed_minutes = (time() - started) / 60
        results['model_dict'].append(net_dict)
        results['train_dict'].append(train_dict)
        results['time'].append(elapsed_minutes)
        results['label'].append(label)
        results['log_data'].append(trainer.logger.logging_data)
        calc_test_accuracy(net, test_data, train_dict)
    save_results(out_dir, results_dict=results)
    return results
def run_experiment(experiment_generator, out_dir, test_data):
    """Train one ConvNet per configuration yielded by *experiment_generator*.

    Collects model/train dicts, wall-clock minutes, labels and logs into a
    dict of parallel lists, saves them under *out_dir*, and returns them.
    """
    np.random.seed(12345)  # reproducible runs
    results = defaultdict(list)
    for i, (model_dict, train_dict, exp_name, value) in enumerate(experiment_generator()):
        label = f'{exp_name}={value}'
        print(f'{i}. {label}')
        # Each experiment dumps checkpoints into its own subfolder.
        run_dir = os.path.join(out_dir, label)
        train_dict['callbacks'][1] = ModelDump(output_dir=run_dir)
        train_dict['callbacks'][2] = SaveBestModel(output_dir=run_dir)
        net = ConvNet(**model_dict)
        trainer = Trainer(net, **train_dict)
        started = time()
        trainer.train_loop()
        minutes = (time() - started) / 60
        results['model_dict'].append(model_dict)
        results['train_dict'].append(train_dict)
        results['time'].append(minutes)
        results['label'].append(label)
        results['log_data'].append(trainer.logger.logging_data)
        calc_test_accuracy(net, test_data, train_dict)
    save_results(out_dir, results_dict=results)
    return results
def run_dropout_experiment(model_dict, train_dict, out_dir, test_data=None):
    """Train a single ConvNet with dropout enabled and save the results.

    Bug fix: the original called ``calc_test_accuracy(model, test_data, ...)``
    but ``test_data`` was not defined anywhere in the function — a guaranteed
    NameError. It is now an optional parameter (default ``None`` keeps the
    signature backward-compatible); accuracy is only computed when provided.
    """
    np.random.seed(12345)  # reproducible runs
    results = defaultdict(list)
    model_dict['dropout'] = True
    kernel_size = model_dict['kernel_size']
    padding = model_dict['padding']
    label = f'kernel={kernel_size}x{kernel_size}, pad={padding}'
    model = ConvNet(**model_dict)
    # Point the dump/best-model callbacks at this run's folder.
    train_dict['callbacks'][1] = ModelDump(
        output_dir=os.path.join(out_dir, label))
    train_dict['callbacks'][2] = SaveBestModel(
        output_dir=os.path.join(out_dir, label))
    trainer = Trainer(model, **train_dict)
    start_time = time()
    trainer.train_loop()
    time_period = (time() - start_time) / 60  # minutes
    log_data = trainer.logger.logging_data
    results['model_dict'].append(model_dict)
    results['train_dict'].append(train_dict)
    results['time'].append(time_period)
    results['label'].append(label)
    results['log_data'].append(log_data)
    if test_data is not None:
        calc_test_accuracy(model, test_data, train_dict)
    save_results(out_dir, results_dict=results)
    return results
def training_pipeline(config, desired_controls, desired_deviation):
    """
    Pretrain policy with given control sequence and simple model,
    then train again last layers with complex model.
    """
    # Stage 1: pretrain against the target controls using the simple ODE model.
    trainer = Trainer(SimpleModel(), config)
    trainer.pretrain(desired_controls, desired_deviation)

    # Stage 2: first on-policy training pass.
    trainer.train()

    # Stage 3: swap in the richer variant of the same ODE model.
    trainer.set_model(ComplexModel())

    # Freeze the whole policy, then re-enable just the output heads.
    shift_grad_tracking(trainer.policy, False)
    shift_grad_tracking(trainer.policy.out_means, True)
    shift_grad_tracking(trainer.policy.out_sigmas, True)

    # Stage 4: fine-tune only those last layers on-policy.
    trainer.train(post_training=True)
def run_experiment(experiment_generator, out_dir, test_data, plot_loss_batch=False):
    """Train one MlpNet per generated configuration and collect results.

    Optionally plots the first-epoch per-batch training loss. Test accuracy
    is printed for every experiment. Returns a dict of parallel lists.
    """
    np.random.seed(12345)  # reproducible runs
    results = defaultdict(list)
    for i, (model_dict, train_dict, exp_name, value) in enumerate(experiment_generator()):
        net = MlpNet(**model_dict)
        trainer = Trainer(net, **train_dict)
        label = f'{exp_name}={value}'
        print(f'{i}. {label}')
        started = time()
        trainer.train_loop()
        elapsed = time() - started
        log_data = trainer.logger.logging_data
        if plot_loss_batch:
            # plot train loss per batch in first epoch
            filename = exp_name + str(value) + '_loss_one_batch'
            plot_val_loss_per_batch(log_data['loss_batch']['train'], filename, out_dir)
        results['model_dict'].append(model_dict)
        results['train_dict'].append(train_dict)
        results['time'].append(elapsed)
        results['label'].append(label)
        results['log_data'].append(log_data)
        # calculate accuracy on test data
        acc_metric = LabelAccuracy()
        x_test, y_test = test_data
        accuracy = acc_metric(net.predict_classes(x_test), y_test)
        print('Accuracy on test data: {}'.format(accuracy))
    return results
def test_calc_vector_MLE():
    """Tests the Trainer.calc_vector_MLE function.

    Writes the vector to a text file under DEFAULT_TEST_PATH for inspection.
    """
    print("\tTesting Trainer.calc_vector_MLE()...")
    t = Trainer()
    MLE_vec = t.calc_vector_MLE()
    fname = DEFAULT_TEST_PATH + "test_calc_vector_MLE.txt"
    # Context manager replaces manual open/close: the file is closed even
    # if print() raises.
    with open(fname, "w") as test_file:
        print(MLE_vec, file=test_file)
def test_stop_train():
    """Tests Trainer.train with stop-word filtering enabled.

    Trains, generates the model, and dumps the MLE/MAP model files' contents
    to a text file under DEFAULT_TEST_PATH for inspection.
    """
    print("\tTesting Trainer.train(stop_words=True)...")
    t = Trainer(stop_words=True)
    t.train()
    MLE_vec, MAP_matrix, EVIDENCE_vec = t.generate_model()
    fname = DEFAULT_TEST_PATH + "test_generate_stop_model.txt"
    # Bug fix: the original formatted the *file objects* (their reprs, e.g.
    # "<_io.TextIOWrapper ...>") into the output instead of the file contents.
    # Read the files and use context managers so everything is closed safely.
    with open(t.DEFAULT_MLE_FILENAME, 'r') as mle_f, \
            open(t.DEFAULT_MAP_FILENAME, 'r') as map_f, \
            open(fname, "w") as test_f:
        print("MLE:{} \n\nMAP:{}".format(mle_f.read(), map_f.read()), file=test_f)
def test_calc_matrix_MAP():
    """Tests the Trainer.calc_matrix_MAP function.

    Writes the matrix and its shape/dtype/min/max to a text file under
    DEFAULT_TEST_PATH for inspection.
    """
    print("\tTesting Trainer.calc_matrix_MAP()...")
    t = Trainer()
    MLE_vec = t.calc_vector_MLE()  # required because it sets labeldict.
    MAP_matrix = t.calc_matrix_MAP()
    fname = DEFAULT_TEST_PATH + "test_calc_matrix_MAP.txt"
    # Context manager replaces manual open/close.
    with open(fname, "w") as test_f:
        print(MAP_matrix, file=test_f)
        print("shape:{}".format(MAP_matrix.shape), file=test_f)
        print("type:{}".format(MAP_matrix.dtype), file=test_f)
        print("min:{}".format(MAP_matrix.min()), file=test_f)
        print("max:{}".format(MAP_matrix.max()), file=test_f)
def main():
    """CLI entry point: build the dataset, assemble the model, and train it."""
    args = get_args()
    max_len = args['max_len']
    n_workers = args['worker']

    # Retrieves the dataset, cleans, processes and creates tensors from it.
    training_set = Dataset(args['path'], args['min_len'], max_len,
                           n_workers, args['voc_size'])
    vocab_size = training_set.vocab.num_words
    target_pad = training_set.vocab.PAD_token

    # Pytorch's batch generator.
    training_iter = DataLoader(training_set, args['batch'], args['shuffle'],
                               num_workers=n_workers)

    # Optionally swap the embedding size for pretrained GloVe vectors.
    embed_dim = args['embed']
    pretrained = None
    if args['glove']:
        embed_dim = args['glove_size']
        print("Collecting GloVe embeddings size {}".format(embed_dim))
        pretrained = get_glove(embed_dim, training_set.vocab, args['glove_path'])
        print("Successfully collected.")

    # Creates model.
    trainer = Trainer(vocab_size, embed_dim, args['d_model'], args['layers'],
                      args['heads'], args['dff'], max_len, pretrained,
                      args['trainable'], args['dropout'])

    # Train model.
    trainer.train(training_iter, F.cross_entropy, args['epoch'], target_pad,
                  args['save'], training_set.vocab)
def create_trainer(classifier, batch_size, num_epochs, training_data, test_data):
    """Build a Trainer wired with loss logging, basic stats, and evaluation."""
    # Shuffled feeders for both splits.
    train_feeder = data.BilexDataFeeder(training_data, batch_size, shuffle=True)
    test_feeder = data.BilexDataFeeder(test_data, batch_size, shuffle=True)
    lexicon = load_lexicon(training_data)

    trainer = Trainer(classifier, num_epochs, train_feeder)
    trainer.add_command(EpochLossLogger(classifier, LOG_DIR))
    trainer.add_command(BasicStatsLogger(classifier, train_feeder, num_epochs, 10))
    trainer.add_command(
        Evaluation(classifier, test_feeder, lexicon, num_epochs, LOG_DIR, ['all']))
    return trainer
def run_evaluation(datasets, model, verbose=True):
    """Evaluate *model* on the train, validation and test splits.

    Returns a flat list [train_acc, train_loss, val_acc, val_loss,
    test_acc, test_loss] and, when *verbose*, prints a results.md row.

    Decomposition: the original repeated the identical header/evaluate/sleep
    stanza three times; it is now a single loop over the splits, preserving
    the exact output order (header, evaluation, blank line per split).
    """
    results = []
    splits = [
        ("Training Set Results", DatasetType.Train),
        ("Validation Set Results", DatasetType.Validation),
        ("Test Set Results", DatasetType.Test),
    ]
    for header, split in splits:
        if verbose:
            print(header)
        acc, loss = Trainer.evaluate(
            model,
            datasets.get_loader(split),
            verbose=verbose,
        )
        results.append(acc)
        results.append(loss)
        # Brief pause so any progress output flushes before the next header.
        time.sleep(0.1)
        if verbose:
            print("")
    if verbose:
        print("Output for results.md")
        print((" {:.3f} |" * len(results)).format(*results))
    return results
def exp1(opt):
    """Optional pretraining followed by class-incremental training.

    Builds the model named by ``opt.model``, pretrains it on the pretrain
    classes (loading a cached pretrained model when available), then runs
    class-incremental phases, testing after each phase on all classes seen
    so far.
    """
    model = getattr(models.concrete.single, opt.model)(opt).to(device)
    opt.exp_name += opt.model
    vd = VisionDataset(opt, class_order=list(range(10)))
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    logger = get_logger(folder=opt.log_dir + '/' + opt.exp_name + '/')
    logger.info(f'Running with device {device}')
    logger.info("==> Opts for this training: " + str(opt))
    trainer = Trainer(opt, logger, device=device)
    # pretraining: reuse a saved pretrained model when one loads cleanly
    if opt.num_pretrain_classes > 0:
        try:
            logger.info('Trying to load pretrained model...')
            model = load_pretrained_model(opt, model, logger)
            pretrain = False
        except Exception as e:
            logger.info(f'Failed to load pretrained model: {e}')
            pretrain = True
        if pretrain:
            assert opt.num_pretrain_passes > 0
            logger.info(f'==> Starting pretraining')
            for epoch in range(1, opt.num_pretrain_passes + 1):
                trainer.train(loader=vd.pretrain_loader, model=model,
                              optimizer=optimizer, epoch=epoch)
                acc = trainer.test(loader=vd.pretest_loader, model=model,
                                   mask=vd.pretrain_mask, epoch_or_phase=epoch)
            logger.info(f'==> Pretraining completed! Acc: [{acc:.3f}]')
            save_pretrained_model(opt, model)
    if opt.num_tasks > 0:
        # TODO: use another optimizer?
        # Class-Incremental training
        # We start with the pretrain mask because in testing we want
        # pretrained classes included.
        logger.info(f'==> Starting Class-Incremental training')
        mask = vd.pretrain_mask.clone() if opt.num_pretrain_classes > 0 else torch.zeros(vd.n_classes_in_whole_dataset)
        dataloaders = vd.get_ci_dataloaders()
        cl_accuracy_meter = AverageMeter()
        for phase, (trainloader, testloader, class_list, phase_mask) in enumerate(dataloaders, start=1):
            trainer.train(loader=trainloader, model=model, optimizer=optimizer,
                          phase=phase)
            # accumulate masks, because we want to test on all seen classes
            mask += phase_mask
            # this is the accuracy for all classes seen so far
            acc = trainer.test(loader=testloader, model=model, mask=mask,
                               epoch_or_phase=phase)
            cl_accuracy_meter.update(acc)
        logger.info(f'==> CL training completed! AverageAcc: [{cl_accuracy_meter.avg:.3f}]')
def __init__(self, process_id, gpu='cpu', world_size=4, optimizer=optim.Adam,
             optimizer_sparse=optim.SparseAdam,
             optim_params=(1e-3, (0.9, 0.995), 1e-8), model_params=None, tb=None):
    """Set up the per-process policy network, optimizers and device.

    process_id: distributed rank; rank 0 additionally owns the aggregate
    optimizer. gpu: 'cpu' or anything else to select cuda:<rank>.
    optim_params: (lr, betas, eps) prepended with parameters on rank 0.
    """
    super(Learner, self).__init__()
    print(gpu)
    self.model = Policy_Network(data_parallel=False)
    saved_checkpoint = torch.load("./checkpoint.pth")
    self.model.load_state_dict(saved_checkpoint['model'], strict=False)
    # Only rank 0 builds the aggregate optimizer over the model parameters.
    if process_id == 0:
        optim_params = (self.model.parameters(),) + optim_params
        self.optimizer = optimizer(*optim_params)
    self.meta_optimizer = optim.SGD(self.model.parameters(), 0.03)
    self.process_id = process_id
    # Bug fix: original used `gpu is not 'cpu'` — identity comparison with a
    # string literal is fragile (and a SyntaxWarning on modern Python).
    self.device = 'cuda:' + str(process_id) if gpu != 'cpu' else gpu
    self.model.to(self.device)
    self.num_iter = 0
    self.world_size = world_size
    self.original_state_dict = {}
    self.eps = np.finfo(np.float32).eps.item()
    self.use_ml = False
    self.use_rl = False
    self.trainer = Trainer(self.use_ml, self.use_rl, self.device)
    self.tb = tb
def train(file_name, net_file=None):
    """Train a model from *file_name* (optionally resuming from *net_file*),
    then offer to save it under a user-supplied name."""
    trainer = Trainer(file_name, net_file)
    trainer.train()
    # Separate local avoids re-using the input-file parameter for the save name.
    save_name = input(
        "Do you want to save the model? Specify file name if so, or leave blank otherwise: "
    )
    if save_name:
        trainer.save(save_name)
def synthesize(args):
    """Demo pipeline: generate ground-truth levels, train the synthesizer
    (both cached unless --no_cache), then synthesize the input mesh."""
    device = CUDA(0)

    # Generating Training Data
    gt_paths = [
        f'{const.DATA_ROOT}/{args.train_mesh}/{args.train_mesh}_level{i:02d}.obj'
        for i in range(6)
    ]
    have_gt = all(os.path.isfile(gt_path) for gt_path in gt_paths)
    if args.no_cache or not have_gt:
        gen_args = options.GtOptions(tag='demo', mesh_name=args.train_mesh,
                                     template_name='sphere', num_levels=6)
        gt_gen = GroundTruthGenerator(gen_args, device)
        print("Finished generating training data with " + args.train_mesh, flush=True)

    # Training Synthesizer
    options_path = f'{const.PROJECT_ROOT}/checkpoints/{args.train_mesh}_demo/options.pkl'
    models_path = f'{const.PROJECT_ROOT}/checkpoints/{args.train_mesh}_demo/SingleMeshGenerator.pth'
    have_model = os.path.isfile(options_path) and os.path.isfile(models_path)
    train_args = options.TrainOption(tag='demo', mesh_name=args.train_mesh,
                                     template_name='sphere', num_levels=6)
    if args.no_cache or not have_model:
        Trainer(train_args, device).train()
        print("Finished training with " + args.train_mesh, flush=True)

    # Synthesizing Input
    m2m = Mesh2Mesh(train_args, CPU)
    source_mesh = mesh_utils.load_real_mesh(args.input_mesh, 0, True)
    synthesized = m2m(source_mesh, 2, 5, 0)
    synthesized.export(f'{const.RAW_MESHES}/{args.input_mesh}_hi')
    print("Finished synthesizing input on " + args.input_mesh, flush=True)
def restore_model(config, checkpoint_dir=None, checkpoint_file=None):
    """Restore a trained model from a checkpoint.

    Defaults the checkpoint directory/file from the trainer config when not
    given. Returns (session, model).

    Idiom fix: comparisons against None now use ``is None`` (PEP 8) instead
    of ``== None``.
    """
    if checkpoint_dir is None:
        checkpoint_dir = Trainer(None, None, None, config["trainer"]).checkpoint_dir()
    if checkpoint_file is None:
        checkpoint_file = checkpoint_dir + "/best"
    model = create_model(config, training=False)
    print("checkpoint_file", checkpoint_file)
    config = tf.ConfigProto()  # pylint:disable=no-member
    # Grow GPU memory on demand rather than claiming it all up front.
    config.gpu_options.allow_growth = True
    sess = tf.Session(graph=model.graph, config=config)
    with model.graph.as_default():  # pylint:disable=not-context-manager
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, checkpoint_file)
    return sess, model
def save_model(config, input_graph=None, checkpoint_dir=None):
    """Freeze the best checkpoint into a standalone GraphDef (final_frozen.pb).

    Bug fix: ``trainer`` was only created inside the ``if input_graph == None``
    branch, yet its ``log_dir()``/``checkpoint_dir()`` are needed afterwards —
    passing an explicit *input_graph* raised UnboundLocalError. The trainer is
    now constructed unconditionally. Also: ``is None`` instead of ``== None``.
    """
    _sess, model = restore_model(config, checkpoint_dir)
    trainer = Trainer(None, None, None, config["trainer"])
    if input_graph is None:
        tf.train.write_graph(model.graph, trainer.log_dir(), "final.pb", False)
        input_graph = trainer.log_dir() + "/final.pb"
    if checkpoint_dir is None:
        checkpoint_dir = trainer.checkpoint_dir()
    freeze_graph(
        input_graph=input_graph,
        input_checkpoint=checkpoint_dir + "/best",
        output_graph=trainer.log_dir() + "/final_frozen.pb",
        output_node_names=model.output_node_names,
        input_binary=True,
        input_saver="",
        restore_op_name="save/restore_all",
        filename_tensor_name="save/Const:0",
        clear_devices=True,
        initializer_nodes="",
        variable_names_blacklist="",
    )
data_loader, _ = get_mnist_dataloaders(batch_size=64) #data_loader, _ = get_fashion_mnist_dataloaders(batch_size=64) #data_loader = get_lsun_dataloader(path_to_data="/ubc/cs/research/plai-scratch/saeid/datasets/lsun", batch_size=64) img_size = (32, 32, 1) generator = Generator(img_size=img_size, latent_dim=100, dim=16) discriminator = Discriminator(img_size=img_size, dim=16) print(generator) print(discriminator) # Initialize optimizers lr = 1e-4 betas = (.5, .9) G_optimizer = optim.Adam(generator.parameters(), lr=lr, betas=betas) D_optimizer = optim.Adam(discriminator.parameters(), lr=lr, betas=betas) # Train model epochs = 200 trainer = Trainer(generator, discriminator, G_optimizer, D_optimizer, use_cuda=torch.cuda.is_available()) trainer.train(data_loader, epochs, save_training_gif=True) # Save models name = 'mnist_model' torch.save(trainer.G.state_dict(), './gen_' + name + '.pt') torch.save(trainer.D.state_dict(), './dis_' + name + '.pt')
# MD settings for building Cu training/test trajectories with the EMT calculator.
elements = ["Cu"]
size = (3, 3, 3)
temp = 500
n_train = int(2e4)
n_test = int(8e3)
save_interval = 100
train_traj = "training.traj"
test_traj = "test.traj"
max_steps = int(2e3)

# Descriptor (symmetry-function) configuration for the trainer.
cutoff = Polynomial(6.0, gamma=5.0)
num_radial_etas = 6
num_angular_etas = 10
num_zetas = 1
angular_type = "G4"
trn = Trainer(cutoff=cutoff)
trn.create_Gs(elements, num_radial_etas, num_angular_etas, num_zetas, angular_type)

trjbd = TrajectoryBuilder()
# NOTE(review): `system` is not defined in this snippet — presumably set
# earlier in the file; confirm.
calc = EMT()
train_atoms = trjbd.build_atoms(system, size, temp, calc)
calc = EMT()  # fresh calculator instance for the test structure
test_atoms = trjbd.build_atoms(system, size, temp, calc)

# Integrate both systems, writing frames every `save_interval` steps.
steps, train_traj = trjbd.integrate_atoms(
    train_atoms, train_traj, n_train, save_interval
)
steps, test_traj = trjbd.integrate_atoms(
    test_atoms, test_traj, n_test, save_interval
)
from keras.optimizers import Adam
from models.generators import vgg_generator, test_generator
from models.discriminators import vgg_discriminator, test_discriminator
from training import Trainer
from utils.Utils import save_imgs, prepare_directories

# Create output folders and configure file logging before anything runs.
prepare_directories()
logging.basicConfig(filename='app.log', filemode='w', format='[%(asctime)s] %(message)s')
logging.info('Starting...')

# CIFAR images: 32x32 RGB; generator samples from a 256-d latent space.
SIZE = 32
channels = 3
img_shape = (SIZE, SIZE, channels)
latent_dim = 256

generator = test_generator(latent_dim, channels)
disc = test_discriminator(img_shape)
data_provider = DataProviderFactory.get_generator('cifar')

# Bundle everything the Trainer factory needs.
configuration = {
    'optimizer': Adam(0.0002, 0.5),
    'discriminator': disc,
    'generator': generator,
    'latent_dim': latent_dim,
    'data_provider': data_provider
}
trainer = Trainer.from_configuration(configuration)
# save_imgs is invoked every `save_interval` epochs to snapshot samples.
trainer.train(epochs=100001, batch_size=128, save_interval=5,
              interval_function=save_imgs, notebook_mode=False)
import argparse
from training import Trainer

# Parse arguments
parser = argparse.ArgumentParser()
parser.add_argument('--nolog', help='run without logging', action="store_true")
parser.add_argument('--pipeline', help='specify the training pipeline (see README)')
args = parser.parse_args()

if not args.nolog:
    # Fix: these were Python 2 print *statements* — a SyntaxError on
    # Python 3, which the rest of this codebase targets.
    print('Logging enabled - please make sure all relevant changes are committed!')
    print('Use --nolog or --n flag to disable logging.')
#print args

try:
    trainer = Trainer(pipeline=args.pipeline, disable=args.nolog)
except:  # deliberate catch-all: report the failure, then re-raise as-is
    print("Could not set up trainer!")
    raise

trainer.train_all()
#trainer.test_all()
trainer.log_validation_error()
trainer.log_probe_error()
"energy_rmse": 1e-16, "force_rmse": None, "max_steps": max_steps } force_coefficient = None overfit = 1e-7 hidden_layers = [10, 10] cutoff = Polynomial(5.0, gamma=5.0) num_radial_etas = 7 num_angular_etas = 11 num_zetas = 1 angular_type = "G4" trn = Trainer( convergence=convergence, force_coefficient=force_coefficient, overfit=overfit, cutoff=cutoff, hidden_layers=hidden_layers, ) trn.create_Gs(elements, num_radial_etas, num_angular_etas, num_zetas, angular_type) trjbd = TrajectoryBuilder() calc = OpenKIMcalculator("SW_StillingerWeber_1985_Si__MO_405512056662_005") train_atoms = trjbd.build_atoms(system, size, temp, calc) steps, train_traj = trjbd.integrate_atoms(train_atoms, train_traj, n_train, save_interval, timestep=timestep)
for k, v in pretrained_dict.items() if k in model_dict.keys() and v.size() == model_dict[k].size() } print('matched keys:', len(pretrained_dict)) model_dict.update(pretrained_dict) model.load_state_dict(model_dict) # Move the model to the GPU. # criterion = model.loss() if (options["general"]["usecudnn"]): torch.cuda.manual_seed(options["general"]['random_seed']) torch.cuda.manual_seed_all(options["general"]['random_seed']) if (options["training"]["train"]): trainer = Trainer(options, model) if (options["validation"]["validate"]): if options['general']['mod'] == 'slice': validator = Validator2( options, 'validation', model, savenpy=options["validation"]["saves"], ) else: validator = Validator( options, 'validation', model, savenpy=options["validation"]["saves"], ) # TODO:change mod
optimizer = Adam(model.parameters(), lr=1e-3) # Criterion (add weights?) criterion = nn.CrossEntropyLoss() # Scheduler scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=1.5 * 1e-3, steps_per_epoch=len(train_dataloader), epochs=num_epochs) # Training # optim_gen = lambda parameters, lr: SGD(parameters, lr=lr) # find_lr(model, optim_gen, min_lr, max_lr, num_epochs, train_dataloader, val_dataloader, criterion, device, batch_size, # batches_per_epoch, comet_experiment) save_path = pathlib.Path('models') / name save_path.mkdir(parents=True, exist_ok=True) trainer = Trainer(model, train_dataloader, val_dataloader, criterion, optimizer, None, device, TRAFFIC_LABELS, num_epochs, batch_size, batches_per_epoch, comet_experiment, save_path) try: trainer.fit() except KeyboardInterrupt: pass # Prediction data_path_root_test = pathlib.Path('test/') test_anno = pd.DataFrame({'id': [f'pic{num:06}' for num in range(10699)]}) test_dataset = MyDataset(data_dir=data_path_root_test, data_anno=test_anno, phase='test') test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0) preds = trainer.predict(test_dataloader) submit = pd.DataFrame({'id': [f'pic{num:06}' for num in range(10699)], 'category': [TRAFFIC_LABELS[pred] for pred in preds]}) submit.to_csv(save_path / 'submit.csv')
# MD / trainer settings for generating Cu training sets of increasing size.
elements = ["Cu"]
size = (2, 2, 2)
temp = 500
n_test = int(2e4)
save_interval = 100
max_steps = int(2e3)
convergence = {"energy_rmse": 1e-16, "force_rmse": None, "max_steps": max_steps}
force_coefficient = None
cutoff = Polynomial(6.0, gamma=5.0)

# Descriptor (symmetry-function) configuration.
num_radial_etas = 6
num_angular_etas = 10
num_zetas = 1
angular_type = "G4"
trn = Trainer(
    convergence=convergence, force_coefficient=force_coefficient, cutoff=cutoff
)
trn.create_Gs(elements, num_radial_etas, num_angular_etas, num_zetas, angular_type)

trjbd = TrajectoryBuilder()
# One training trajectory per target image count.
n_images = [10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]
train_trajs = ["training_n{}.traj".format(ni) for ni in n_images]
test_traj = "test.traj"
for i in range(len(n_images)):
    calc = EMT()
    # NOTE(review): `system` is not defined in this snippet — presumably set
    # earlier in the file; confirm.
    train_atoms = trjbd.build_atoms(system, size, temp, calc)
    # Steps scale with the image count so each trajectory yields n_images frames.
    n_train = n_images[i] * save_interval
    steps, train_trajs[i] = trjbd.integrate_atoms(
        train_atoms, train_trajs[i], n_train, save_interval
    )
class Learner(nn.Module):
    """Distributed meta-learner for a seq2seq policy network.

    Each process runs :meth:`forward` in a loop: adapt on support data with
    the inner ``meta_optimizer``, compute the query loss, and all-reduce
    gradients. Rank 0 aggregates gradients and steps the shared optimizer.

    Fixes vs. the original:
    * ``gpu is not 'cpu'`` (string identity) -> ``gpu != 'cpu'``.
    * ``forward`` called ``self.save_checkpoint(model, optimizer, ...)`` with
      undefined local names -> uses ``self.model`` / ``self.optimizer``.
    * ``get_returns`` computed the normalized returns but discarded the
      result -> the normalization is now assigned.
    """

    def __init__(self, process_id, gpu='cpu', world_size=4, optimizer=optim.Adam,
                 optimizer_sparse=optim.SparseAdam,
                 optim_params=(1e-3, (0.9, 0.995), 1e-8), model_params=None, tb=None):
        super(Learner, self).__init__()
        print(gpu)
        self.model = Policy_Network(data_parallel=False)
        saved_checkpoint = torch.load("./checkpoint.pth")
        self.model.load_state_dict(saved_checkpoint['model'], strict=False)
        # Only rank 0 owns the aggregate optimizer over the shared weights.
        if process_id == 0:
            optim_params = (self.model.parameters(),) + optim_params
            self.optimizer = optimizer(*optim_params)
        self.meta_optimizer = optim.SGD(self.model.parameters(), 0.03)
        self.process_id = process_id
        # Bug fix: value comparison, not `is not` on a string literal.
        self.device = 'cuda:' + str(process_id) if gpu != 'cpu' else gpu
        self.model.to(self.device)
        self.num_iter = 0
        self.world_size = world_size
        self.original_state_dict = {}
        self.eps = np.finfo(np.float32).eps.item()
        self.use_ml = False
        self.use_rl = False
        self.trainer = Trainer(self.use_ml, self.use_rl, self.device)
        self.tb = tb

    def save_checkpoint(self, model, optimizer, iteration):
        """Persist model and optimizer state for *iteration*."""
        torch.save({'model': model.state_dict(),
                    'optimizer': optimizer.state_dict(), },
                   "checkpoint-{}.pth".format(iteration))

    def _hook_grads(self, all_grads):
        """Register hooks replacing each parameter's gradient with the
        pre-aggregated one from *all_grads*; returns the hook handles."""
        hooks = []
        for i, v in enumerate(self.model.parameters()):
            def closure():
                ii = i  # bind the current index (avoids late-binding capture)
                return lambda grad: all_grads[ii]
            hooks.append(v.register_hook(closure()))
        return hooks

    def _write_grads(self, original_state_dict, all_grads, temp_data):
        """Apply the aggregated gradients *all_grads* on rank 0's optimizer."""
        # reload original model before taking meta-gradients
        self.model.load_state_dict(self.original_state_dict)
        self.model.to(self.device)
        self.model.train()
        self.optimizer.zero_grad()
        dummy_query_x, dummy_query_y = temp_data
        print(" ")
        # Dummy forward/backward: hooks substitute the real gradients, so only
        # the graph structure of this loss matters.
        action_probs = self.model(src_seq=dummy_query_x, trg_seq=dummy_query_y)
        m = Categorical(F.softmax(action_probs, dim=-1))
        actions = m.sample().reshape(-1, 1)
        trg_t = dummy_query_y[:, :1]
        dummy_loss = -F.cross_entropy(action_probs, trg_t.reshape(-1),
                                      ignore_index=0, reduction='none').sum()
        print(" ")
        hooks = self._hook_grads(all_grads)
        dummy_loss.backward()
        print(" ")
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
        self.optimizer.step()
        # gpu memory explodes if you dont remove hooks
        for h in hooks:
            h.remove()
        print("finished meta")

    def calc_reward(self, actions_pred, actions, ignore_index=0, sparse_rewards=False):
        """Per-token reward: 1 where prediction matches target; in sparse
        mode, 1s only when both sides emit EOS."""
        # sparse rewards or char rewards
        if sparse_rewards:
            if actions_pred == EOS and actions == EOS:
                return torch.ones_like(actions).cuda().float()
            return torch.zeros_like(actions).cuda().float()
        else:
            # 1 if character is correct
            return (actions_pred == actions).float()

    def get_returns(self, rewards, batch_size, gamma):
        """Discounted returns per timestep, standardized per trajectory."""
        T = rewards.shape[1]
        discounts = torch.tensor(
            np.logspace(0, T, T, base=gamma, endpoint=False)
        ).view(1, -1).to(self.device)
        all_returns = torch.zeros((batch_size, T)).to(self.device)
        for t in range(T):
            temp = (discounts[:, :T - t] * rewards[:, t:]).sum(dim=-1)
            all_returns[:, t] = temp
        # Bug fix: the normalized result was computed but never assigned,
        # so returns were silently left unstandardized.
        all_returns = (all_returns - all_returns.mean(dim=-1).view(-1, 1)) / \
            (all_returns.std(dim=-1).view(-1, 1) + self.eps)
        return all_returns

    def policy_batch_loss(self, batch_qs, batch_as, gamma=0.9):
        """REINFORCE loss over a batch; returns (policy_loss, mean_reward)."""
        batch_size, max_len_sequence = batch_qs.shape[0], batch_as.shape[1]
        current_as = batch_as[:, :1]
        complete = torch.ones((batch_size, 1)).to(self.device)
        rewards = torch.zeros((batch_size, 0)).to(self.device)
        values = torch.zeros((batch_size, 0)).to(self.device)
        log_probs = torch.zeros((batch_size, 0)).to(self.device)
        advantages_mask = torch.ones((batch_size, 0)).to(self.device)
        for t in range(1, max_len_sequence):
            advantages_mask = torch.cat((advantages_mask, complete), dim=1)
            # action_probs, curr_values = model(src_seq=batch_qs, trg_seq=current_as)
            action_probs = self.model(src_seq=batch_qs, trg_seq=current_as)
            m = Categorical(F.softmax(action_probs, dim=-1))
            actions = m.sample().reshape(-1, 1)
            trg_t = batch_as[:, t].reshape(-1, 1)
            # update decoder output
            current_as = torch.cat((current_as, actions), dim=1)
            curr_log_probs = -F.cross_entropy(action_probs, trg_t.reshape(-1),
                                              ignore_index=0,
                                              reduction='none').reshape(-1, 1)
            # calculate reward based on character cross entropy
            curr_rewards = self.calc_reward(actions, trg_t)
            # update terms
            rewards = torch.cat((rewards, curr_rewards), dim=1).to(self.device)
            # values = torch.cat((values, curr_values), dim=1).to(self.device)
            log_probs = torch.cat((log_probs, curr_log_probs), dim=1)
            # if the action taken is EOS or if end of sequence trajectory ends
            complete *= (1 - ((actions == EOS) | (trg_t == EOS)).float())
        returns = self.get_returns(rewards, batch_size, gamma)
        # advantages = returns - values
        advantages = returns
        advantages *= advantages_mask
        policy_losses = (-log_probs * advantages).sum(dim=-1).mean()
        batch_rewards = rewards.sum(dim=-1).mean()
        return policy_losses, batch_rewards

    def forward(self, num_updates, data_queue, data_event, process_event, tb=None,
                log_interval=100, checkpoint_interval=10000):
        """Worker loop: wait for a task batch, adapt on support data, compute
        the query loss, and reduce gradients to rank 0 for the meta-step."""
        while True:
            data_event.wait()
            data = data_queue.get()
            dist.barrier(async_op=True)
            if self.process_id == 0:
                original_state_dict = {}
            data_event.clear()
            if self.process_id == 0 and self.num_iter != 0 and self.num_iter % checkpoint_interval == 0:
                # Bug fix: was save_checkpoint(model, optimizer, ...) with
                # undefined local names — NameError at the first checkpoint.
                self.save_checkpoint(self.model, self.optimizer, self.num_iter)
            # broadcast weights from master process to all others and save them
            # to a detached dictionary for loading later
            for k, v in self.model.state_dict().items():
                if self.process_id == 0:
                    self.original_state_dict[k] = v.clone().detach()
                dist.broadcast(v, src=0, async_op=True)
            self.model.to(self.device)
            self.model.train()
            # meta gradients
            support_x, support_y, query_x, query_y = map(
                lambda x: torch.LongTensor(x).to(self.device), data)
            for i in range(num_updates):
                self.meta_optimizer.zero_grad()
                loss, _ = self.policy_batch_loss(support_x, support_y)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                self.meta_optimizer.step()
            loss, rewards = self.policy_batch_loss(query_x, query_y)
            self.trainer.tb_policy_batch(self.tb, rewards, loss, self.num_iter, 0, 1)
            # loss, pred = self.model(query_x, query_y)
            all_grads = list(autograd.grad(loss, self.model.parameters()))
            for idx in range(len(all_grads)):
                dist.reduce(all_grads[idx].data, 0, op=dist.ReduceOp.SUM,
                            async_op=True)
                all_grads[idx] = all_grads[idx] / self.world_size
            if self.process_id == 0:
                self.num_iter += 1
                self._write_grads(original_state_dict, all_grads, (query_x, query_y))
            # finished batch so can load data again from master
            process_event.set()
#atomic charges are present, so they replace the normal charge loss and nullify dipole loss if data.Qa is not None: qloss_train = qaloss_t qloss_valid = qaloss_v dloss_train = tf.constant(0.0) dloss_valid = tf.constant(0.0) #define loss function (used to train the model) l2loss = tf.reduce_mean(input_tensor=tf.compat.v1.get_collection( tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)) loss_t = eloss_train + args.force_weight * floss_train + args.charge_weight * qloss_train + args.dipole_weight * dloss_train + args.nhlambda * nhloss_t + args.l2lambda * l2loss loss_v = eloss_valid + args.force_weight * floss_valid + args.charge_weight * qloss_valid + args.dipole_weight * dloss_valid + args.nhlambda * nhloss_v + args.l2lambda * l2loss #create trainer trainer = Trainer(args.learning_rate, args.decay_steps, args.decay_rate, scope="trainer") with tf.compat.v1.name_scope("trainer_ops"): train_op = trainer.build_train_op(loss_t, args.ema_decay, args.max_norm) save_variable_backups_op = trainer.save_variable_backups() load_averaged_variables_op = trainer.load_averaged_variables() restore_variable_backups_op = trainer.restore_variable_backups() #creates a summary from key-value pairs given a dictionary def create_summary(dictionary): summary = tf.compat.v1.Summary() for key, value in dictionary.items(): summary.value.add(tag=key, simple_value=value) return summary
# Fold split: the held-out fold becomes validation, everything else trains.
x_train = train[train.fold != current_val_fold].id.values
x_val = train[train.fold == current_val_fold].id.values
train_dataset = TGSSaltDataset(osp.join(directory, 'train'), x_train,
                               is_test=False, is_val=False, augment_func=aug)
val_dataset = TGSSaltDataset(osp.join(directory, 'train'), x_val,
                             is_test=False, is_val=True)

# Stage 1: train with `myloss`, validating every epoch.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
trainer = Trainer(myloss, iou_numpy, optimizer, MODEL_NAME, None, DEVICE)
train_loader = get_loader(train_dataset, 'train', BATCH_SIZE)
val_loader = get_loader(val_dataset, 'val', BATCH_SIZE)
for i in range(EPOCHS):
    trainer.train(train_loader, model, i)
    trainer.validate(val_loader, model)

# Stage 2: continue with the lovasz loss and a fresh optimizer.
# NOTE(review): this stage runs without validation — confirm intentional.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
trainer = Trainer(lovasz, iou_numpy, optimizer, MODEL_NAME, None, DEVICE)
EPOCHS = 200
for i in range(EPOCHS):
    trainer.train(train_loader, model, i)
# 300-d pretrained embeddings aligned to the training vocabulary.
pretrained_embeddings = load_pretrained_embeddings(embeddings_path,
                                                   train_dataset.word2idx,
                                                   300, is_crf=crf_model)

name_ = 'LSTM'
hp = HyperParameters(name_, train_dataset.word2idx, train_dataset.labels2idx,
                     pretrained_embeddings, batch_size)

# , collate_fn=DatasetParser.pad_collate
train_dataset_ = DataLoader(dataset=train_dataset, batch_size=batch_size)
dev_dataset_ = DataLoader(dataset=dev_dataset, batch_size=batch_size)
test_dataset_ = DataLoader(dataset=test_dataset, batch_size=batch_size)

model = BaselineModel(hp).to(train_dataset.get_device)

# Loss ignores the padding label so it doesn't dominate training.
trainer = Trainer(
    model=model,
    loss_function=CrossEntropyLoss(ignore_index=train_dataset.labels2idx['<PAD>']),
    optimizer=Adam(model.parameters()),
    batch_num=hp.batch_size,
    num_classes=hp.num_classes,
    verbose=True
)
save_to_ = join(RESOURCES_PATH, f"{model.name}_model.pt")
trainer.train(train_dataset_, dev_dataset_, epochs=1, save_to=save_to_)

# Evaluate the trained model on the held-out test split.
evaluator = Evaluator(model, test_dataset_, crf_model)
evaluator.check_performance(train_dataset.idx2label)
import torch

# Checkpoint location of the previously trained model.
name = 'small_14_adam_1em3_bs128_'
base_path = pathlib.Path('models') / name
state_dict_path = base_path / 'best_model.pth'

# Rebuild the architecture and restore the best saved weights.
model = small_resnet14()
model.load_state_dict(torch.load(state_dict_path))

batch_size = 256
# NOTE(review): most Trainer arguments are None here — presumably only the
# inference path of Trainer is exercised; confirm against its definition.
trainer = Trainer(model, None, None, None, None, None, 'cuda',
                  batch_size=batch_size, save_path=base_path)

data_path_root_test = pathlib.Path('test/')
# Placeholder annotations: 10699 test images named pic000000.. with dummy label 0.
test_anno = pd.DataFrame({
    'id': [f'pic{num:06}' for num in range(10699)],
    'category': [0 for num in range(10699)]
})
# NOTE(review): phase='train' on the *test* dataset looks suspicious — confirm
# whether the training-time transform pipeline is intended for inference here.
test_dataset = MyDataset(data_dir=data_path_root_test, data_anno=test_anno,
                         phase='train')
# NOTE(review): this call is truncated in this chunk; remaining arguments
# continue past the visible source.
test_dataloader = DataLoader(test_dataset, batch_size=batch_size,
from model_new import Generator, Discriminator
from training import Trainer
import random
import sys
from torchsummary import summary

# Data / model dimensions.
img_size = (48, 48, 3)
batch_size = 64

# Hyper-parameters: the discriminator uses a larger learning rate than
# the generator.
g_lr = 1e-4
d_lr = 4e-4
betas = (0., .99)

data_loader, _, _ = get_STL10_dataloaders(batch_size=batch_size)

# 128-d latent, 3-channel 48x48 outputs; ssup=True presumably enables the
# self-supervised (rotation) discriminator head — see the rotation-loss
# weights passed to Trainer below.
generator = Generator(z_size=128, channel=3, output_size=48)
discriminator = Discriminator(channel=3, ssup=True)

# One Adam optimizer per network.
G_optimizer = optim.Adam(generator.parameters(), lr=g_lr, betas=betas)
D_optimizer = optim.Adam(discriminator.parameters(), lr=d_lr, betas=betas)

# Train the model.
epochs = 200
trainer = Trainer(
    generator, discriminator, G_optimizer, D_optimizer,
    weight_rotation_loss_d=1.0, weight_rotation_loss_g=0.2,
    critic_iterations=1, use_cuda=torch.cuda.is_available()
)
trainer.train(data_loader, epochs, save_training_gif=True)
from sklearn_crfsuite import scorers
from sklearn_crfsuite import metrics
import pickle

# Load the stage-4 pickled CRF model.
with open("../lib/models/korrespondez_model_stage4.pickle", "rb") as f:
    m = pickle.load(f)

# Score with weighted F1 over all labels except the dominant 'O' tag.
labels = list(m.classes_)
labels.remove('O')
f1_scorer = make_scorer(metrics.flat_f1_score, average='weighted',
                        labels=labels)

t = Trainer("../lib/config/korr_nlp.json")
#train,test = t.split(test_perc=0.20)
#t.training = train
#t.test = test
print("extracting features...")
t.set_feats_labels(template1)

# Search distributions for the CRF's c1/c2 regularisation coefficients.
params_space = {
    'c1': scipy.stats.expon(scale=0.5),
    'c2': scipy.stats.expon(scale=0.05),
}

# search
# NOTE(review): this call is truncated in this chunk; remaining arguments
# continue past the visible source.
rs = RandomizedSearchCV(t.crf,
# NOTE(review): this chunk begins inside an if/elif dispatch on args.model
# whose opening branch header lies outside this view; the indentation of the
# first statements below is reconstructed accordingly.

    # GAN branch: separate discriminator optimizer, BCE criterion.
    optimizer_D = torch.optim.Adam(model.discriminator.parameters(),
                                   lr=configs['d_learning_rate'],
                                   weight_decay=configs['weight_decay'])
    print('Model {}, Number of parameters {}'.format(args.model, count_params(model)))
    criterion = torch.nn.BCELoss()
    trainer = GANTrainer(model, optimizer_G, optimizer_D, train_loader,
                         val_loader, test_loader, criterion,
                         configs['epochs'], args.model)
    trainer.fit()
    sys.exit(0)
elif args.model in ['inn']:
    # Invertible-network branch: single optimizer, MSE criterion.
    model = INN(configs['ndim_total'], configs['input_dim'],
                configs['output_dim'], dim_z = configs['latent_dim']).to(DEVICE)
    print('Model {}, Number of parameters {}'.format(args.model, count_params(model)))
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=configs['learning_rate'],
                                 weight_decay=configs['weight_decay'])
    criterion = torch.nn.MSELoss()
    trainer = INNTrainer(model, optimizer, train_loader, val_loader,
                         test_loader, criterion, configs['epochs'], args.model)
    trainer.fit()
    sys.exit(0)
else:
    # Unknown model name: raises the bare NameError class.
    raise NameError

# Fallthrough path: since the branches above either sys.exit(0) or raise,
# this is presumably only reached from branches outside this view that fall
# through — confirm against the full dispatch chain.
print('Model {}, Number of parameters {}'.format(args.model, count_params(model)))
criterion = nn.MSELoss()
trainer = Trainer(model, optimizer, train_loader, val_loader, test_loader,
                  criterion, configs['epochs'], args.model)
# train the model
trainer.fit()
def test_word_ranking():
    """Tests the Trainer.get_word_ranking function."""
    trainer = Trainer()
    mle_vec = trainer.calc_vector_MLE()
    map_matrix = trainer.calc_matrix_MAP()
    trainer.get_word_ranking(map_matrix, mle_vec)