def train_fn(model):
    global_step = tf.train.create_global_step()
    trainable_vars = tf.trainable_variables()
    count_parameters(trainable_vars)
    optimizer = tf.compat.v1.train.AdamOptimizer(FLAGS.learning_rate)
    rating_l2_loss = tf.add_n([
        tf.nn.l2_loss(v) for v in trainable_vars
        if check_scope_rating(v.name) and 'bias' not in v.name
    ])
    model.rating_loss = model.rating_loss + FLAGS.lambda_reg * rating_l2_loss
    review_l2_loss = tf.add_n([
        tf.nn.l2_loss(v) for v in trainable_vars
        if check_scope_review(v.name) and 'bias' not in v.name
    ])
    model.review_loss = model.review_loss + FLAGS.lambda_reg * review_l2_loss
    update_rating = optimizer.minimize(model.rating_loss,
                                       name='update_rating',
                                       global_step=global_step)
    update_review = optimizer.minimize(model.review_loss, name='update_review')
    return update_rating, update_review, global_step
def train_fn(loss):
    '''Calculate gradients and update parameters based on loss.'''
    # Get all trainable variables.
    trained_vars = tf.trainable_variables()
    # Utility function that counts the model's parameters and prints them out.
    count_parameters(trained_vars)
    # Gradients of the loss with respect to the parameters.
    gradients = tf.gradients(loss, trained_vars)
    # Gradient clipping: clips values of multiple tensors by the ratio of the
    # sum of their norms.
    clipped_grads, global_norm = tf.clip_by_global_norm(
        gradients, FLAGS.max_grad_norm)
    # Save the global norm for TensorBoard.
    tf.summary.scalar('global_grad_norm', global_norm)
    # Add gradients and vars to summary
    # for gradient, var in list(zip(clipped_grads, trained_vars)):
    #     if 'attention' in var.name:
    #         tf.summary.histogram(var.name + '/gradient', gradient)
    #         tf.summary.histogram(var.name, var)
    # Return (and create if necessary) the global step tensor.
    global_step = tf.train.get_or_create_global_step()
    # Define the RMSProp optimizer; the paper uses a different optimizer
    # (SGD with momentum 0.9).
    optimizer = tf.train.RMSPropOptimizer(FLAGS.learning_rate)
    # Build the apply-gradients op.
    train_op = optimizer.apply_gradients(zip(clipped_grads, trained_vars),
                                         name='train_op',
                                         global_step=global_step)
    return train_op, global_step
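# The examples in this collection all rely on a count_parameters helper that is
# defined elsewhere (typically in a utils module) and is not shown here. For the
# TensorFlow snippets, which pass in the list returned by tf.trainable_variables(),
# a minimal hypothetical sketch could look like the following; the exact output
# format and return value differ between repos, so treat this only as an
# illustration of the idea.
import numpy as np


def count_parameters(trained_vars):
    """Hypothetical sketch: print per-variable and total parameter counts for a
    list of tf.Variable objects, and return the total."""
    total = 0
    for var in trained_vars:
        # Product of the static shape dimensions of this variable.
        n = int(np.prod(var.get_shape().as_list()))
        print('{:<60s} {:>12,d}'.format(var.name, n))
        total += n
    print('Total trainable parameters: {:,d}'.format(total))
    return total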
def train_fn(model):
    global_step = tf.train.create_global_step()
    trainable_vars = tf.trainable_variables()
    count_parameters(trainable_vars)
    optimizer = tf.compat.v1.train.AdamOptimizer(FLAGS.learning_rate, epsilon=1e-4)
    rating_l2_loss = tf.add_n([
        tf.nn.l2_loss(v) for v in trainable_vars
        if check_scope_rating(v.name) and 'bias' not in v.name
    ])
    model.rating_loss = model.rating_loss + FLAGS.lambda_reg * rating_l2_loss
    review_l2_loss = tf.add_n([
        tf.nn.l2_loss(v) for v in trainable_vars
        if check_scope_review(v.name) and 'bias' not in v.name
    ])
    model.review_loss = model.review_loss + FLAGS.lambda_reg * review_l2_loss
    # Optional per-value gradient clipping, kept for reference:
    # grad_rating = optimizer.compute_gradients(model.rating_loss)
    # grad_review = optimizer.compute_gradients(model.review_loss)
    #
    # def ClipIfNotNone(grad):
    #     if grad is None:
    #         return grad
    #     return tf.clip_by_value(grad, -1., 1.)
    #
    # clipped_grad_rating = [(ClipIfNotNone(grad), var) for grad, var in grad_rating]
    # clipped_grad_review = [(ClipIfNotNone(grad), var) for grad, var in grad_review]
    # update_rating = optimizer.apply_gradients(clipped_grad_rating,
    #                                           name='update_rating',
    #                                           global_step=global_step)
    # update_review = optimizer.apply_gradients(clipped_grad_review,
    #                                           name='update_review')
    update_rating = optimizer.minimize(model.rating_loss,
                                       name='update_rating',
                                       global_step=global_step)
    update_review = optimizer.minimize(model.review_loss, name='update_review')
    return update_rating, update_review, global_step
def get_generater(args, config):
    # Import the model module and build the generator.
    model = __import__(config["model"])
    G = model.Generator(**config).to(config["device"])
    utils.count_parameters(G)
    utils.load_state_dict(G,
                          torch.load(args.weights_path),
                          strict=not args.not_strict)
    G.eval()
    return G
def __init__(self, input_dims, hidden_size, embed_dim, output_dim, num_heads,
             attn_dropout, relu_dropout, res_dropout, out_dropout, layers,
             attn_mask=False, src_mask=False, tgt_mask=False):
    super(TransformerGenerationModel, self).__init__()
    [self.orig_d_a, self.orig_d_b] = input_dims
    assert self.orig_d_a == self.orig_d_b
    self.d_a, self.d_b = 512, 512
    final_out = embed_dim * 2
    h_out = hidden_size
    self.num_heads = num_heads
    self.layers = layers
    self.attn_dropout = attn_dropout
    self.relu_dropout = relu_dropout
    self.res_dropout = res_dropout
    self.attn_mask = attn_mask
    self.embed_dim = embed_dim

    self.fc_a = nn.Linear(self.orig_d_a, self.d_a)
    self.fc_b = nn.Linear(self.orig_d_b, self.d_b)

    self.trans_encoder = self.get_encoder_network()
    self.trans_decoder = self.get_decoder_network()
    print("Encoder Model size: {0}".format(count_parameters(self.trans_encoder)))
    print("Decoder Model size: {0}".format(count_parameters(self.trans_decoder)))

    # Projection layers
    self.proj_enc = ComplexLinear(self.d_a, self.embed_dim)
    self.proj_dec = ComplexLinear(self.orig_d_a, self.embed_dim)
    self.out_fc1 = nn.Linear(final_out, h_out)
    self.out_fc2 = nn.Linear(h_out, output_dim)
    self.out_fc3 = nn.Linear(output_dim, 1000)
    self.out_dropout = nn.Dropout(out_dropout)
def convert(logdir_train3, logdir_convert):
    # WARNING: do not load net1 or net2; load model net3 only.
    net3_model = Net3()
    checkpoint_path = '{}/checkpoint.tar'.format(logdir_train3)
    checkpoint = torch.load(checkpoint_path)
    if checkpoint:
        net3_model.load_state_dict(checkpoint['model_state_dict'])

    # Create the conversion-source loader.
    conversion_source_path = os.path.join(hp.quick_convert.data_path, 'test')
    conversion_source_set = Net3DataDir(conversion_source_path)
    conversion_source_loader = DataLoader(conversion_source_set,
                                          batch_size=hp.quick_convert.batch_size,
                                          shuffle=False,
                                          drop_last=False)

    # Run the model. Warning: pass only net3_model here, not the other models.
    spectrogram_batch = quick_convert(net3_model, conversion_source_loader,
                                      logdir_convert)

    # Logging
    net3_num_params = count_parameters(net3_model)
    logger.debug('Network 3 number of params: {}'.format(net3_num_params))
def create_model(x, y, n_gpu, hparams):
    gen_logits = []
    gen_loss = []
    clf_loss = []
    tot_loss = []
    accuracy = []
    trainable_params = None
    for i in range(n_gpu):
        with tf.device("/gpu:%d" % i):
            results = model(hparams, x[i], y[i], reuse=(i != 0))
            gen_logits.append(results["gen_logits"])
            gen_loss.append(results["gen_loss"])
            clf_loss.append(results["clf_loss"])
            if hparams.clf:
                tot_loss.append(results["gen_loss"] + results["clf_loss"])
            else:
                tot_loss.append(results["gen_loss"])
            accuracy.append(results["accuracy"])
            if i == 0:
                trainable_params = tf.trainable_variables()
                print("trainable parameters:", count_parameters())
    return trainable_params, gen_logits, gen_loss, clf_loss, tot_loss, accuracy
def main(args):
    logging.info('training on {} gpus'.format(torch.cuda.device_count()))
    logging.info('max tokens {} per gpu'.format(args.max_tokens))
    logging.info('max sentences {} per gpu'.format(args.max_sentences))

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.enabled = True
    cudnn.benchmark = True
    cudnn.deterministic = True

    tasks.set_ljspeech_hparams(args)
    logging.info("args = %s", args)

    saved_args, model_state_dict, epoch, global_step, optimizer_state_dict, best_valid_loss = utils.load(
        args.output_dir)
    if any([saved_args, model_state_dict, epoch, global_step, optimizer_state_dict]):
        logging.info('Found existing checkpoint with epoch %d and updates %d',
                     epoch, global_step)
    if saved_args is not None:
        saved_args.__dict__.update(args.__dict__)
        args = saved_args

    task = tasks.LJSpeechTask(args)
    task.setup_task(model_state_dict, optimizer_state_dict)
    logging.info("param size = %d", utils.count_parameters(task.model))

    if args.max_epochs is not None:
        max_epochs = args.max_epochs
    else:
        max_epochs = float('inf')
    if args.max_updates is not None:
        max_updates = args.max_updates
    else:
        max_updates = float('inf')

    while epoch < max_epochs and global_step < max_updates:
        epoch += 1
        decoder_loss, stop_loss, loss, global_step = task.train(
            epoch=epoch, num_updates=global_step)
        logging.info(
            'train %d global step %d decoder loss %.6f stop loss %.6f total loss %.6f',
            epoch, global_step, decoder_loss, stop_loss, loss)

        decoder_loss, stop_loss, loss, fr, pcr, dfr = task.valid()
        logging.info(
            'valid %d global step %d decoder loss %.6f stop loss %.6f total loss %.6f fr %.6f pcr %.6f dfr %.6f',
            epoch, global_step, decoder_loss, stop_loss, loss, fr, pcr, dfr)

        is_best = False
        if loss < best_valid_loss:
            best_valid_loss = loss
            is_best = True
        if epoch % args.save_interval == 0:
            utils.save(args.output_dir, args, task.model, epoch, global_step,
                       task.optimizer, best_valid_loss, is_best)
def __init__(self, num_agents: int, num_trials: int, lr: float,
             initial_agent: ESAgent, agent_class, env_name: str,
             weights_std: float, seed: int, num_parallel: int, alpha: int,
             num_gradients: int):
    self.num_parallel = num_parallel
    self.num_agents = num_agents
    self.num_trials = num_trials
    self.env_name = env_name
    self.agent = initial_agent
    self.seed = seed
    self.lr = lr
    self.centroid = deepcopy(initial_agent.policy)
    self.weights_std = weights_std
    self.num_parameters = count_parameters(self.centroid)
    self.num_gradients = num_gradients
    self.pertrubations_distr = distrib.Normal(
        torch.zeros(self.num_parameters), 1)
    self.grad_distr = distrib.Normal(torch.zeros(self.num_gradients), 1)
    self.alpha = alpha
    self.grads = Buffer(self.num_parameters, num_gradients)
def single_worker(device, num_jobs, args, idx_beg=0):
    if idx_beg > 0 and num_jobs == 1:
        local_writers = [open(f"{args.output_dir}/decode.{args.nj}.ark", 'wb')]
    else:
        local_writers = [
            open(f"{args.output_dir}/decode.{i+1}.ark", 'wb')
            for i in range(num_jobs)
        ]

    inferset = InferDataset(args.input_scp)
    inferset.dataset = inferset.dataset[idx_beg:]
    testloader = DataLoader(inferset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=args.workers,
                            pin_memory=True)

    with open(args.config, 'r') as fi:
        configures = json.load(fi)
    model = build_model(args, configures, train=False)
    model = model.to(device)
    model.load_state_dict(torch.load(args.resume, map_location=device))
    model.eval()

    print("> Model built.")
    print("  Model size:{:.2f}M".format(utils.count_parameters(model) / 1e6))

    cal_logit(model, testloader, device, local_writers)
def create_model():
    """Helper function to instantiate a new model."""
    ae = AutoEncoder()
    print('Loaded new AutoEncoder model')
    total_params = utils.count_parameters(ae)
    print(f'Model has {total_params} parameters')
    ae = ae.to(DEVICE)
    return ae
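# For the PyTorch snippets, count_parameters is usually applied to an nn.Module.
# A minimal hypothetical sketch is given below; note that the repos are not
# consistent about units (some print a raw count, others divide by 1e6 or report
# a size in MB, and one passes an extra threshold argument), so this version
# simply returns the raw number of trainable parameters as an illustration.
def count_parameters(model):
    """Hypothetical sketch: return the number of trainable parameters in a
    torch.nn.Module."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)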
def __init__(self,
             net_config=None,
             opt_config=None,
             metric="",
             GPU=0,
             seed=None,
             **kwargs):
    # Set logger.
    self.logger = get_module_logger("QuantTransformer")
    self.logger.info("QuantTransformer PyTorch version...")

    # Set hyper-parameters.
    self.net_config = net_config or DEFAULT_NET_CONFIG
    self.opt_config = opt_config or DEFAULT_OPT_CONFIG
    self.metric = metric
    self.device = torch.device("cuda:{:}".format(GPU) if torch.cuda.is_available()
                               and GPU >= 0 else "cpu")
    self.seed = seed

    self.logger.info("Transformer parameters setting:"
                     "\nnet_config : {:}"
                     "\nopt_config : {:}"
                     "\nmetric : {:}"
                     "\ndevice : {:}"
                     "\nseed : {:}".format(
                         self.net_config,
                         self.opt_config,
                         self.metric,
                         self.device,
                         self.seed,
                     ))

    if self.seed is not None:
        random.seed(self.seed)
        np.random.seed(self.seed)
        torch.manual_seed(self.seed)
        if self.use_gpu:
            torch.cuda.manual_seed(self.seed)
            torch.cuda.manual_seed_all(self.seed)

    self.model = get_transformer(self.net_config)
    self.model.set_super_run_type(super_core.SuperRunMode.FullModel)
    self.logger.info("model: {:}".format(self.model))
    self.logger.info("model size: {:.3f} MB".format(count_parameters(self.model)))

    if self.opt_config["optimizer"] == "adam":
        self.train_optimizer = optim.Adam(self.model.parameters(),
                                          lr=self.opt_config["lr"])
    elif self.opt_config["optimizer"] == "sgd":
        self.train_optimizer = optim.SGD(self.model.parameters(),
                                         lr=self.opt_config["lr"])
    else:
        raise NotImplementedError("optimizer {:} is not supported!".format(
            self.opt_config["optimizer"]))

    self.fitted = False
    self.model.to(self.device)
def main():
    global best_valacc, best_epoch, best_state
    for epoch in range(1, args.epochs + 1):
        if args.unfreeze == epoch:
            model.freeze(False)
            if args.changeopt:
                if args.adamopt:
                    model.optimizer = torch.optim.Adam(
                        model.parameters(),
                        lr=0.005,
                        weight_decay=model.args.weight_decay)
                else:
                    model.optimizer = torch.optim.SGD(
                        model.parameters(),
                        lr=0.0005,
                        weight_decay=model.args.weight_decay)
        elif args.freeze == epoch:
            model.freeze(True)
        if args.dropout and args.unfreeze < epoch:
            if args.dropout_all:
                model.targeted_dropout_all()
            else:
                model.targeted_dropout()
        if args.make_hard == epoch:
            model.make_hard()

        print('Epoch {} {} {} {}'.format(epoch,
                                         count_parameters(model),
                                         count_parameters(model, 1e-6),
                                         count_parameters(model, 0)))
        model.train_(train_loader,
                     metric_saved,
                     args.n_classes,
                     save_target_on_leaves_=False,
                     hard=False,
                     mode='train')
        valacc = model.train_(valloader,
                              metric_saved,
                              args.n_classes,
                              hard=True,
                              mode='val')
        if valacc > best_valacc:
            best_state = model.state_dict()
            best_valacc = valacc
            best_epoch = epoch
            print('another best valacc:{:.2f}'.format(valacc))
        save_metric()
def test(ENV):
    print('Loading environment...\n')
    env = UnityEnvironment(file_name=ENV)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    env_info = env.reset(train_mode=False)[brain_name]  # reset the environment
    states = env_info.vector_observations  # get the current state (for each agent)

    print('Loading agent...\n')
    num_agents = len(env_info.agents)
    state_size, action_size = brain.vector_observation_space_size, brain.vector_action_space_size
    agent = Agent(num_agents=num_agents,
                  state_size=state_size,
                  action_size=action_size)
    print('Capacity of the Actor (# of parameters): ',
          count_parameters(agent.actor_local))
    print('Capacity of the Critic (# of parameters): ',
          count_parameters(agent.critic_local))

    scores = np.zeros(num_agents)  # initialize the score
    dones = False

    # Transfer learning
    print('Transfer Learning into Agent...\n')
    agent.actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))
    agent.actor_local.eval()
    agent.critic_local.load_state_dict(torch.load('checkpoint_critic.pth'))
    agent.critic_local.eval()

    # Play
    print('Playing...\n')
    while not np.any(dones):
        actions = agent.act(states)  # select an action (for each agent)
        actions = np.clip(actions, -1, 1)  # all actions between -1 and 1
        env_info = env.step(actions)[brain_name]  # send all actions to the environment
        next_states = env_info.vector_observations  # get next state (for each agent)
        rewards = env_info.rewards  # get reward (for each agent)
        dones = env_info.local_done  # see if episode finished
        scores += rewards  # update the score (for each agent)
        states = next_states  # roll over states to next time step

    print("Score: {}".format(scores))
def train_fn(loss):
    trained_vars = tf.trainable_variables()
    count_parameters(trained_vars)

    # Gradient clipping
    gradients = tf.gradients(loss, trained_vars)
    clipped_grads, global_norm = tf.clip_by_global_norm(
        gradients, FLAGS.max_grad_norm)
    tf.summary.scalar('global_grad_norm', global_norm)

    # Define optimizer
    global_step = tf.train.get_or_create_global_step()
    optimizer = tf.train.RMSPropOptimizer(FLAGS.learning_rate)
    train_op = optimizer.apply_gradients(zip(clipped_grads, trained_vars),
                                         name='train_op',
                                         global_step=global_step)
    return train_op, global_step
def test(args, recon_model):
    """
    Performs evaluation of a pre-trained policy model.

    :param args: Argument object containing evaluation parameters.
    :param recon_model: reconstruction model.
    """
    model, policy_args = load_policy_model(
        pathlib.Path(args.policy_model_checkpoint))

    # Overwrite number of trajectories to test on
    policy_args.num_test_trajectories = args.num_test_trajectories
    if args.data_path is not None:  # Overwrite data path if provided
        policy_args.data_path = args.data_path

    # Logging of policy model
    logging.info(args)
    logging.info(recon_model)
    logging.info(model)
    if args.wandb:
        wandb.config.update(args)
        wandb.watch(model, log='all')

    # Initialise summary writer
    writer = SummaryWriter(log_dir=policy_args.run_dir / 'summary')

    # Parameter counting
    logging.info(
        'Reconstruction model parameters: total {}, of which {} trainable and {} untrainable'
        .format(count_parameters(recon_model),
                count_trainable_parameters(recon_model),
                count_untrainable_parameters(recon_model)))
    logging.info(
        'Policy model parameters: total {}, of which {} trainable and {} untrainable'
        .format(count_parameters(model), count_trainable_parameters(model),
                count_untrainable_parameters(model)))

    # Create data loader
    test_loader = create_data_loader(policy_args, 'test', shuffle=False)
    test_data_range_dict = create_data_range_dict(policy_args, test_loader)

    do_and_log_evaluation(policy_args, -1, recon_model, model, test_loader,
                          writer, 'Test', test_data_range_dict)

    writer.close()
def test_lcg(a, c, m, n, bit_index=0, batch_size=128, num_batches=128,
             num_epochs=2):
    print("Initializing a model for bit {}...".format(bit_index))
    model = Model(n, 2)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    data_source = LcgBitDataset(bit_index,
                                a=a,
                                c=c,
                                m=m,
                                n=n,
                                batch_size=batch_size,
                                num_batches=num_batches)
    data_loader = data.DataLoader(data_source, **{
        'batch_size': batch_size,
        'num_workers': 50
    })

    max_epochs = trange(num_epochs)
    for epoch in max_epochs:
        max_epochs.set_description(
            "Epoch {} - Generating data...".format(epoch + 1))
        iteration_count = 0
        for x, y in data_loader:
            optimizer.zero_grad()
            output = model(x)
            loss = criterion(output, y)
            loss.backward()
            optimizer.step()
            iteration_count += 1
            max_epochs.set_description(
                "Epoch {} - Loss {:.3f}; {:.0%}".format(
                    epoch + 1, loss.item(),
                    iteration_count / data_source.num_batches))

    print("Benchmarking model against random guesser...")
    nn_guess_accuracy, random_guess_accuracy = utils.model_vs_random(
        model, data_loader)
    print("\n=========== Results ==============")
    print("    Net was correct: {:.2%}".format(nn_guess_accuracy))
    print(" Random was correct: {:.2%}".format(random_guess_accuracy))
    print("==================================")
    return nn_guess_accuracy, random_guess_accuracy, utils.count_parameters(model)
def __init__(self):
    super(Net, self).__init__()
    encoder = ConvNet()
    hooks, inp_szs, enc_szs = get_hooks(encoder.downsample)
    idxs = list(enc_szs.keys())
    x_sz = enc_szs[len(enc_szs) - 1]
    head = FeedForward(x_sz)
    layers = [encoder, head]
    [print(count_parameters(x)) for x in layers]
    self.layers = nn.Sequential(*layers)
def main():
    global args, config, last_epoch, best_prec, writer
    writer = SummaryWriter(log_dir=args.work_path + '/event')

    # Load the config file
    with open(args.work_path + '/config.yaml') as f:
        config = yaml.load(f)
    config = easydict.EasyDict(config)
    logger.info(config)

    # Build the model
    net = get_model(config)
    logger.info(net)
    logger.info("=====total parameters:" + str(utils.count_parameters(net)))

    device = 'cuda' if config.use_gpu else 'cpu'
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True
    net.to(device)

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(),
                                config.lr_scheduler.base_lr,
                                momentum=config.optimize.momentum,
                                weight_decay=config.optimize.weight_decay,
                                nesterov=config.optimize.nesterov)

    last_epoch = -1
    best_prec = 0

    # Resume training from a previously saved checkpoint
    if args.work_path:
        ckpt_file_name = args.work_path + '/' + config.ckpt_name + '.pth.tar'
        if args.resume:
            best_prec, last_epoch = utils.load_checkpoint(ckpt_file_name,
                                                          net,
                                                          optimizer=optimizer)

    # Set up the data transforms
    transform_train = transforms.Compose(utils.data_augmentation(config))
    transform_test = transforms.Compose(
        utils.data_augmentation(config, is_train=False))

    train_loader, test_loader = utils.get_data_loader(transform_train,
                                                      transform_test, config)
    logger.info("==============train-test-file-path: {}".format(config.dataset))

    logger.info("            =======  Training  =======\n")
    for epoch in range(last_epoch + 1, config.epochs):
        lr = utils.adjust_learning_rate(optimizer, epoch, config)
        writer.add_scalar('learning_rate', lr, epoch)
        train(train_loader, net, criterion, optimizer, epoch, device)
        if epoch == 0 or (
                epoch + 1) % config.eval_freq == 0 or epoch == config.epochs - 1:
            test(test_loader, net, criterion, optimizer, epoch, device)

    writer.close()
    logger.info(
        "======== Training Finished.   best_test_acc: {:.3f}% ========".format(
            best_prec))
def __init__(self, time_step, input_dims, hidden_size, embed_dim, output_dim,
             num_heads, attn_dropout, relu_dropout, res_dropout, out_dropout,
             layers, attn_mask=False):
    """
    Construct a basic Transformer model.

    :param input_dims: The input dimensions of the various modalities.
    :param hidden_size: The hidden dimensions of the fc layer.
    :param embed_dim: The dimensions of the embedding layer.
    :param output_dim: The dimensions of the output (128 in MusicNet).
    :param num_heads: The number of heads to use in the multi-headed attention.
    :param attn_dropout: The dropout following self-attention sm((QK)^T/d)V.
    :param relu_dropout: The dropout for ReLU in the residual block.
    :param res_dropout: The dropout of each residual block.
    :param out_dropout: The dropout of the output layer.
    :param layers: The number of transformer blocks.
    :param attn_mask: A boolean indicating whether to use an attention mask
        (for the transformer decoder).
    """
    super(TransformerModel, self).__init__()
    [self.orig_d_a, self.orig_d_b] = input_dims
    assert self.orig_d_a == self.orig_d_b
    self.d_a, self.d_b = 512, 512
    final_out = embed_dim * 2
    h_out = hidden_size
    self.num_heads = num_heads
    self.layers = layers
    self.attn_dropout = attn_dropout
    self.relu_dropout = relu_dropout
    self.res_dropout = res_dropout
    self.attn_mask = attn_mask
    self.embed_dim = embed_dim

    # Transformer networks
    self.trans = self.get_network()
    print("Encoder Model size: {0}".format(count_parameters(self.trans)))

    self.fc_a = nn.Linear(self.orig_d_a, self.d_a)
    self.fc_b = nn.Linear(self.orig_d_b, self.d_b)

    # Projection layers
    self.proj = ComplexLinear(self.d_a, self.embed_dim)
    self.out_fc1 = nn.Linear(final_out, h_out)
    self.out_fc2 = nn.Linear(h_out, output_dim)
    self.out_dropout = nn.Dropout(out_dropout)
def __init__(self, model, batch_per_epoch, config):
    """Initialize model pruner."""
    self.weights_count = utils.count_parameters(model)
    self.itr = 1
    self.pruners = []
    for name, weight in model.named_parameters():
        if 'bias' not in name:
            self.pruners.append(
                WeightPruner(name, weight, batch_per_epoch, config[name]))
def __init__(self):
    super(Net2, self).__init__()
    c = 10
    downsample = ConvResBlock(1, c)
    x = torch.randn(1, 1, 28, 28)
    x.requires_grad_(False)
    x_sz = downsample(x).shape
    head = FeedForward(x_sz)
    layers = [downsample, head]
    [print(count_parameters(x)) for x in layers]
    self.layers = nn.Sequential(*layers)
def trainGANs(n=100, datadir='data/gan/'):
    for i in range(n):
        try:
            os.mkdir(datadir + str(i))
        except FileExistsError:
            pass
        loader = data.MNIST(batch=128)
        model = SimpleGAN(28 * 28, zdim=64, hd=64, hg=64, lr=2e-4).cuda()
        print('Network: ' + str(i) + ', Params: ' +
              str(utils.count_parameters(model)))
        # model = DCGAN(zdim=16, h=4, lr=2e-4).cuda()
        trainer = MNISTTrainer(model, loader, datadir + str(i) + '/')
        trainer.train(epochs=100)
def training_curves(models, y_data, settings, histories, smoothing=1):
    """Function for building training curves."""
    epochs = np.arange(settings['epochs'])
    markers = ['.', '^']  # ok for two outputs only

    if len(models) == 1:
        f = plt.figure(figsize=(4 * len(models), 4 * len(models)))
    else:
        f = plt.figure(figsize=(10 * len(models) / 3, 10 * len(models)))

    for i, mod in enumerate(models):
        model_name = utils.get_model_name(mod, settings['dataset'])
        plt.subplot(1, len(models), i + 1)
        for j in range(len(y_data)):
            plt.plot(np.convolve(
                np.log(histories[model_name].history[y_data[j] + '_mse']),
                np.ones(smoothing) / smoothing, mode='valid'),
                'k--', alpha=0.5)
            plt.plot(np.convolve(
                np.log(histories[model_name].history['val_' + y_data[j] + '_mse']),
                np.ones(smoothing) / smoothing, mode='valid'),
                'r--', alpha=0.5)
            plt.plot(epochs[::smoothing], np.convolve(
                np.log(histories[model_name].history[y_data[j] + '_mse']),
                np.ones(smoothing) / smoothing, mode='valid')[::smoothing],
                'k' + markers[j], label=y_data[j], alpha=0.5)
            plt.plot(epochs[::smoothing], np.convolve(
                np.log(histories[model_name].history['val_' + y_data[j] + '_mse']),
                np.ones(smoothing) / smoothing, mode='valid')[::smoothing],
                'r' + markers[j], alpha=0.5)
        if i == 0:
            plt.legend(frameon=False)
        plt.xlabel('epochs')
        plt.ylabel('log MSE')
        plt.title(model_name +
                  ' ({} params)'.format(utils.count_parameters(mod)))
        plt.gca().set_aspect(1. / plt.gca().get_data_ratio())
    plt.tight_layout()
    return f
def __init__(self, epochs=NUM_EPOCHS):
    self.model = SingleNetwork()
    self.alpha = list(pd.read_pickle('class_weights.pkl').values())
    self.criterion = FocalLoss(gamma=2, alpha=self.alpha)
    self.optimizer = torch.optim.Adam(self.model.parameters(),
                                      lr=1e-1,
                                      weight_decay=1e-2)
    self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        self.optimizer, T_max=10, eta_min=5e-5)
    self.epochs = epochs
    self.train_loader = torch.load('train_loader.pt')
    self.cv_loader = torch.load('cv_loader.pt')
    self.best_loss = 1e3
    print(count_parameters(self.model))
def __init__(self):
    super(ODENet, self).__init__()
    c = 64
    downsample = ConvResBlock(1, c)
    x = torch.randn(1, 1, 28, 28)
    x.requires_grad_(False)
    x_sz = downsample(x).shape
    self.feature_layers = ODEBlock(ODEfunc(x_sz[1]))
    head = FeedForward(x_sz)
    layers = [downsample, self.feature_layers, head]
    [print(count_parameters(x)) for x in layers]
    self.layers = nn.Sequential(*layers)
    print(self.layers)
def main():
    set_random_seed(C.seed)
    summary_writer = SummaryWriter(C.log_dpath)

    train_iter, val_iter, test_iter, vocab = build_loaders(C)

    model = build_model(C, vocab)
    print("#params: ", count_parameters(model))
    model = model.cuda()

    optimizer = torch.optim.Adamax(model.parameters(), lr=C.lr, weight_decay=1e-5)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, C.epochs, eta_min=0, last_epoch=-1)

    best_val_scores = {'CIDEr': -1.}
    for e in range(1, C.epochs + 1):
        print()
        ckpt_fpath = C.ckpt_fpath_tpl.format(e)

        """ Train """
        teacher_forcing_ratio = get_teacher_forcing_ratio(
            C.decoder.max_teacher_forcing_ratio,
            C.decoder.min_teacher_forcing_ratio, e, C.epochs)
        train_loss = train(e, model, optimizer, train_iter, vocab,
                           teacher_forcing_ratio, C.CA_lambda, C.gradient_clip)
        log_train(C, summary_writer, e, train_loss, get_lr(optimizer),
                  teacher_forcing_ratio)
        lr_scheduler.step()

        """ Validation """
        val_loss = evaluate(model, val_iter, vocab, C.CA_lambda)
        val_scores, _, _, _ = score(model, val_iter, vocab)
        log_val(C, summary_writer, e, val_loss, val_scores)

        if val_scores['CIDEr'] > best_val_scores['CIDEr']:
            best_val_scores = val_scores
            best_epoch = e
            best_model = model

        print("Saving checkpoint at epoch={} to {}".format(e, ckpt_fpath))
        save_checkpoint(ckpt_fpath, e, model, optimizer)

    """ Test """
    test_scores, _, _, _ = score(best_model, test_iter, vocab)
    for metric in C.metrics:
        summary_writer.add_scalar("BEST SCORE/{}".format(metric),
                                  test_scores[metric], best_epoch)
    best_ckpt_fpath = C.ckpt_fpath_tpl.format("best")
    save_checkpoint(best_ckpt_fpath, best_epoch, best_model, optimizer)
def build_model(self):
    # The enhancement model currently in use.
    self.G = LineartoMel_real(F=self.F,
                              melF_to_linearFs=self.melF_to_linearFs,
                              nCH=self.config.nCH,
                              w=self.config.convW,
                              H=self.config.nMap_per_F,
                              L=self.config.L_CNN,
                              non_linear=self.config.non_linear,
                              BN=self.config.complex_BN)
    G_name = 'LineartoMel_real'
    print('initialized enhancement model as ' + G_name)

    nParam = count_parameters(self.G)
    print('# trainable parameters = ' + str(nParam))
def load_model(model_path):
    """Helper function to load a model from the given path; raises an error if
    the path does not exist."""
    if not os.path.exists(model_path):
        raise RuntimeError(f'Could not find provided model_path: {model_path}')

    ae = AutoEncoder()
    ae.load_state_dict(torch.load(model_path, map_location=DEVICE))
    print(f'Loaded existing model from {model_path}')
    total_params = utils.count_parameters(ae)
    print(f'Model has {total_params} parameters')
    ae = ae.to(DEVICE)
    return ae
def train_transformer():
    if args.data == 'iq':
        input_size = int(3200 / (args.src_time_step + args.trg_time_step))
    else:
        input_size = 4096
    input_dim = int(input_size / 2)

    model = TransformerGenerationModel(
        ntokens=10000,  # TODO: wait for Paul's data
        # time_step=args.time_step,
        input_dims=[input_dim, input_dim],
        # proj_dims=args.modal_lengths,
        hidden_size=args.hidden_size,
        # output_dim=args.output_dim,
        num_heads=args.num_heads,
        attn_dropout=args.attn_dropout,
        relu_dropout=args.relu_dropout,
        res_dropout=args.res_dropout,
        layers=args.nlevels,
        horizons=args.nhorizons,
        attn_mask=args.attn_mask,
        crossmodal=args.crossmodal)
    if use_cuda:
        model = model.cuda()
    print("Model size: {0}".format(count_parameters(model)))

    optimizer = getattr(optim, args.optim)(model.parameters(),
                                           lr=args.lr,
                                           weight_decay=1e-7)
    # criterion = nn.CrossEntropyLoss()
    criterion = nn.MSELoss()
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  patience=2,
                                  factor=0.5,
                                  verbose=True)

    settings = {
        'model': model,
        'optimizer': optimizer,
        'criterion': criterion,
        'scheduler': scheduler,
        'input_size': input_size,
        'src_time_step': args.src_time_step,
        'trg_time_step': args.trg_time_step
    }
    return train_model(settings)