def main(arg=None):
    """Train for 5001 steps, logging loss every 100 steps and summaries every 50.

    Saves a final checkpoint to ``flag.save_dir`` and closes the summary
    writer when training completes.
    """
    images, labels = image_loader.read_batch()
    logits = inference.inference(images)
    loss = ls.loss(logits, labels)
    # BUG FIX: build the entire graph (optimizer, metrics) *before* creating
    # and running the initializer, so every variable is covered by it. The
    # original created train_step after sess.run(init).
    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))  # kept for summaries/debugging
    saver = tf.train.Saver()
    summary_opt = tf.summary.merge_all()
    init = tf.global_variables_initializer()
    sess = tf.InteractiveSession()
    sess.run(init)
    tf.train.start_queue_runners(sess=sess)
    summary_writer = tf.summary.FileWriter(flag.log_dir, graph=sess.graph)
    for i in range(5001):  # range: works on Python 2 and 3 (was xrange)
        if i % 100 == 0:
            # print() form is valid under both Python 2 and 3.
            print('step {0}, loss: {1}'.format(i, sess.run(ls.get_loss())))
        sess.run(train_step)
        if i % 50 == 0:
            summary_str = sess.run(summary_opt)
            summary_writer.add_summary(summary_str, i)
    saver.save(sess=sess, save_path=flag.save_dir)
    summary_writer.close()
def train(arguments):
    """Two-stage training of the upscaler U-Net, then export model + metadata."""
    dataset = UpscalerDataset(arguments.dataset_path, arguments.resized_dataset_path)
    feat_loss = get_loss(arguments.loss_model_path)
    learner = unet_learner(
        dataset.get_dataloaders(bs, size),
        arch,
        loss_func=feat_loss,
        metrics=LossMetrics(feat_loss.metric_names),
        blur=True,
        norm_type=NormType.Weight,
    )

    # Stage 1: fit the head, then unfreeze and fine-tune the whole network.
    print('stage 1')
    do_fit(learner, wd, slice(lr * 10))
    learner.unfreeze()
    do_fit(learner, wd, slice(1e-5, lr))

    # Checkpoint round-trip between the two stages.
    learner.save('checkpoint')
    learner.load('checkpoint')

    # Stage 2: retrain at doubled resolution with a smaller batch size.
    print('stage 2')
    del learner.dls
    learner.dls = dataset.get_dataloaders(5, size * 2)
    learner.freeze()
    do_fit(learner, wd, slice(lr))
    learner.unfreeze()
    do_fit(learner, wd, slice(1e-6, 1e-4), pct_start=0.3)

    # Persist the trained model together with run metadata.
    torch.save({'model': learner.model, 'meta': make_meta(arguments)}, arguments.output)
def initialize_training(model_id, save_path):
    """Create (or reload) a model plus its loss, optimizer, scheduler and writer.

    Returns (model, loss_func, include_boundaries, optimizer, scheduler, writer).
    """
    # get_model seeds each loaded model identically, so each inner loop
    # trains the same initialized model.
    if str(model_id).lower() == 'same':
        # Reload Model_0 (sibling directory of save_path) to continue training.
        model_0_path = os.path.join(os.path.dirname(save_path), "Model_0/model.pt")
        model = torch.load(model_0_path, map_location=parameters.device)
    else:
        model = get_model(model_id).to(parameters.device)

    print(model)

    # CONSISTENCY FIX (matches the pre-train variant of this function): look up
    # hyper-parameters by the requested model id; 'same' has no entry of its
    # own so it falls back to the global MODEL_ID.
    if str(model_id).lower() == 'same':
        model_id = parameters.MODEL_ID
    hyper = parameters.HYPERPARAMETERS[model_id]

    writer = SummaryWriter(save_path)
    writer.add_scalar('batch_size', parameters.BATCH_SIZE)
    writer.add_scalar('weight_decay', hyper['l2_reg'])

    loss_func, include_boundaries = get_loss()

    optimizer = torch.optim.Adam(model.parameters(), lr=hyper['lr'],
                                 weight_decay=hyper['l2_reg'])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, hyper['lr_decay_step'],
                                                gamma=hyper['lr_decay'])

    return model, loss_func, include_boundaries, optimizer, scheduler, writer
def train(args):
    """Run training with checkpoint restore, per-epoch validation and ckpt saving."""
    args, model, iters, vocab, ckpt_available = get_model_ckpt(args)
    if ckpt_available:
        print("loaded checkpoint {}".format(args.ckpt_name))

    loss_fn = get_loss(args, vocab)
    optimizer = get_optimizer(args, model)
    trainer = get_trainer(args, model, loss_fn, optimizer)
    metrics = get_metrics(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn, metrics)
    logger = get_logger(args)

    @trainer.on(Events.STARTED)
    def announce_start(engine):
        print("Begin Training")

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_each_iteration(engine):
        log_results(logger, 'train/iter', engine.state, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def validate_and_checkpoint(engine):
        # Log train stats, validate, then checkpoint on the epoch's loss metric.
        log_results(logger, 'train/epoch', engine.state, engine.state.epoch)
        val_state = evaluate_once(evaluator, iterator=iters['val'])
        log_results(logger, 'valid/epoch', val_state, engine.state.epoch)
        save_ckpt(args, engine.state.epoch, engine.state.metrics['loss'], model, vocab)

    trainer.run(iters['train'], max_epochs=args.max_epochs)
def main():
    """Plot or export the step-loss series parsed from a training log."""
    parser = argparse.ArgumentParser()
    parser.add_argument("command", help="Either 'plot' or 'write'.")
    parser.add_argument("folder", help="Folder containing log file.")
    args = parser.parse_args()

    # Reject anything other than the two supported commands up front.
    if args.command not in ['plot', 'write']:
        raise Exception("Unknown command {}".format(args.command))

    # Parse loss values out of the log file.
    log_filepath = os.path.join(args.folder, "train.log")
    with open(log_filepath, "r") as log_file:
        step_loss = loss.get_loss(log_file)

    if args.command == "write":
        # Dump the series as CSV next to the log.
        step_loss.to_csv(os.path.join(args.folder, "train.csv"), header=True)
    elif args.command == "plot":
        # Show the loss curve, clamped at zero.
        step_loss.plot()
        plt.ylim(0)
        plt.show()
def main():
    """Set up the experiment, run optional early evaluation, then the train loop."""
    # Set up the Arguments, Tensorboard Writer, Dataloader, Loss Fn, Optimizer
    assert_and_infer_cfg(args)
    writer = prep_experiment(args, parser)
    train_loader, val_loader, train_obj = datasets.setup_loaders(args)
    criterion, criterion_val = loss.get_loss(args)
    net = network.get_net(args, criterion)
    optim, scheduler = optimizer.get_optimizer(args, net)

    torch.cuda.empty_cache()

    if args.evaluate:
        # Early evaluation for benchmarking.
        # BUG FIX: `epoch` was referenced here before assignment (it was only
        # bound inside the loop below); validate at the configured start epoch.
        validate(val_loader, net, criterion_val, optim, args.start_epoch, writer)
        evaluate(val_loader, net)
        return

    # Main loop
    for epoch in range(args.start_epoch, args.max_epoch):
        # Expose the current epoch through the (normally immutable) config.
        cfg.immutable(False)
        cfg.EPOCH = epoch
        cfg.immutable(True)

        scheduler.step()
        train(train_loader, net, criterion, optim, epoch, writer)
        validate(val_loader, net, criterion_val, optim, epoch, writer)
def train(args):
    """Train the model, validating at the end of every epoch."""
    iters, vocab = get_iterator(args)
    model = get_model(args, vocab)
    loss_fn = get_loss(args, vocab)
    optimizer = get_optimizer(args, model)

    trainer = get_trainer(args, model, loss_fn, optimizer)
    metrics = get_metrics(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn, metrics)
    logger = get_logger(args)

    @trainer.on(Events.STARTED)
    def announce_start(engine):
        print("Begin Training")

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_each_iteration(engine):
        log_results(logger, 'train/iter', engine.state, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def validate_each_epoch(engine):
        # Log train stats, then run one pass over the validation iterator.
        log_results(logger, 'train/epoch', engine.state, engine.state.epoch)
        val_state = evaluate_once(evaluator, iterator=iters['val'])
        log_results(logger, 'valid/epoch', val_state, engine.state.epoch)

    trainer.run(iters['train'], max_epochs=args.max_epochs)
def main():
    """Build the distributed network and validate over the extra datasets only."""
    # Set up the Arguments, Dataloader, Loss Fn, Optimizer
    assert_and_infer_cfg(args)
    prep_experiment(args, parser)
    writer = None

    _, _, _, extra_val_loaders, _ = datasets.setup_loaders(args)
    criterion, criterion_val = loss.get_loss(args)
    criterion_aux = loss.get_loss_aux(args)

    net = network.get_net(args, criterion, criterion_aux)
    optim, scheduler = optimizer.get_optimizer(args, net)

    # Distributed setup: sync batch-norm, then wrap for the local rank.
    net = torch.nn.SyncBatchNorm.convert_sync_batchnorm(net)
    net = network.warp_network_in_dataparallel(net, args.local_rank)

    epoch = 0
    i = 0
    if args.snapshot:
        # Resume epoch counter (and optionally optimizer state) from snapshot.
        epoch, mean_iu = optimizer.load_weights(net, optim, scheduler,
                                                args.snapshot, args.restore_optimizer)
    print("#### iteration", i)
    torch.cuda.empty_cache()

    # Main Loop
    # for epoch in range(args.start_epoch, args.max_epoch):
    # Validation-only entry point: no training happens here.
    for dataset, val_loader in extra_val_loaders.items():
        print("Extra validating... This won't save pth file")
        validate(val_loader, dataset, net, criterion_val, optim, scheduler,
                 epoch, writer, i, save_pth=False)
def forward(self, document_embeddings, question_embeddings, answer, document):
    """Encode the pair, decode span logits, and compute loss/baseline/EM."""
    encoded = self.encoder(document_embeddings, question_embeddings)
    start_logits, end_logits = self.decoder(encoded)
    loss, baseline, em = get_loss(
        start_logits, end_logits, answer, document,
        self.sigma_ce[0], self.sigma_rl[0])
    return loss, baseline, em
def update(self, input, label):
    """Run one optimization step on a single batch and return the predictions."""
    predicted = self.classifier(input)
    batch_loss = get_loss(predicted, label, avg_meters=self.avg_meters)

    # Standard zero-grad / backward / step cycle.
    self.optimizer.zero_grad()
    batch_loss.backward()
    self.optimizer.step()

    return {'predicted': predicted}
def train(args):
    """Optional pretraining pass followed by training with per-epoch validation."""
    args, model, iters, vocab, ckpt_available = get_model_ckpt(args)
    if ckpt_available:
        print("loaded checkpoint {}".format(args.ckpt_name))

    loss_fn = get_loss(args, vocab)
    optimizer = get_optimizer(args, model)
    pretrainer = get_pretrainer(args, model, loss_fn, optimizer)
    trainer = get_trainer(args, model, loss_fn, optimizer)
    metrics = get_metrics(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn, metrics)
    logger = get_logger(args)

    # --- pretraining handlers -------------------------------------------
    @pretrainer.on(Events.STARTED)
    def announce_pretrain_start(engine):
        print("Begin Pretraining")

    @pretrainer.on(Events.ITERATION_COMPLETED)
    def log_pretrain_iteration(engine):
        log_results(logger, 'pretrain/iter', engine.state, engine.state.iteration)

    @pretrainer.on(Events.EPOCH_COMPLETED)
    def log_pretrain_epoch(engine):
        log_results(logger, 'pretrain/epoch', engine.state, engine.state.epoch)

    """
    @pretrainer.on(Events.COMPLETED)
    def unfreeze_language_model(engine):
        for param in model.module.language_model.base_model.parameters():
            param.requires_grad = True
    """

    # --- training handlers ----------------------------------------------
    @trainer.on(Events.STARTED)
    def announce_train_start(engine):
        print("Begin Training")

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_train_iteration(engine):
        log_results(logger, 'train/iter', engine.state, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def validate_and_checkpoint(engine):
        # Log train stats, validate, checkpoint, then break down by logic level.
        log_results(logger, 'train/epoch', engine.state, engine.state.epoch)
        state = evaluate_once(evaluator, iterator=iters['val'])
        log_results(logger, 'valid/epoch', state, engine.state.epoch)
        log_results_cmd('valid/epoch', state, engine.state.epoch)
        save_ckpt(args, engine.state.epoch, engine.state.metrics['loss'], model, vocab)
        evaluate_by_logic_level(args, model, iterator=iters['val'])

    if args.pretrain_epochs > 0:
        pretrainer.run(iters['pretrain'], max_epochs=args.pretrain_epochs)
    trainer.run(iters['train'], max_epochs=args.max_epochs)
def evaluate(args):
    """Load a checkpoint (if any) and run a single validation pass."""
    args, model, iters, vocab, ckpt_available = get_model_ckpt(args)
    if ckpt_available:
        print("loaded checkpoint {}".format(args.ckpt_name))

    loss_fn = get_loss(args, vocab)
    metrics = get_metrics(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn, metrics)

    # Single evaluation pass, logged as epoch 0.
    state = evaluate_once(evaluator, iterator=iters['val'])
    log_results_cmd('valid/epoch', state, 0)
def main():
    """Set up the experiment and run the full train/validate loop."""
    # Set up the Arguments, Tensorboard Writer, Dataloader, Loss Fn, Optimizer
    assert_and_infer_cfg(args)
    writer = prep_experiment(args, parser)
    train_loader, val_loader, train_obj = datasets.setup_loaders(args)
    criterion, criterion_val = loss.get_loss(args)
    net = network.get_net(args, criterion)
    optim, scheduler = optimizer.get_optimizer(args, net)

    if args.fix_bn:
        # Keep batch-norm layers in eval mode while fine-tuning.
        net.apply(set_bn_eval)
        print("Fix bn for finetuning")

    if args.fp16:
        net, optim = amp.initialize(net, optim, opt_level="O1")

    net = network.wrap_network_in_dataparallel(net, args.apex)

    if args.snapshot:
        optimizer.load_weights(net, optim, args.snapshot, args.restore_optimizer)

    if args.evaluateF:
        assert args.snapshot is not None, "must load weights for evaluation"
        evaluate(val_loader, net, args)
        return

    torch.cuda.empty_cache()

    # Main Loop
    for epoch in range(args.start_epoch, args.max_epoch):
        # Publish the current epoch through the (normally immutable) config.
        cfg.immutable(False)
        cfg.EPOCH = epoch
        cfg.immutable(True)

        scheduler.step()
        train(train_loader, net, optim, epoch, writer)
        if args.apex:
            train_loader.sampler.set_epoch(epoch + 1)
        validate(val_loader, net, criterion_val, optim, epoch, writer)

        if args.class_uniform_pct:
            # Rebuild the class-uniform sampling; after max_cu_epoch, cut it.
            if epoch >= args.max_cu_epoch:
                train_obj.build_epoch(cut=True)
                if args.apex:
                    train_loader.sampler.set_num_samples()
            else:
                train_obj.build_epoch()
def train():
    """Train AlexNet from a TFRecord queue with LR decay and EMAs of losses/weights."""
    filename_queue = tf.train.string_input_producer([tfrecord_filename])
    images, labels = reader.read_and_decode(filename_queue=filename_queue,
                                            batch_size=FLAGS.batch_size)
    with tf.device('/gpu:0'):
        global_step = slim.create_global_step()
    with tf.device('/cpu:0'):
        num_batches_per_epoch = FLAGS.data_size / FLAGS.batch_size
        decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
        lrn_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step,
                                              decay_steps,
                                              LEARNING_RATE_DECAY_FACTOR,
                                              staircase=True)
    with tf.device('/gpu:0'):
        fc8 = alexnet.model(input_data=images, n_classes=FLAGS.n_classes,
                            keep_prob=FLAGS.keep_prob)
        total_loss = loss.get_loss(input_data=fc8, grdtruth=labels)
        # Moving average over all collected losses for smoother reporting.
        loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
        losses = tf.get_collection('losses')
        loss_averages_op = loss_averages.apply(losses + [total_loss])
        with tf.control_dependencies([loss_averages_op]):
            optimizer = tf.train.AdamOptimizer(lrn_rate)
            train_step = optimizer.minimize(loss=total_loss, global_step=global_step)
        # EMA of trainable variables, updated after each optimizer step.
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variable_averages_op = variable_averages.apply(tf.trainable_variables())
        with tf.control_dependencies([train_step, variable_averages_op]):
            train_step = tf.no_op(name='train')
        prediction = alexnet.classify(fc8)

    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        for j in range(FLAGS.max_epoch):
            for i in range(200):
                _, loss_value, pre, grd = sess.run(
                    [train_step, total_loss, prediction, labels])
                # BUG FIX: py2-only print statements replaced with print()
                # calls producing byte-identical output on both versions.
                print('{} epoch {} minibatch:{}'.format(j + 1, i, loss_value))
                print('{} epoch {} minibatch:{} {}'.format(j + 1, i, pre, grd))
        coord.request_stop()
        coord.join(threads)
def infer(args):
    """Run the model on the test split and dump the answers to answers.json."""
    args, model, iters, vocab, ckpt_available = get_model_ckpt(args)
    if ckpt_available:
        print("loaded checkpoint {}".format(args.ckpt_name))

    loss_fn = get_loss(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn)

    answers = evaluate_once(evaluator, iterator=iters['test'])
    # Emit answers sorted by question id so the file is deterministic.
    answers = [{"correct_idx": answers[qid], "qid": qid}
               for qid in sorted(answers.keys())]

    path = str(args.data_path.parent / 'answers.json')
    with open(path, 'w') as f:
        json.dump(answers, f, indent=4)
    print("saved outcome at {}".format(path))
def main(eval_args=None):
    """Evaluate the model on Cityscapes, defaulting to the bundled best checkpoint."""
    # Parse arguments from rest_communication.py
    # args = parser.parse_args(eval_args)
    # IDIOM FIX: compare against None with `is`, not `==`.
    if args.snapshot is None:
        args.snapshot = "checkpoints/best_cityscapes_checkpoint.pth"

    train_loader, val_loader, train_obj = datasets.setup_loaders(args)
    criterion, criterion_val = loss.get_loss(args)
    net = network.get_net(args, criterion)
    net = restore_snapshot(net)

    torch.cuda.empty_cache()
    return evaluate(val_loader, net)
def train(args):
    """Assemble the training pipeline and run it for args.max_epochs epochs."""
    args, model, iters, ckpt_available = get_model_ckpt(args)
    if ckpt_available:
        print("loaded checkpoint {}".format(args.ckpt_name))

    loss_fn = get_loss(args)
    optimizer = get_optimizer(args, model)
    trainer = get_trainer(args, model, loss_fn, optimizer)
    metrics = get_metrics(args)
    evaluator = get_evaluator(args, model, loss_fn, metrics)
    logger = get_logger(args)

    # BUG FIX: original read `trainer.run(iters['train']), max_epochs=...)`,
    # a syntax error; max_epochs belongs inside the run() call.
    trainer.run(iters['train'], max_epochs=args.max_epochs)
def main():
    """Wire up datasets, the VGG19-based style-transfer model, and train it."""
    # Datasets for content and style images.
    content_ds = ImageDataset(CONTENT_DS_PATH, batch_size=BATCH_SIZE)
    style_ds = ImageDataset(STYLE_DS_PATH, batch_size=BATCH_SIZE)

    # Encoder (VGG19) plus a decoder mirroring the encoder's output shape.
    vgg19 = build_vgg19(INPUT_SHAPE, VGG_PATH)
    decoder = build_decoder(vgg19.output.shape[1:])
    model = build_model(vgg19, decoder, INPUT_SHAPE)
    # model.load_weights(SAVE_PATH)

    # Loss computed over the VGG19 ReLU activations.
    vgg19_relus = build_vgg19_relus(vgg19)
    loss = get_loss(vgg19, vgg19_relus, epsilon=EPSILON,
                    style_weight=STYLE_WEIGHT, color_weight=COLOR_LOSS)

    # Train model
    train(model, content_ds, style_ds, loss, n_epochs=EPOCHS, save_path=SAVE_PATH)
def pretrain(args):
    """Assemble data, model, loss, optimizers and trainer for the pretraining stage."""
    transform = get_transform(args, 'none')
    ds = get_dataset(args, transform, 'none')

    args, model, ckpt_available = get_model_ckpt(args)
    if ckpt_available:
        print("loaded checkpoint {} in pretraining stage".format(args.ckpt_name))

    loss_fn = get_loss(args)
    # Inner optimizer wrapped by the outer one (e.g. LARS-style wrapping).
    sub_optimizer = get_sub_optimizer(args, model)
    optimizer = get_optimizer(args, sub_optimizer)
    scheduler = get_scheduler(args, optimizer)

    # setup nvidia/apex amp
    # model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level, num_losses=1)
    # model = idist.auto_model(model)

    trainer = get_trainer(args, model, loss_fn, optimizer, scheduler)
def create_model(config):
    """Build, compile and checkpoint a fresh DeepICP model from `config`."""
    checkpoint_root = config.project_config['model_checkpoint_path']
    model_name = config.model_name
    model_path = f'{checkpoint_root}/{model_name}'

    model, loss_inp = DeepICPBuilder(config.net_config).build()

    # Attach the alpha-weighted loss over (source, target, ground-truth) points.
    loss = get_loss(config.train_config["loss_alpha"])
    source_pts, target_pts, GT = loss_inp
    model.add_loss(loss(source_pts, target_pts, GT))

    optimizer = Adam(learning_rate=config.train_config['learning_rate'])
    model.compile(optimizer=optimizer)

    # Persist the fresh model and its network configuration.
    save_model(model, config)
    config.save_current_model_net_config()
    return model
def train():
    """Train the VGG-M classifier from a TFRecord queue and save a checkpoint."""
    filename_queue = tf.train.string_input_producer([tfrecord_filename])
    images, labels = reader.read_and_decode(filename_queue=filename_queue,
                                            batch_size=FLAGS.batch_size)
    with tf.device('/gpu:0'):
        global_step = slim.create_global_step()
    with tf.device('/cpu:0'):
        num_batches_per_epoch = FLAGS.data_size / FLAGS.batch_size
        decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
        lrn_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step,
                                              decay_steps,
                                              LEARNING_RATE_DECAY_FACTOR,
                                              staircase=True)
    # Cap GPU memory so other jobs can share the device.
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.6
    with tf.device('/gpu:0'):
        optimizer = tf.train.AdamOptimizer(lrn_rate)
        fc8 = vgg_m.model(input_data=images, n_classes=FLAGS.n_classes,
                          keep_prob=FLAGS.keep_prob)
        losses = loss.get_loss(input_data=fc8, grdtruth=labels)
        train_step = optimizer.minimize(loss=losses, global_step=global_step)
        prediction = vgg_m.classify(fc8)
    with tf.device('/cpu:0'):
        saver = tf.train.Saver()

    with tf.Session(config=config) as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        for j in range(FLAGS.max_epoch):
            for i in range(200):
                _, loss_value, pre, grd = sess.run(
                    [train_step, losses, prediction, labels])
                # BUG FIX: py2-only print statements replaced with print()
                # calls producing byte-identical output on both versions.
                print('{} epoch {} minibatch:{}'.format(j + 1, i, loss_value))
                print('{} epoch {} minibatch:{} {}'.format(j + 1, i, pre, grd))
        save_path = saver.save(sess, model_path)
        print("Model saved in file:%s" % save_path)
        coord.request_stop()
        coord.join(threads)
def train(epoch):
    """One VAE training epoch on grayscale-averaged inputs; snapshot at epoch 4."""
    banner = '#' * 15
    print(banner)
    print('Epoch {}, Latent Size {}'.format(epoch, model.latent_size))
    print(banner)

    model.train()
    for index, (x, _) in enumerate(loader):
        # Collapse the channel dimension to a single grayscale channel.
        x = x.mean(dim=1, keepdim=True).to(device)

        optimizer.zero_grad()
        x_generated, mu, logvar = model(x)
        loss = get_loss(x_generated, x, mu, logvar)
        loss.backward()
        optimizer.step()

        if index % 100 == 0:
            print('Loss at iteration {0}: {1:.4f}'.format(index, loss.item()))

    # Snapshot weights once at epoch 4; keep stepping the LR schedule before that.
    if epoch == 4:
        filename = 'epoch{}_ls{}.pkl'.format(epoch, model.latent_size)
        torch.save(model.state_dict(), os.path.join(weights_dir, filename))
    if epoch < 4:
        scheduler.step()
def pretrain(args):
    """Pretrain the model over the dataset, logging per-iteration and checkpointing per-epoch."""
    transform = get_transform(args, 'none')
    ds = get_dataset(args, transform, 'none')

    args, model, ckpt_available = get_model_ckpt(args)
    if ckpt_available:
        print("loaded checkpoint {} in pretraining stage".format(
            args.ckpt_name))

    loss_fn = get_loss(args)
    # Inner optimizer wrapped by the outer one.
    sub_optimizer = get_sub_optimizer(args, model)
    optimizer = get_optimizer(args, sub_optimizer)
    scheduler = get_scheduler(args, optimizer)

    # setup nvidia/apex amp
    # model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level, num_losses=1)
    # model = idist.auto_model(model)

    trainer = get_trainer(args, model, loss_fn, optimizer, scheduler)
    metrics = get_metrics(args)
    logger = get_logger(args)

    @trainer.on(Events.STARTED)
    def announce_start(engine):
        print("Begin Pretraining")

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_each_iteration(engine):
        # batch-wise logging
        log_results(logger, 'pretrain/iter', engine.state, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def checkpoint_each_epoch(engine):
        # epoch-wise logging + checkpoint
        log_results(logger, 'pretrain/epoch', engine.state, engine.state.epoch)
        log_results_cmd(logger, 'pretrain/epoch', engine.state, engine.state.epoch)
        save_ckpt(args, engine.state.epoch, engine.state.metrics['loss'], model)

    trainer.run(ds, max_epochs=args.epoch)
def run(args):
    """Full experiment driver: setup, epoch loop with train/val, final test, timing.

    NOTE(review): the original formatting was collapsed; the loop boundary here
    (final test-set evaluation after the epoch loop) follows the sibling driver
    in this file — confirm against the original layout.
    """
    set_seed(args.seed)
    xp = create_experiment(args)
    train_loader, val_loader, test_loader = get_loaders(args)
    loss = get_loss(xp, args)
    model = get_model(args)

    if args.load_model:
        load_model(model, args.load_model)

    if args.cuda:
        # Either replicate across all visible GPUs or pin to a single device.
        if args.parallel_gpu:
            model = torch.nn.DataParallel(model).cuda()
        else:
            torch.cuda.set_device(args.device)
            model.cuda()
        loss.cuda()

    optimizer = get_optimizer(model, args.mu, args.lr_0, xp)
    if args.load_optimizer:
        load_optimizer(optimizer, args.load_optimizer, args.lr_0)

    with logger.stdout_to("{}_log.txt".format(args.out_name)):
        clock = -time.time()
        for _ in range(args.epochs):
            xp.Epoch.update(1).log()
            # The LR schedule may swap in a new optimizer each epoch.
            optimizer = update_optimizer(args.lr_schedule, optimizer, model, loss, xp)
            xp.Learning_Rate.update().log()
            xp.Mu.update().log()
            xp.Temperature.update().log()

            train(model, loss, optimizer, train_loader, xp, args)
            test(model, loss, val_loader, xp, args)

        test(model, loss, test_loader, xp, args)
        clock += time.time()
        print("\nEvaluation time: \t {0:.2g} min".format(clock * 1. / 60))
def valid(loader, dt, run_validation=False):
    """Compute the mean validation loss of model `dt` over `loader`.

    The original hard-coded an early ``return 0`` that made the whole body
    dead code. That behavior is preserved by default; pass
    ``run_validation=True`` to actually evaluate (backward-compatible
    generalization).
    """
    if not run_validation:
        return 0

    dt.eval()
    valid_loss = []
    for j, vld_data in enumerate(tqdm(loader)):
        imgs, flows, inv_flows, masks, labels, n_clusters, _ = vld_data
        # Skip degenerate batches (a 1-D tensor — presumably an empty-batch
        # sentinel from the collate function; TODO confirm).
        if imgs.dim() == 1:
            continue
        imgs, flows, inv_flows, masks, labels, n_clusters = \
            imgs.cuda(), flows.cuda(), inv_flows.cuda(), masks.cuda(), labels.cuda(), n_clusters.cuda()
        with torch.no_grad():
            fgmask, emb, tail = dt(imgs, flows, inv_flows)
            loss, fg_loss, var_loss, dist_loss = get_loss(
                fgmask, emb, tail, masks, labels, n_clusters)
        valid_loss.append(loss.detach().cpu())

    valid_loss = np.mean(valid_loss)
    print(f"Validation Loss: {valid_loss}")
    dt.train()  # restore training mode for the caller
    return valid_loss
def eval_linear(pretrain_args, args):
    """Train a linear classifier on top of a pretrained backbone."""
    # Restore the pretrained backbone.
    pt_args, pt_model, ckpt_available = get_model_ckpt(pretrain_args)

    transform = get_transform(args, 'train')
    ds = get_dataset(args, transform, 'train')

    if ckpt_available:
        print("loaded pretrained model {} in eval linear".format(args.ckpt_name))

    # Linear head over the backbone's features.
    model = get_linear(args, pt_model, args.num_classes)
    loss_fn = get_loss(args)
    optimizer = get_sub_optimizer(args, model)
    scheduler = get_scheduler(args, optimizer)

    trainer = get_trainer(args, model, loss_fn, optimizer, scheduler)
    evaluator = get_evaluator(args, model, loss_fn)
    # metrics = get_metrics(args)
    logger = get_logger(args)

    trainer.run(ds, max_epochs=args.epoch)
def test():
    """Evaluate the trained AlexNet on 80 batches and report overall accuracy."""
    filename_queue = tf.train.string_input_producer([tfrecord_filename])
    images, labels = reader.read_and_decode(filename_queue=filename_queue,
                                            batch_size=FLAGS.batch_size,
                                            shuffle_batch=False)
    with tf.device('/gpu:0'):
        fc8 = alexnet.model(input_data=images, n_classes=FLAGS.n_classes,
                            keep_prob=FLAGS.keep_prob)
        losses = loss.get_loss(input_data=fc8, grdtruth=labels)
        prediction = alexnet.classify(fc8)
    with tf.device('/cpu:0'):
        saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, model_path)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        total = 0.0
        right = 0.0
        for i in range(80):
            loss_value, pre, grd = sess.run([losses, prediction, labels])
            # BUG FIX: py2-only print statements replaced with print() calls
            # producing byte-identical output on both versions.
            print('{}image loss :{}'.format(i + 1, loss_value))
            print('{} result::{} {}'.format(i + 1, pre, grd))
            right += np.sum(np.equal(pre, grd))
            # `+= 10` assumes batch_size == 10 — TODO confirm against FLAGS.batch_size.
            total += 10
        # Typo fixed in the output message: 'accurcy' -> 'accuracy'.
        print('accuracy:' + str(right / total))
        coord.request_stop()
        coord.join(threads)
def load_model(config):
    """Rebuild the DeepICP model and restore saved model + optimizer weights.

    Mirrors create_model()'s path layout. Returns the compiled model.
    """
    checkpoint_root = config.project_config['model_checkpoint_path']
    model_name = config.model_name
    weights = config.weights  # checkpoint tag — presumably an epoch/run id; confirm
    # BUG FIX: `model_path`, `checkpoint` and `model_weights` were undefined
    # names (NameError at runtime). Derive them from config the same way
    # create_model does. TODO confirm the exact on-disk layout.
    model_path = f'{checkpoint_root}/{model_name}'

    # Restore the optimizer state saved alongside the checkpoint.
    optimizer = Adam(learning_rate=config.train_config['learning_rate'])
    optimizer_weights_path = f'{model_path}/optimizer_{weights}.pkl'
    with open(optimizer_weights_path, 'rb') as f:
        weight_values = pickle.load(f)
    optimizer.set_weights(weight_values)

    model, loss_inp = DeepICPBuilder(config.net_config).build()
    loss = get_loss(config.train_config["loss_alpha"])
    source_pts, target_pts, GT = loss_inp
    model.add_loss(loss(source_pts, target_pts, GT))

    # BUG FIX: the original re-created a fresh Adam here, silently discarding
    # the optimizer weights restored above; compile with the restored one.
    model.compile(optimizer=optimizer)
    model.load_weights(f'{model_path}/{weights}')
    return model
def main(args):
    """Train with scheduled LR decay, then evaluate the best saved model."""
    set_cuda(args)
    set_seed(args)

    loader_train, loader_val, loader_test = get_data_loaders(args)
    loss = get_loss(args)
    model = get_model(args)
    optimizer = get_optimizer(args, model, loss, parameters=model.parameters())
    xp = setup_xp(args, model, optimizer)

    for i in range(args.epochs):
        xp.epoch.update(i)
        train(model, loss, optimizer, loader_train, args, xp)
        test(model, optimizer, loader_val, args, xp)
        # args.T lists the (1-indexed) epochs after which the LR is decayed.
        if (i + 1) in args.T:
            decay_optimizer(optimizer, args.decay_factor)

    # Final evaluation using the best checkpoint seen during training.
    load_best_model(model, '{}/best_model.pkl'.format(args.xp_name))
    test(model, optimizer, loader_val, args, xp)
    test(model, optimizer, loader_test, args, xp)
def initialize_training(model_id, save_path, model_type=0, pre_train_path=None):
    """Build model, loss, optimizer, scheduler and summary writer for one run.

    Returns (model, loss_func, include_boundaries, optimizer, scheduler, writer).
    """
    # get_model seeds each loaded model identically, so every inner loop
    # trains the same initialized network.
    if str(model_id).lower() == 'same':
        # Continue training from the sibling Model_0 checkpoint.
        final_slash = save_path.rindex('/')
        model_0_path = os.path.join(save_path[:final_slash], "Model_0/model.pt")
        model = torch.load(model_0_path, map_location=parameters.device)
    elif parameters.PRE_TRAIN and model_type == 0:
        # Load a pre-trained first-stage model.
        print("Loading Pre-Trained Model 0")
        model = torch.load(pre_train_path, map_location=parameters.device)
    elif parameters.HIERARCHICAL_PRE_TRAIN and model_type == 1:
        print("Loading Pre-Trained Model 1")
        model = torch.load(pre_train_path, map_location=parameters.device)
    else:
        model = get_model(model_id).to(parameters.device)
    print(model)

    writer = SummaryWriter(save_path)
    writer.add_scalar('batch_size', parameters.BATCH_SIZE)
    writer.add_scalar('weight_decay',
                      parameters.HYPERPARAMETERS[parameters.MODEL_ID]['l2_reg'])

    # model_type == 1 marks the second-stage (hierarchical) model.
    second_stage = (model_type == 1)
    loss_func, include_boundaries = get_loss(is_second_stage=second_stage)

    # 'same' has no hyper-parameter entry of its own; fall back to the global id.
    if str(model_id).lower() == 'same':
        model_id = parameters.MODEL_ID
    hyper = parameters.HYPERPARAMETERS[model_id]
    optimizer = torch.optim.Adam(model.parameters(), lr=hyper['lr'],
                                 weight_decay=hyper['l2_reg'])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                hyper['lr_decay_step'],
                                                gamma=hyper['lr_decay'])
    return model, loss_func, include_boundaries, optimizer, scheduler, writer