def predict(arr, checkpoint_path):
    """Predict the gesture class of a single sample.

    Args:
        arr: array-like of shape (3, seq_len) with seq_len <= MAX_LEN
             (channels x time); zero-padded up to MAX_LEN before inference.
             -- assumed from the indexing below; confirm against callers.
        checkpoint_path: checkpoint file to restore the model weights from.

    Returns:
        The predicted class label as an int (1-based).
    """
    # Zero-pad the time dimension so the input always has width MAX_LEN.
    padded = np.zeros((3, MAX_LEN))
    padded[:, :len(arr[0])] = arr

    model = Gest_CNN()
    # The optimizer is created only because load_checkpoint restores its
    # state alongside the model weights.
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    _ = load_checkpoint(checkpoint_path, model, optimizer)

    # Fix: run in inference mode -- eval() freezes dropout/batch-norm and
    # no_grad() disables gradient tracking (the original predicted in
    # training mode, which can change the output).
    model.eval()
    with torch.no_grad():
        tensor = torch.from_numpy(np.asarray([padded])).float()
        output = model(tensor)
        pred = output.argmax(dim=1, keepdim=True)

    # Network outputs are 0-based class indices; callers expect 1-based labels.
    return pred.data.numpy()[0][0] + 1
def main():
    """Entry point: restore a checkpoint, then train or evaluate Gest_CNN."""
    cli_args = get_arguments()
    run_device = get_device()

    net = Gest_CNN()
    loss_fn = torch.nn.CrossEntropyLoss()
    sgd = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

    # Resuming (if a checkpoint was given) yields the epoch to continue from.
    first_epoch = load_checkpoint(cli_args.checkpoint, net, sgd)

    if cli_args.mode == "train":
        X_train, X_valid, y_train, y_valid = load_train_data()
        train(first_epoch, first_epoch + NUM_EPOCHS + 1, net, loss_fn, sgd,
              run_device, X_train, X_valid, y_train, y_valid)
    if cli_args.mode == "test":
        X, y = load_test_data()
        test(X, y, net)
# NOTE(review): this span begins mid-call -- the parser.add_argument(...) that
# this `help=` keyword belongs to starts above the visible region.
                    help='Checkpoint file to continue training from.')
args = parser.parse_args()

# Initialize torch modules
model = AutoEncoder()
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=hp.training.learning_rate)
scaler = torch.cuda.amp.GradScaler()  # mixed-precision loss/gradient scaling
criterion = torch.nn.MSELoss()

# Load parameters from checkpoint to resume training or start new training
if args.checkpoint is not None:
    # The checkpoint path is assumed to be <run_dir>/checkpoints/<file>, so
    # two dirname() calls recover run_dir -- confirm against save layout below.
    run_dir = os.path.dirname(os.path.dirname(args.checkpoint))
    model, optimizer, scaler, early_stopping_score, early_stopping_counter, running_mean, running_std, step, = \
        load_checkpoint(run_dir, model, optimizer, scaler, 'latest')
else:
    # Fresh run: create a timestamped run directory with checkpoints/ and
    # logs/ subdirectories, and reset all resume bookkeeping.
    now = time.strftime("%Y-%m-%d__%H_%M_%S", time.localtime())
    run_dir = os.path.join('..', 'runs', now)
    os.makedirs(os.path.join(run_dir, 'checkpoints'), exist_ok=True)
    os.makedirs(os.path.join(run_dir, 'logs'), exist_ok=True)
    early_stopping_score = None
    early_stopping_counter = 0
    step = 0

# Initialize EarlyStopping (restores best score/counter when resuming)
early_stopping = EarlyStopping(step=step, run_dir=run_dir,
                               best_score=early_stopping_score,
                               counter=early_stopping_counter, verbose=False)
def train(model, data, dtype, args):
    """Train a GAN with a gradient penalty on the discriminator (WGAN-GP style).

    Args:
        model: GAN wrapper exposing optimizers, losses, sampling and a name.
        data: iterable of batches; len(data) must be defined (DataLoader-like).
        dtype: tensor type every batch and noise sample is cast to.
        args: dict of hyperparameters and bookkeeping options (see keys below).
    """
    iter_count = 0
    disc_solver = model.disc_optimizer(args['learning_rate'], args['beta1'],
                                       args['beta2'], args['weight_decay'],
                                       args['optim'])
    gen_solver = model.gen_optimizer(args['learning_rate'], args['beta1'],
                                     args['beta2'], args['weight_decay'],
                                     args['optim'])
    starting_epoch = 0
    if args['resume']:
        # args['resume'] is the global iteration of the checkpoint to restore;
        # it also determines which epoch (and batch within it) to continue at.
        checkpoints.load_checkpoint(model, CHECKPOINTS_DIR, args['resume'])
        starting_epoch = args['resume'] // len(data)
        iter_count = args['resume']
        print('Loading checkpoint of {name} at {chkpt_iter}'.format(
            name=model.name, chkpt_iter=args['resume']))
    for epoch in tqdm(range(starting_epoch, args['num_epochs']),
                      desc='epochs', position=1):
        for batch_index, batch in tqdm(enumerate(data), desc='iterations',
                                       position=2, total=len(data)):
            # When resuming mid-epoch, skip the batches already processed.
            if args['resume'] and batch_index < iter_count % len(data):
                continue
            try:
                x, _ = batch  # dataset yields (input, label); label unused
            except ValueError:
                x = batch     # dataset yields bare tensors
            x = x.type(dtype)  # Batch data
            # Calculate number of discriminator iterations: train the critic
            # harder during warmup and at the rapid-train interval thereafter.
            disc_iterations = args['disc_warmup_iterations'] if (
                batch_index < args['disc_warmup_length']
                or batch_index % args['disc_rapid_train_interval'] == 0) \
                else args['disc_iterations']
            # Train discriminator
            for _ in range(disc_iterations):
                disc_solver.zero_grad()
                real_data = model.preprocess_data(x).type(dtype)
                noise = model.sample_noise(args['batch_size']).type(dtype)
                disc_loss, fake_images = model.disc_loss(
                    real_data, noise, True)
                # Add the gradient penalty before backprop (WGAN-GP).
                disc_loss_gp = disc_loss + model.gradient_penalty(
                    x, fake_images, lambda_val=args['lambda_val'])
                disc_loss_gp.backward()
                disc_solver.step()
            # Train generator
            gen_solver.zero_grad()
            noise = model.sample_noise(args['batch_size']).type(dtype)
            gen_loss = model.gen_loss(noise)
            gen_loss.backward()
            gen_solver.step()
            if iter_count % args['losses_every'] == 0:
                # Negated critic loss approximates the Wasserstein distance.
                reporter.visualize_scalar(
                    -disc_loss.item(),
                    'Wasserstein distance between x and g',
                    iteration=iter_count, env=model.name)
                reporter.visualize_scalar(gen_loss.item(), 'Generator loss',
                                          iteration=iter_count,
                                          env=model.name)
            # send sample images to the visdom server.
            if iter_count % args['images_every'] == 0:
                reporter.visualize_images(
                    model.sample_images(args['sample_size']).data,
                    'generated samples {}'.format(
                        iter_count // args['images_every']),
                    env=model.name)
            if iter_count % args['checkpoint_every'] == 0 and iter_count != 0:
                checkpoints.save_checkpoint(model, CHECKPOINTS_DIR,
                                            iter_count, args)
            iter_count += 1
        # Clear the resume flag so batch-skipping applies only once.
        # NOTE(review): original indentation was lost; this assignment is
        # placed at epoch level -- confirm against the upstream source.
        args['resume'] = None
    samples = model.sample_images(args['sample_size']).data
    reporter.visualize_images(samples, 'final generated samples',
                              env=model.name)
    checkpoints.save_images(samples, args['tag'])
    checkpoints.save_checkpoint(model, FINALS_DIR, iter_count, args)
# NOTE(review): this span is cut off at both ends -- `gradients`, `variables`,
# `o` (the optimizer) and the loss graph are built above the visible region,
# and the final sess.run(...) call is truncated mid-expression.
gradients, _ = tf.clip_by_global_norm(gradients, gradient_clip)
capped_gvs = zip(gradients, variables)
# optimize
apply_gradients = o.apply_gradients(capped_gvs)
graph_utils._check_save_and_train_vars()
with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
    # Sentinel best_loss so the first measured loss always improves on it.
    orig_step, saved_step, best_loss = 0, 0, 9999999999999
    # maybe load checkpoint
    if checkpoints.load_checkpoint(sess, checkpoint_load_dir,
                                   filter_vars_not_in_graph=False):
        orig_step, best_loss = sess.run([graph_utils.GLOBAL_STEP,
                                         graph_utils.SAVED_STEP_INFO])
        saved_step = orig_step
    # make saver for saving variables
    saver = tf.train.Saver(tf.get_collection(graph_utils.SAVE_VARS))

    def debug_model(temp):
        # Generate sample text for inspection during training.
        sess.run(cell_reset_op)  # reset recurrent state before generating
        # start list with starter tokens
        allgen = char_dictionary.encode_string(debug_string_starter)
        # generate one token at a time
        for _ in range(debug_length):
            allgen.append(sess.run(generated, feed_dict={