def main(args):
    """Build the GAN variant named by ``args.model`` and train or evaluate it.

    Args:
        args: Parsed command-line namespace. This function reads
            ``args.model`` and ``args.is_train`` and, on the evaluation
            path, ``args.load_D`` and ``args.load_G``. The whole namespace
            is forwarded to the model constructor and to
            ``get_data_loader``.

    Raises:
        SystemExit: With status ``-1`` when ``args.model`` is not one of
            ``'GAN'``, ``'DCGAN'``, ``'WGAN-CP'`` or ``'WGAN-GP'``.
    """
    if args.model == 'GAN':
        model = GAN(args)
    elif args.model == 'DCGAN':
        model = DCGAN_MODEL(args)
    elif args.model == 'WGAN-CP':
        model = WGAN_CP(args)
    elif args.model == 'WGAN-GP':
        model = WGAN_GP(args)
    else:
        print("Model type non-existing. Try again.")
        # Raise SystemExit directly instead of calling ``exit()``: that helper
        # is injected by the ``site`` module for interactive sessions and is
        # not guaranteed to exist (e.g. ``python -S`` or frozen executables).
        raise SystemExit(-1)

    # Load datasets to train and test loaders
    train_loader, test_loader = get_data_loader(args)

    # ``is_train`` is compared against the literal string 'True'; any other
    # value selects evaluation on the test data.
    if args.is_train == 'True':
        model.train(train_loader)
    else:
        model.evaluate(test_loader, args.load_D, args.load_G)
def make(self, cf, optimizer=None):
    """Instantiate the model selected by ``cf.model_name``.

    Args:
        cf: Configuration object; ``cf.model_name`` picks the model and the
            object is forwarded to the builders.
        optimizer: Optimizer handed to ``make_one_net_model`` for the
            single-network models. Must not be ``None``.

    Returns:
        The constructed model.

    Raises:
        ValueError: If ``optimizer`` is ``None`` or the model name is not
            recognised.
    """
    if optimizer is None:
        raise ValueError('optimizer can not be None')

    single_net_names = [
        'lenet', 'alexNet', 'vgg16', 'vgg19', 'resnet50', 'InceptionV3',
        'fcn8', 'unet', 'segnet_vgg', 'segnet_basic', 'resnetFCN',
        'yolo', 'tiny-yolo',
    ]
    name = cf.model_name

    if name in single_net_names:
        in_shape, loss, metrics = self.basic_model_properties(cf, True)
        model = self.make_one_net_model(cf, in_shape, loss, metrics,
                                        optimizer)
    elif name in ('adversarial_semseg', 'gan'):
        # Only the input shape is needed here: the original notes that loss,
        # metrics and optimizer are made inside class Adversarial_Semseg
        # (presumably the same holds for GAN -- TODO confirm).
        in_shape, _, _ = self.basic_model_properties(cf, False)
        if name == 'adversarial_semseg':
            model = Adversarial_Semseg(cf, in_shape)
        else:
            model = GAN(cf, in_shape)
    else:
        raise ValueError('Unknown model name')

    # Output the model
    print(' Model: ' + cf.model_name)
    return model
def main(args):
    """Prepare the data directory, build the selected GAN and train/evaluate.

    Args:
        args: Parsed command-line namespace. Reads ``dataset``,
            ``dataroot``, ``batch_size``, ``epochs``, ``channels``,
            ``model``, ``resume_training`` and ``is_train``, plus
            ``load_D`` / ``load_G`` on the evaluation path.

    Raises:
        SystemExit: With status ``-1`` when ``args.model`` is not one of
            ``'GAN'``, ``'DCGAN'``, ``'WGAN-CP'`` or ``'WGAN-GP'``.
    """
    # -------------- prepare data ------------------------
    dataset = args.dataset
    dataroot = args.dataroot
    # ``exist_ok`` removes the check-then-create race of the original
    # ``os.path.exists`` guard.
    os.makedirs(dataroot, exist_ok=True)

    batch_size = args.batch_size
    epochs = args.epochs
    channels = args.channels
    model_name = args.model

    if model_name == 'GAN':
        # Unlike the other variants, GAN takes the epoch count and batch
        # size rather than the whole namespace.
        model = GAN(epochs, batch_size)
    elif model_name == 'DCGAN':
        model = DCGAN_MODEL(args)
    elif model_name == 'WGAN-CP':
        model = WGAN_CP(args)
    elif model_name == 'WGAN-GP':
        model = WGAN_GP(args)
    else:
        print("Model type non-existing. Try again.")
        # ``exit()`` is a ``site`` helper meant for interactive use and may
        # be missing; raising SystemExit is the portable equivalent.
        raise SystemExit(-1)

    workers = 0  # number of workers for dataloader, 2 creates problems
    utils = Utils()
    train_loader, test_loader = utils.prepare_data(
        dataroot, batch_size, workers, dataset, model_name, channels)

    # Both switches arrive as strings; only the literal 'True' enables them.
    resume_training = args.resume_training == 'True'
    if args.is_train == 'True':
        model.train(train_loader, resume_training)
    else:
        # start evaluating on test data
        model.evaluate(test_loader, args.load_D, args.load_G)
def _solver_kwargs():
    """Return the keyword arguments shared by every solver, read from FLAGS."""
    return dict(batch_size=FLAGS.batch_size,
                train_iter=FLAGS.train_iter,
                log_dir=FLAGS.log_save_path,
                model_save_path=FLAGS.model_save_path,
                sample_save_path=FLAGS.sample_save_path)


def _make_save_dirs():
    """Create the model and sample output directories if they do not exist."""
    for path in (FLAGS.model_save_path, FLAGS.sample_save_path):
        if not tf.gfile.Exists(path):
            tf.gfile.MakeDirs(path)


def _run_solver_mode(solver, supported_modes):
    """Call the solver method named by FLAGS.mode if the network supports it."""
    if FLAGS.mode in supported_modes:
        getattr(solver, FLAGS.mode)()


def main(_):
    """Build the network selected by ``FLAGS.network`` and run ``FLAGS.mode``.

    Supported networks are ``'vae'``, ``'gan'`` and ``'acgan'``. For each,
    the model and its solver are constructed, the save directories are
    created, and the solver method matching ``FLAGS.mode`` is invoked.
    An unrecognised network or an unsupported mode is a silent no-op,
    as in the original implementation.

    Args:
        _: Unused positional argument.
    """
    network = FLAGS.network

    if network == 'vae':
        model = VAE(mode=FLAGS.mode, batch_size=FLAGS.batch_size,
                    latent_dim=FLAGS.latent_dim)
        solver = VAE_Solver(model, **_solver_kwargs())
        supported_modes = ('train', 'reconstruct', 'sample', 'encode')
    elif network == 'gan':
        model = GAN(mode=FLAGS.mode)
        solver = GAN_Solver(model, z_dim=100, **_solver_kwargs())
        supported_modes = ('train', 'sample')
    elif network == 'acgan':
        model = ACGAN(mode=FLAGS.mode, batch_size=FLAGS.batch_size)
        solver = ACGAN_Solver(model, z_dim=128, feature_class='Smiling',
                              **_solver_kwargs())
        supported_modes = ('train', 'sample')
    else:
        return

    # create directories if not exist
    _make_save_dirs()
    _run_solver_mode(solver, supported_modes)
def main(argv=None):
    """Build a GAN and a Wasserstein GAN side by side and run ``cross_dis``.

    Each network is created under its own variable scope (``'gan'`` and
    ``'wgan'``) with identical layer dimensions and optimizer settings
    taken from FLAGS; both are then passed to ``cross_dis.run``.

    An earlier single-model path (selected through ``FLAGS.model`` and
    driven by ``FLAGS.mode``) was left commented out in this function and
    is not executed.

    Args:
        argv: Unused.
    """
    base_width = 64 * FLAGS.gen_dimension
    generator_dims = [base_width] + [base_width // d for d in (2, 4, 8)] + [3]
    discriminator_dims = [3] + [64 * m for m in (1, 2, 4, 8)] + [1]

    # FLAGS.image_size is a "crop,resized" pair of integers.
    crop_image_size, resized_image_size = [
        int(part) for part in FLAGS.image_size.split(',')
    ]

    import cross_dis

    gan1_scope_name = 'gan'
    gan2_scope_name = 'wgan'

    # Arguments common to both constructors / both create_network calls.
    data_args = (FLAGS.z_dim, crop_image_size, resized_image_size,
                 FLAGS.batch_size, FLAGS.data_dir)
    network_args = (generator_dims, discriminator_dims, FLAGS.optimizer,
                    FLAGS.learning_rate, FLAGS.optimizer_param)

    with tf.variable_scope(gan1_scope_name):
        gan1 = GAN(*data_args, critic_iterations=1, root_scope_name='gan/')
        gan1.create_network(*network_args)

    with tf.variable_scope(gan2_scope_name):
        gan2 = WasserstienGAN(*data_args, root_scope_name='wgan/',
                              critic_iterations=5)
        gan2.create_network(*network_args)

    cross_dis.run(gan1, gan2, gan1_scope_name, gan2_scope_name,
                  discriminator_dims, FLAGS.logs_dir, FLAGS.checkpoint_file,
                  int(FLAGS.iterations))
def main(model_name):
    """Train the requested GAN variant on MNIST and checkpoint every epoch.

    The per-model settings are read from the ``model_name`` entry of
    ``config.json``. Train and test features are stacked into one pool from
    which batches are sampled without replacement within a batch. After each
    epoch the generator and discriminator state dicts are saved under
    ``.ckpts/<model_name>/`` and one batch of generated samples is kept.
    The loss histories and generated samples are pickled to
    ``results.pkl`` in the same directory once training finishes.

    Args:
        model_name: Either ``"gan"`` or ``"dcgan"``. Any other value prints
            a message and returns without touching the filesystem.

    Returns:
        None.
    """
    # Validate first so a wrong name has no filesystem side effects.
    if model_name not in ("gan", "dcgan"):
        print("The model name is wrong!")
        return

    ckpt_path = ".ckpts/%s/" % model_name
    # Creates ``.ckpts`` and the per-model folder in one race-free call.
    os.makedirs(ckpt_path, exist_ok=True)

    with open("config.json") as f:
        config = json.load(f)[model_name]

    loader = MNISTLoader()
    if model_name == "gan":
        model = GAN(loader.feature_depth, config["latent_depth"])
    else:
        model = DCGAN(loader.feature_shape, config["latent_depth"])

    batch_size = config["batch_size"]
    num_samples = loader.num_train_sets + loader.num_test_sets
    steps_per_epoch = num_samples // batch_size

    features = np.vstack([loader.train_features, loader.test_features])
    features = feature_normalize(features)

    generator_losses_epoch = []
    discriminator_losses_epoch = []
    generated_images = []

    for epoch in range(1, config["num_epochs"] + 1):
        generator_step_losses = []
        discriminator_step_losses = []

        for _ in range(steps_per_epoch):
            sampled_indices = np.random.choice(
                num_samples, batch_size, replace=False)
            generator_loss, discriminator_loss = model.train_one_step(
                features[sampled_indices])
            generator_step_losses.append(generator_loss)
            discriminator_step_losses.append(discriminator_loss)

        generator_loss_epoch = np.mean(generator_step_losses)
        discriminator_loss_epoch = np.mean(discriminator_step_losses)
        print(
            "Epoch: %i, Generator Loss: %f, Discriminator Loss: %f" %
            (epoch, generator_loss_epoch, discriminator_loss_epoch)
        )
        generator_losses_epoch.append(generator_loss_epoch)
        discriminator_losses_epoch.append(discriminator_loss_epoch)

        torch.save(model.generator.state_dict(),
                   ckpt_path + "generator_%i.ckpt" % epoch)
        torch.save(model.discriminator.state_dict(),
                   ckpt_path + "discriminator_%i.ckpt" % epoch)

        faked_samples = feature_denormalize(model.generate(batch_size))
        # ``.cpu()`` is a no-op for CPU tensors but is required before
        # ``.numpy()`` if the model ever runs on a CUDA device.
        generated_images.append(faked_samples.detach().cpu().numpy())

    with open(ckpt_path + "results.pkl", "wb") as f:
        pickle.dump((generator_losses_epoch,
                     discriminator_losses_epoch,
                     generated_images), f)
# Timestamp used to give every run its own log / result directories.
ts = time.time()
timestamp = datetime.datetime.fromtimestamp(ts).strftime('%d_%m_%Y_%H_%M_%S')

# Command line: a single option pointing at the YAML experiment config.
parser = argparse.ArgumentParser(description='GAN without MI')
parser.add_argument('--config', type=str,
                    default='./configs/spiral_mine.yml',
                    help='Path to config file')
opts = parser.parse_args()

params = get_config(opts.config)
print(params)

train_loader, val_loader = spiral_dataloader(params)

# ``use_mine`` switches both the model class and, further down, the trainer.
model = GAN_MI(params) if params['use_mine'] else GAN(params)
if params['use_cuda']:
    model = model.cuda()

logger = Logger(params['logs'])

# Per-run output folders: <base><exp_name>_<timestamp>/
exp_logs = params['logs'] + params['exp_name'] + '_' + timestamp + '/'
exp_results = params['results'] + params['exp_name'] + '_' + timestamp + '/'
mkdir_p(exp_logs)
mkdir_p(exp_results)

if params['use_mine']:
    gan_trainer = GANTrainerMI(model, params, train_loader, val_loader,
                               logger, exp_results, exp_logs)
else:
    gan_trainer = GANTrainerVanilla(model, params, train_loader, val_loader,
                                    logger, exp_results, exp_logs)
def train(cv_num_batch, tr_num_batch): with tf.Graph().as_default(): with tf.device('/cpu:0'): with tf.name_scope('input'): tr_data_list = read_list(FLAGS.tr_list_file) tr_inputs, tr_labels = get_batch( tr_data_list, FLAGS.batch_size, FLAGS.input_dim, FLAGS.output_dim, FLAGS.left_context, FLAGS.right_context, FLAGS.num_threads, FLAGS.max_epoches) cv_data_list = read_list(FLAGS.cv_list_file) cv_inputs, cv_labels = get_batch( cv_data_list, FLAGS.batch_size, FLAGS.input_dim, FLAGS.output_dim, FLAGS.left_context, FLAGS.right_context, FLAGS.num_threads, FLAGS.max_epoches) devices = [] for i in xrange(FLAGS.num_gpu): device_name = ("/gpu:%d" % i) print('Using device: ', device_name) devices.append(device_name) # Prevent exhausting all the gpu memories. config = tf.ConfigProto() config.gpu_options.allow_growth = True config.allow_soft_placement = True # execute the session with tf.Session(config=config) as sess: # Create two models with tr_inputs and cv_inputs individually. with tf.name_scope('model'): print( "=======================================================") print( "| Build Train model |") print( "=======================================================") tr_model = GAN(sess, FLAGS, devices, tr_inputs, tr_labels, cross_validation=False) # tr_model and val_model should share variables print( "=======================================================") print( "| Build Cross-Validation model |") print( "=======================================================") tf.get_variable_scope().reuse_variables() cv_model = GAN(sess, FLAGS, devices, cv_inputs, cv_labels, cross_validation=True) show_all_variables() init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) print("Initializing variables ...") sess.run(init) if tr_model.load(tr_model.save_dir): print("[*] Load SUCCESS") else: print("[!] 
Begin a new model.") coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) try: cv_d_rl_loss, cv_d_fk_loss, \ cv_d_loss, cv_g_adv_loss, \ cv_g_mse_loss, cv_g_l2_loss, \ cv_g_loss = eval_one_epoch(sess, coord, cv_model, cv_num_batch, 0) print("CROSSVAL.LOSS PRERUN: " "d_rl_loss = {:.5f}, d_fk_loss = {:.5f}, " "d_loss = {:.5f}, g_adv_loss = {:.5f}, " "g_mse_loss = {:.5f}, g_l2_loss = {:.5f}, " "g_loss = {:.5f}".format(cv_d_rl_loss, cv_d_fk_loss, cv_d_loss, cv_g_adv_loss, cv_g_mse_loss, cv_g_l2_loss, cv_g_loss)) sys.stdout.flush() g_loss_prev = cv_g_loss decay_steps = 1 for epoch in range(FLAGS.max_epoches): start = datetime.datetime.now() tr_d_rl_loss, tr_d_fk_loss, \ tr_d_loss, tr_g_adv_loss, \ tr_g_mse_loss, tr_g_l2_loss, \ tr_g_loss = train_one_epoch(sess, coord, tr_model, tr_num_batch, epoch+1) cv_d_rl_loss, cv_d_fk_loss, \ cv_d_loss, cv_g_adv_loss, \ cv_g_mse_loss, cv_g_l2_loss, \ cv_g_loss = eval_one_epoch(sess, coord, cv_model, cv_num_batch, epoch+1) d_lr, g_lr = sess.run( [tr_model.d_learning_rate, tr_model.g_learning_rate]) end = datetime.datetime.now() print("Epoch {} (TRAIN AVG.LOSS): " "d_rl_loss = {:.5f}, d_fk_loss = {:.5f}, " "d_loss = {:.5f}, g_adv_loss = {:.5f}, " "g_mse_loss = {:.5f}, g_l2_loss = {:.5f}, " "g_loss = {:.5f}, " "d_lr = {:.3e}, g_lr = {:.3e}\n" "Epoch {} (CROSS AVG.LOSS): " "d_rl_loss = {:.5f}, d_fk_loss = {:.5f}, " "d_loss = {:.5f}, g_adv_loss = {:.5f}, " "g_mse_loss = {:.5f}, g_l2_loss = {:.5f}, " "g_loss = {:.5f}, " "time = {:.2f} h".format( epoch + 1, tr_d_rl_loss, tr_d_fk_loss, tr_d_loss, tr_g_adv_loss, tr_g_mse_loss, tr_g_l2_loss, tr_g_loss, d_lr, g_lr, epoch + 1, cv_d_rl_loss, cv_d_fk_loss, cv_d_loss, cv_g_adv_loss, cv_g_mse_loss, cv_g_l2_loss, cv_g_loss, (end - start).seconds / 3600.0)) sys.stdout.flush() g_loss_new = cv_g_loss # Accept or reject new parameters if g_loss_new < g_loss_prev: tr_model.save(tr_model.save_dir, epoch + 1) print("Epoch {}: Nnet Accepted. 
" "Save model SUCCESS.".format(epoch + 1)) # Relative loss between previous and current val_loss g_rel_impr = (g_loss_prev - g_loss_new) / g_loss_prev g_loss_prev = g_loss_new else: print("Epoch {}: Nnet Rejected.".format(epoch + 1)) if tr_model.load(tr_model.save_dir): print("[*] Load previous model SUCCESS.") sys.stdout.flush() else: print("[!] Load failed. No checkpoint from {} to " "restore previous model. Exit now.".format( tr_model.save_dir)) sys.stdout.flush() sys.exit(1) # Relative loss between previous and current val_loss g_rel_impr = (g_loss_prev - g_loss_new) / g_loss_prev # Start decay when improvement is low (Exponential decay) if g_rel_impr < FLAGS.start_decay_impr and \ epoch+1 >= FLAGS.keep_lr: g_learning_rate = \ FLAGS.g_learning_rate * \ FLAGS.decay_factor ** (decay_steps) d_learning_rate = \ FLAGS.d_learning_rate * \ FLAGS.decay_factor ** (decay_steps) disc_noise_std = \ FLAGS.init_disc_noise_std * \ FLAGS.decay_factor ** (decay_steps) sess.run( tf.assign(tr_model.g_learning_rate, g_learning_rate)) sess.run( tf.assign(tr_model.d_learning_rate, d_learning_rate)) sess.run( tf.assign(tr_model.disc_noise_std, disc_noise_std)) decay_steps += 1 # Stopping criterion if g_rel_impr < FLAGS.end_decay_impr: if epoch < FLAGS.min_epoches: print("Epoch %d: We were supposed to finish, " "but we continue as min_epoches %d" % (epoch + 1, FLAGS.min_epoches)) continue else: print("Epoch %d: Finished, too small relative " "G improvement %g" % (epoch + 1, g_rel_impr)) break except Exception, e: # Report exceptions to the coordinator. coord.request_stop(e) finally:
def decode(): """Decoding the inputs using current model.""" tf.logging.info("Get TEST sets number.") num_batch = get_num_batch(FLAGS.test_list_file, infer=True) with tf.Graph().as_default(): with tf.device('/cpu:0'): with tf.name_scope('input'): data_list = read_list(FLAGS.test_list_file) test_utt_id, test_inputs, _ = get_batch( data_list, batch_size=1, input_size=FLAGS.input_dim, output_size=FLAGS.output_dim, left=FLAGS.left_context, right=FLAGS.right_context, num_enqueuing_threads=FLAGS.num_threads, num_epochs=1, infer=True) devices = [] for i in xrange(FLAGS.num_gpu): device_name = ("/gpu:%d" % i) print('Using device: ', device_name) devices.append(device_name) # Prevent exhausting all the gpu memories. config = tf.ConfigProto() config.gpu_options.allow_growth = True config.allow_soft_placement = True # execute the session with tf.Session(config=config) as sess: # Create two models with tr_inputs and cv_inputs individually. with tf.name_scope('model'): model = GAN(sess, FLAGS, devices, test_inputs, labels=None, cross_validation=True) show_all_variables() init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) print("Initializing variables ...") sess.run(init) if model.load(model.save_dir, moving_average=True): print("[*] Load SUCCESS") else: print("[!] Load failed. Checkpoint not found. Exit now.") sys.exit(1) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) cmvn_filename = os.path.join(FLAGS.data_dir, "train_cmvn.npz") if os.path.isfile(cmvn_filename): cmvn = np.load(cmvn_filename) else: tf.logging.fatal("%s not exist, exit now." 
% cmvn_filename) sys.exit(1) out_dir_name = os.path.join(FLAGS.save_dir, 'test') if not os.path.exists(out_dir_name): os.makedirs(out_dir_name) write_scp_path = os.path.join(out_dir_name, 'feats.scp') write_ark_path = os.path.join(out_dir_name, 'feats.ark') writer = ArkWriter(write_scp_path) try: for batch in range(num_batch): if coord.should_stop(): break outputs = model.generator(test_inputs, None, reuse=True) outputs = tf.reshape(outputs, [-1, model.output_dim]) utt_id, activations = sess.run([test_utt_id, outputs]) sequence = activations * cmvn['stddev_labels'] + \ cmvn['mean_labels'] save_result = np.vstack(sequence) writer.write_next_utt(write_ark_path, utt_id[0], save_result) tf.logging.info("Write inferred %s to %s" % (utt_id[0], write_ark_path)) except Exception, e: # Report exceptions to the coordinator. coord.request_stop(e) finally:
# crop_size = crop_size, # ratio = 0.5, # capacity = batch_size*10, # min_holding= batch_size*5, # threads = 8) dataset = MNISTDataSet('/home/nathan/envs/tensorflow/MNIST_data', batch_size = batch_size) network = GAN( sess = sess, zed_dim = 2, n_kernels = 64, bayesian = False, dataset = dataset, input_channel = 1, log_dir = log_dir, save_dir = save_dir, input_dims = [28,28], load_snapshot = False, learning_rate = 2e-4, label_dim = 10) ## Has to come after init_op ??? coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord) """ Training loop. Call network.train_step() once for each global step. Insert testing / snapshotting however you want.
def _make_autoencoder(args, img_c):
    """Load or build the compiled autoencoder, then print its summary."""
    if args.load:
        model = keras.models.load_model(os.path.join(args.out, 'ae'))
        print('Loaded model')
    else:
        # Autoencoder: normalise -> encode -> measure latent norm ->
        # synthesize -> attach the MSE term against the input image.
        img = keras.Input((args.imsize, args.imsize, img_c), name='img-in')
        out = NormalizeImage()(img)
        out = encode(args, out, out_dim=args.zdim)
        out = MeasureNorm(name='latent_norm')(out)
        out = synthesize(args, out, img_c)
        out = AddMSE()((img, out))

        model = keras.Model(img, out, name='autoencoder')
        model.compile(
            optimizer=keras.optimizers.Adam(args.ae_lr, args.beta1),
            steps_per_execution=args.steps_exec)
        print('Starting with new model')

    # Summarize
    model.summary()
    return model


def _make_gan(args, img_c):
    """Load or build generator and discriminator and wrap them in a GAN."""
    image_shape = (args.imsize, args.imsize, img_c)

    if args.load:
        # Loading model
        gen = keras.models.load_model(os.path.join(args.out, 'gen'))
        disc = keras.models.load_model(os.path.join(args.out, 'disc'))
        print('Loaded model')
    else:
        # Generator
        gen_in = keras.Input(image_shape, name='gen-in')
        z = LatentMap(args)(gen_in)
        gen_out = synthesize(args, z, img_c)
        gen = keras.Model(gen_in, gen_out, name='generator')

        # Discriminator: encoder with a single output unit.
        disc_in = keras.Input(image_shape, name='disc-in')
        disc_out = NormalizeImage()(disc_in)
        disc_out = encode(args, disc_out, out_dim=1)
        disc = keras.Model(disc_in, disc_out, name='discriminator')

        gen.compile(keras.optimizers.Adam(args.gen_lr, args.beta1))
        disc.compile(keras.optimizers.Adam(args.disc_lr, args.beta1))
        print('Starting with new model')

    # Summarize
    disc.summary()
    gen.summary()

    # GAN
    model = GAN(args, gen, disc)
    model.compile(steps_per_execution=args.steps_exec)
    return model


def make_model(args, img_c):
    """Create (or load) the model selected by ``args.model``.

    Args:
        args: Namespace of options. ``args.model`` must be ``'autoencoder'``
            or ``'gan'``; ``args.load`` chooses between loading saved
            models from ``args.out`` and building new ones.
        img_c: Number of image channels, used as the last dimension of the
            input shape and forwarded to ``synthesize``.

    Returns:
        The compiled autoencoder model, or the compiled ``GAN`` wrapper.

    Raises:
        ValueError: If ``args.model`` is not a known model name. (This is a
            subclass of the ``Exception`` raised previously, so existing
            ``except Exception`` handlers keep working.)
    """
    if args.model == 'autoencoder':
        return _make_autoencoder(args, img_c)
    if args.model == 'gan':
        return _make_gan(args, img_c)
    raise ValueError(f'unknown model {args.model}')
def main():
    """Parse the command line, build the character GAN and train it.

    The function sets up logging under ``<log_dir>/<log_name>``, restricts
    the visible CUDA devices to ``--gpu_ids``, seeds torch, builds the data
    loader, model, SGD optimizers and plateau schedulers, then runs
    ``train`` once per epoch, saving the whole model to ``latest.pth`` after
    each one.

    ``--gpu_ids`` is interpreted one character per device (``"01"`` means
    GPUs 0 and 1), exactly as before.

    Raises:
        SystemExit: Via ``argparse`` when an option is missing or fails
            validation (``--show_freq`` must be positive, ``--flip_rate``
            must lie in ``[0, 1]``).
        ValueError: If ``--gpu_ids`` contains a non-digit character.
    """
    parser = argparse.ArgumentParser(description="Generate 汉字 via generative adversarial network.")
    # Dataset
    parser.add_argument("--size", type=int, default=32, help="Font size.")
    parser.add_argument("--from_unicode", type=int, help="Starting point of the unicode.")
    parser.add_argument("--to_unicode", type=int, help="Ending point of the unicode.")
    parser.add_argument("--font", type=str, required=True, help="Path to the font file.")
    parser.add_argument("--num_workers", type=int, default=4, help="Number of data loading workers.")
    # Optimization
    parser.add_argument("--epochs", type=int, default=100, help="Number of epochs.")
    parser.add_argument("--batch_size", type=int, default=32, help="Batch size.")
    parser.add_argument("--gpu_ids", type=str, default='', help="GPUs for running this script.")
    parser.add_argument("--rand_dim", type=int, default=128, help="Dimension of the random vector.")
    parser.add_argument("--num_fakes", type=int, default=16,
                        help="Use num_fakes generated images to train the discriminator.")
    parser.add_argument("--flip_rate", type=float, default=0.8, help="Label flipping rate.")
    parser.add_argument("--g_lr", type=float, default=0.01, help="Learning rate for generator.")
    parser.add_argument("--d_lr", type=float, default=0.01, help="Learning rate for discriminator.")
    parser.add_argument("--factor", type=float, default=0.2,
                        help="Factor by which the learning rate will be reduced.")
    parser.add_argument("--patience", type=int, default=10,
                        help="Number of epochs with no improvement after which learning rate will be reduced.")
    parser.add_argument("--threshold", type=float, default=0.1,
                        help="Threshold for measuring the new optimum, to only focus on significant changes. ")
    # Misc
    parser.add_argument("--log_dir", type=str, default="../run/", help="Where to save the log?")
    parser.add_argument("--log_name", type=str, required=True, help="Name of the log folder.")
    parser.add_argument("--show_freq", type=int, default=64, help="How frequently to show generated images?")
    parser.add_argument("--seed", type=int, default=0, help="Random seed.")
    args = parser.parse_args()

    # Explicit checks instead of ``assert`` so they survive ``python -O``.
    if args.show_freq <= 0:
        parser.error("--show_freq must be a positive integer.")
    if not 0.0 <= args.flip_rate <= 1.0:
        parser.error("--flip_rate must lie in [0.0, 1.0].")

    # Check before run. ``makedirs`` also handles nested ``--log_dir`` values
    # and creates the run folder before the log file is opened inside it
    # (harmless if Logger / SummaryWriter would create it themselves).
    log_dir = os.path.join(args.log_dir, args.log_name)
    os.makedirs(log_dir, exist_ok=True)

    # Setting up logger: from here on ``print`` goes through Logger.
    log_file = datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")
    sys.stdout = Logger(os.path.join(log_dir, log_file))
    print(args)

    # Every character of --gpu_ids must be a single-digit device index.
    for gpu_id in args.gpu_ids:
        try:
            int(gpu_id)
        except ValueError as err:
            message = "Invalid gpu id:{}".format(gpu_id)
            print(message)
            raise ValueError(message) from err
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(args.gpu_ids)

    use_gpu = bool(args.gpu_ids) and torch.cuda.is_available()
    if use_gpu:
        torch.cuda.manual_seed_all(args.seed)
    torch.manual_seed(args.seed)

    dataloader, size = build_dataloader(args.batch_size, args.num_workers, use_gpu,
                                        args.font, args.size,
                                        args.from_unicode, args.to_unicode)
    model = GAN(args.num_fakes, args.rand_dim, size, use_gpu)
    criterion = BCELoss()

    # Optimizers are built from the bare sub-modules, before any
    # DataParallel wrapping below.
    d_optimizer = torch.optim.SGD(model.discriminator.parameters(), lr=args.d_lr, momentum=0.9)
    g_optimizer = torch.optim.SGD(model.generator.parameters(), lr=args.g_lr, momentum=0.9)
    scheduler_options = dict(mode="min", factor=args.factor, patience=args.patience,
                             verbose=True, threshold=args.threshold)
    d_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(d_optimizer, **scheduler_options)
    g_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(g_optimizer, **scheduler_options)
    optimizer = d_optimizer, g_optimizer
    scheduler = d_scheduler, g_scheduler

    if use_gpu:
        model = model.cuda()
        model = torch.nn.DataParallel(model)

    print("Start training...")
    start = datetime.now()
    with SummaryWriter(log_dir) as writer:
        for epoch in range(args.epochs):
            # Record the current learning rate of every parameter group.
            for tag, tracked_optimizer in (("d", d_optimizer), ("g", g_optimizer)):
                for i, param_group in enumerate(tracked_optimizer.param_groups):
                    writer.add_scalar("{0}_lr_group_{1}".format(tag, i),
                                      float(param_group["lr"]),
                                      global_step=epoch)

            train(model, dataloader, criterion, optimizer, use_gpu, writer, epoch,
                  scheduler, args.num_fakes, args.flip_rate, args.show_freq)
            torch.save(model, os.path.join(log_dir, "latest.pth"))

    elapsed_time = str(datetime.now() - start)
    print("Finish training. Total elapsed time %s." % elapsed_time)
def train(valdi_batch_per_iter, train_batch_per_iter, min_iters, max_iters): with tf.Graph().as_default(): with tf.device('/cpu:0'): with tf.name_scope('input'): tr_data_list = read_list(FLAGS.tr_list_file) tr_inputs, tr_labels = get_batch( tr_data_list, FLAGS.batch_size, FLAGS.input_dim, FLAGS.output_dim, FLAGS.left_context, FLAGS.right_context, FLAGS.num_threads, FLAGS.max_epoches) cv_data_list = read_list(FLAGS.cv_list_file) cv_inputs, cv_labels = get_batch( cv_data_list, FLAGS.batch_size, FLAGS.input_dim, FLAGS.output_dim, FLAGS.left_context, FLAGS.right_context, FLAGS.num_threads, None) devices = [] for i in xrange(FLAGS.num_gpu): device_name = ("/gpu:%d" % i) print('Using device: ', device_name) devices.append(device_name) # Prevent exhausting all the gpu memories. config = tf.ConfigProto() config.gpu_options.allow_growth = True config.allow_soft_placement = True # execute the session with tf.Session(config=config) as sess: # Create two models with tr_inputs and cv_inputs individually. with tf.name_scope('model'): print( "=======================================================") print( "| Build Train model |") print( "=======================================================") tr_model = GAN(sess, FLAGS, devices, tr_inputs, tr_labels, cross_validation=False) # tr_model and val_model should share variables print( "=======================================================") print( "| Build Cross-Validation model |") print( "=======================================================") tf.get_variable_scope().reuse_variables() cv_model = GAN(sess, FLAGS, devices, cv_inputs, cv_labels, cross_validation=True) show_all_variables() init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) print("Initializing variables ...") sess.run(init) if tr_model.load(tr_model.save_dir, moving_average=False): print("[*] Load SUCCESS") else: print("[!] 
Begin a new model.") coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) try: # Early stop counter g_loss_prev = 10000.0 g_rel_impr = 1.0 check_interval = 3 windows_g_loss = [] g_learning_rate = FLAGS.num_gpu * FLAGS.g_learning_rate d_learning_rate = FLAGS.num_gpu * FLAGS.d_learning_rate sess.run(tf.assign(tr_model.g_learning_rate, g_learning_rate)) sess.run(tf.assign(tr_model.d_learning_rate, d_learning_rate)) for iteration in range(max_iters): start = datetime.datetime.now() tr_d_rl_loss, tr_d_fk_loss, \ tr_d_loss, tr_g_adv_loss, \ tr_g_mse_loss, tr_g_l2_loss, \ tr_g_loss = train_one_iteration(sess, coord, tr_model, train_batch_per_iter, iteration+1) cv_d_rl_loss, cv_d_fk_loss, \ cv_d_loss, cv_g_adv_loss, \ cv_g_mse_loss, cv_g_l2_loss, \ cv_g_loss = eval_one_iteration(sess, coord, cv_model, valdi_batch_per_iter, iteration+1) d_learning_rate, \ g_learning_rate = sess.run([tr_model.d_learning_rate, tr_model.g_learning_rate]) end = datetime.datetime.now() print("{}/{} (INFO): d_learning_rate = {:.5e}, " "g_learning_rate = {:.5e}, time = {:.3f} min\n" "{}/{} (TRAIN AVG.LOSS): " "d_rl_loss = {:.5f}, d_fk_loss = {:.5f}, " "d_loss = {:.5f}, g_adv_loss = {:.5f}, " "g_mse_loss = {:.5f}, g_l2_loss = {:.5f}, " "g_loss = {:.5f}\n" "{}/{} (CROSS AVG.LOSS): " "d_rl_loss = {:.5f}, d_fk_loss = {:.5f}, " "d_loss = {:.5f}, g_adv_loss = {:.5f}, " "g_mse_loss = {:.5f}, g_l2_loss = {:.5f}, " "g_loss = {:.5f}".format( iteration + 1, max_iters, d_learning_rate, g_learning_rate, (end - start).seconds / 60.0, iteration + 1, max_iters, tr_d_rl_loss, tr_d_fk_loss, tr_d_loss, tr_g_adv_loss, tr_g_mse_loss, tr_g_l2_loss, tr_g_loss, iteration + 1, max_iters, cv_d_rl_loss, cv_d_fk_loss, cv_d_loss, cv_g_adv_loss, cv_g_mse_loss, cv_g_l2_loss, cv_g_loss)) sys.stdout.flush() # Start decay learning rate g_learning_rate = exponential_decay( iteration + 1, FLAGS.num_gpu, min_iters, FLAGS.g_learning_rate) d_learning_rate = exponential_decay( iteration + 1, 
FLAGS.num_gpu, min_iters, FLAGS.d_learning_rate) disc_noise_std = exponential_decay( iteration + 1, FLAGS.num_gpu, min_iters, FLAGS.init_disc_noise_std, multiply_jobs=False) sess.run( tf.assign(tr_model.g_learning_rate, g_learning_rate)) sess.run( tf.assign(tr_model.d_learning_rate, d_learning_rate)) sess.run(tf.assign(tr_model.disc_noise_std, disc_noise_std)) windows_g_loss.append(cv_g_loss) # Accept or reject new parameters. if (iteration + 1) % check_interval == 0: g_loss_new = np.mean(windows_g_loss) g_rel_impr = (g_loss_prev - g_loss_new) / g_loss_prev if g_rel_impr > 0.0: tr_model.save(tr_model.save_dir, iteration + 1) print("Iteration {}: Nnet Accepted. " "Save model SUCCESS. g_loss_prev = {:.5f}, " "g_loss_new = {:.5f}".format( iteration + 1, g_loss_prev, g_loss_new)) g_loss_prev = g_loss_new else: print("Iteration {}: Nnet Rejected. " "g_loss_prev = {:.5f}, " "g_loss_new = {:.5f}".format( iteration + 1, g_loss_prev, g_loss_new)) # tr_model.load(tr_model.save_dir, moving_average=False) windows_g_loss = [] # Stopping criterion. if iteration + 1 > min_iters and \ (iteration + 1) % check_interval == 0: if g_rel_impr < FLAGS.end_improve: print("Iteration %d: Finished, too small relative " "G improvement %g" % (iteration + 1, g_rel_impr)) break sys.stdout.flush() if windows_g_loss: g_loss_new = np.mean(windows_g_loss) g_rel_impr = (g_loss_prev - g_loss_new) / g_loss_prev if g_rel_impr > 0.0: tr_model.save(tr_model.save_dir, iteration + 1) print("Iteration {}: Nnet Accepted. " "Save model SUCCESS. g_loss_prev = {:.5f}, " "g_loss_new = {:.5f}".format( iteration + 1, g_loss_prev, g_loss_new)) g_loss_prev = g_loss_new sys.stdout.flush() windows_g_loss = [] except Exception, e: # Report exceptions to the coordinator. coord.request_stop(e) finally:
def _str_to_bool(value):
    """Convert a command-line token such as 'true', 'False', '1' or 'no' to bool."""
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got %r' % value)


def parser():
    """Parse the experiment's command-line options and instantiate the model.

    After parsing, ``device`` defaults to ``cuda:0`` when CUDA is available
    (``cpu`` otherwise), and ``model`` is replaced by the constructed
    network wrapped in ``nn.DataParallel``.

    Boolean options (``--label_smoothing``, ``--flipped_labbels``,
    ``--eval_mode``) are parsed with ``_str_to_bool``. The previous
    ``type=bool`` treated every non-empty string, including ``"False"``,
    as ``True``.

    Returns:
        argparse.Namespace: The parsed arguments with ``device`` and
        ``model`` populated.

    Raises:
        SystemExit: When ``--model`` names an unsupported model, or when
            ``argparse`` rejects the command line.
    """
    arg_parser = argparse.ArgumentParser()

    # Training parameters
    arg_parser.add_argument('--epochs', default=40, type=int,
                            help='Number of training epochs.')
    arg_parser.add_argument('--batch_size', type=int, default=64,
                            help='batch size')
    arg_parser.add_argument('--latent_dim', type=int, default=100,
                            help='dimensionality of the latent space')
    arg_parser.add_argument('--lr', type=float, default=0.0002,
                            help='learning rate')
    arg_parser.add_argument("--b1", type=float, default=0.5,
                            help="momentum; beta1 in Adam optimizer.")
    arg_parser.add_argument("--b2", type=float, default=0.999,
                            help="decay; beta2 in Adam optimizer.")
    arg_parser.add_argument('--dropout_D', type=float, default=0.2,
                            help='Dropout probability on the Discriminator.')
    arg_parser.add_argument('--dropout_G', type=float, default=0.2,
                            help='Dropout probability on the Generator.')
    arg_parser.add_argument('--label_smoothing', type=_str_to_bool,
                            default=True, help='Label Smoothing.')
    arg_parser.add_argument('--flipped_labbels', type=_str_to_bool,
                            default=True, help='Flipped Labbels.')
    arg_parser.add_argument('--eval_mode', type=_str_to_bool, default=True,
                            help='Evaluation mode On/Off when sampling.')
    arg_parser.add_argument('--n_samples', type=int, default=9,
                            help='The number of the generated images.')
    arg_parser.add_argument('--device', default=None, type=str,
                            help='Device to run the experiment. '
                                 'Valid options: "cpu", "cuda".')
    arg_parser.add_argument('--seed', default=None, type=int,
                            help='Fix random seed.')
    arg_parser.add_argument('--model', default='gan', type=str,
                            help="Model to be used. Valid options: "
                                 "'gan'.")

    parsed = arg_parser.parse_args()

    if parsed.device is None:
        parsed.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")

    if parsed.model == 'gan':
        # Imported lazily so the model package is only required when used.
        from models.gan import GAN
        parsed.model = nn.DataParallel(GAN(args=parsed).to(parsed.device))
    else:
        print('Model {} is not implimented'.format(parsed.model))
        # Same exit status as the former ``quit()`` call, without relying
        # on the interactive-only ``site`` helper.
        raise SystemExit

    print_(parsed)
    return parsed