def main():
    args = parser.parse_args()
    pp.pprint(vars(args))
    config = vars(args)

    # train with different datasets
    if args.dataset == 'oracle':
        oracle_model = OracleLstm(num_vocabulary=args.vocab_size, batch_size=args.batch_size,
                                  emb_dim=args.gen_emb_dim, hidden_dim=args.hidden_dim,
                                  sequence_length=args.seq_len, start_token=args.start_token)
        oracle_loader = OracleDataLoader(args.batch_size, args.seq_len)
        gen_loader = OracleDataLoader(args.batch_size, args.seq_len)

        generator = models.get_generator(args.g_architecture, vocab_size=args.vocab_size,
                                         batch_size=args.batch_size, seq_len=args.seq_len,
                                         gen_emb_dim=args.gen_emb_dim, mem_slots=args.mem_slots,
                                         head_size=args.head_size, num_heads=args.num_heads,
                                         hidden_dim=args.hidden_dim, start_token=args.start_token)
        discriminator = models.get_discriminator(args.d_architecture, batch_size=args.batch_size,
                                                 seq_len=args.seq_len, vocab_size=args.vocab_size,
                                                 dis_emb_dim=args.dis_emb_dim, num_rep=args.num_rep,
                                                 sn=args.sn)
        oracle_train(generator, discriminator, oracle_model, oracle_loader, gen_loader, config)

    elif args.dataset in ['image_coco', 'emnlp_news']:
        data_file = os.path.join(args.data_dir, '{}.txt'.format(args.dataset))
        seq_len, vocab_size = text_precess(data_file)
        config['seq_len'] = seq_len  # override the sequence length
        config['vocab_size'] = vocab_size
        print('seq_len: %d, vocab_size: %d' % (seq_len, vocab_size))

        oracle_loader = RealDataLoader(args.batch_size, args.seq_len)

        generator = models.get_generator(args.g_architecture, vocab_size=vocab_size,
                                         batch_size=args.batch_size, seq_len=seq_len,
                                         gen_emb_dim=args.gen_emb_dim, mem_slots=args.mem_slots,
                                         head_size=args.head_size, num_heads=args.num_heads,
                                         hidden_dim=args.hidden_dim, start_token=args.start_token)
        discriminator = models.get_discriminator(args.d_architecture, batch_size=args.batch_size,
                                                 seq_len=seq_len, vocab_size=vocab_size,
                                                 dis_emb_dim=args.dis_emb_dim, num_rep=args.num_rep,
                                                 sn=args.sn)
        f_classifier = models.get_classifier(args.f_architecture, scope="f_classifier",
                                             batch_size=args.batch_size, seq_len=seq_len,
                                             vocab_size=vocab_size, dis_emb_dim=args.f_emb_dim,
                                             num_rep=args.num_rep, sn=args.sn)
        real_train(generator, discriminator, f_classifier, oracle_loader, config)

    else:
        raise NotImplementedError('{}: unknown dataset!'.format(args.dataset))
def __init__(self, config):
    self.config = config
    self.ds_train, self.info = get_dataset_and_info(self.config)
    self.steps_per_epoch = self.info['num_records'] // self.config['batch_size']

    self.generator = get_generator(self.info["num_classes"])
    self.discriminator = get_discriminator(self.info["num_classes"])

    if self.config['loss'] == "cross_entropy":
        print("use ce loss")
        self.gloss_fn = cross_entropy_g
        self.dloss_fn = cross_entropy_d
    elif self.config['loss'] == "hinge_loss":
        print("use hinge loss")
        self.gloss_fn = hinge_loss_g
        self.dloss_fn = hinge_loss_d
    else:
        raise ValueError('Unsupported loss type')

    lr_fn_G = tf.optimizers.schedules.ExponentialDecay(
        1e-4, self.steps_per_epoch, decay_rate=0.99, staircase=True)
    lr_fn_D = tf.optimizers.schedules.ExponentialDecay(
        4e-4, self.steps_per_epoch * self.config['update_ratio'],
        decay_rate=0.99, staircase=True)
    self.generator_optimizer = optimizers.Adam(learning_rate=lr_fn_G, beta_1=0.)
    self.discriminator_optimizer = optimizers.Adam(learning_rate=lr_fn_D, beta_1=0.)

    # build the model to obtain the names of its trainable tensors
    self.generator.build(
        input_shape=[(self.config['batch_size'], self.config['z_dim']),
                     (self.config['batch_size'])])
    self.var_name_list = [var.name for var in self.generator.trainable_variables]

    # metrics
    self.metrics = {}
    self.metrics['G_loss'] = tf.keras.metrics.Mean('generator_loss', dtype=tf.float32)
    self.metrics['D_loss'] = tf.keras.metrics.Mean('discriminator_loss', dtype=tf.float32)
    for name in self.var_name_list:
        self.metrics[name] = tf.keras.metrics.Mean(name, dtype=tf.float32)

    self.random_vector = tf.random.normal([config['batch_size'], config['z_dim']])
    self.fix_label = tf.random.uniform((self.config['batch_size'],), 0,
                                       self.info['num_classes'], dtype=tf.int32)
def test_discriminator():
    filepath = '/home/yct/data/imagenet_small'
    dataset, info = get_dataset_and_info(filepath)
    sample = next(iter(dataset.take(1)))
    batch_size = sample[0].shape[0]
    num_classes = info['num_classes']

    discriminator = get_discriminator(num_classes)
    img = tf.random.normal([batch_size, 128, 128, 3])
    label = np.random.randint(0, num_classes, (batch_size))
    label = tf.cast(label, tf.int32)
    output = discriminator([img, label])
    assert output.shape == [batch_size, 1], output.shape
    print("Discriminator OK")
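
# A companion smoke test for the generator, mirroring test_discriminator() above.
# This is a sketch under assumptions: get_generator() is assumed to take the class
# count, the model is assumed to accept a [noise, label] pair, and Z_DIM = 128 and
# the 128x128x3 output shape are guesses rather than values from the original code.
def test_generator():
    filepath = '/home/yct/data/imagenet_small'
    dataset, info = get_dataset_and_info(filepath)
    sample = next(iter(dataset.take(1)))
    batch_size = sample[0].shape[0]
    num_classes = info['num_classes']

    Z_DIM = 128  # assumed latent dimension
    generator = get_generator(num_classes)
    z = tf.random.normal([batch_size, Z_DIM])
    label = tf.cast(np.random.randint(0, num_classes, (batch_size)), tf.int32)
    output = generator([z, label])
    assert output.shape == [batch_size, 128, 128, 3], output.shape
    print("Generator OK")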
def main():
    args = parser.parse_args()
    pp.pprint(vars(args))

    # seed
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    # Data
    filename_queue = get_filename_queue(
        split_file=os.path.join(args.data_dir, 'splits', args.dataset, args.split + '.lst'),
        data_dir=os.path.join(args.data_dir, args.dataset)
    )

    if args.dataset == "cifar-10":
        image, label = get_input_cifar10(filename_queue)
        output_size = 32
        c_dim = 3
    else:
        image = get_input_image(filename_queue,
                                output_size=args.output_size,
                                image_size=args.image_size,
                                c_dim=args.c_dim)
        output_size = args.output_size
        c_dim = args.c_dim

    image_batch = create_batch([image], batch_size=args.batch_size,
                               num_preprocess_threads=16, min_queue_examples=10000)

    config = vars(args)

    # use the dataset-dependent output size and channel count computed above
    generator = models.get_generator(args.g_architecture,
                                     output_size=output_size, c_dim=c_dim,
                                     f_dim=args.gf_dim)
    discriminator = models.get_discriminator(args.d_architecture,
                                             output_size=output_size, c_dim=c_dim,
                                             f_dim=args.df_dim)

    train(generator, discriminator, image_batch, config)
def main():
    args = parser.parse_args()
    pp.pprint(vars(args))
    config = vars(args)

    # train with different datasets
    if args.dataset == 'oracle':
        oracle_model = OracleLstm(num_vocabulary=args.vocab_size, batch_size=args.batch_size,
                                  emb_dim=args.gen_emb_dim, hidden_dim=args.hidden_dim,
                                  sequence_length=args.seq_len, start_token=args.start_token)
        oracle_loader = OracleDataLoader(args.batch_size, args.seq_len)
        gen_loader = OracleDataLoader(args.batch_size, args.seq_len)

        generator = models.get_generator(args.g_architecture, vocab_size=args.vocab_size,
                                         batch_size=args.batch_size, seq_len=args.seq_len,
                                         gen_emb_dim=args.gen_emb_dim, mem_slots=args.mem_slots,
                                         head_size=args.head_size, num_heads=args.num_heads,
                                         hidden_dim=args.hidden_dim, start_token=args.start_token)
        discriminator = models.get_discriminator(args.d_architecture, batch_size=args.batch_size,
                                                 seq_len=args.seq_len, vocab_size=args.vocab_size,
                                                 dis_emb_dim=args.dis_emb_dim, num_rep=args.num_rep,
                                                 sn=args.sn)
        oracle_train(generator, discriminator, oracle_model, oracle_loader, gen_loader, config)

    elif args.dataset in ['image_coco', 'emnlp_news']:
        # custom dataset selected
        data_file = resources_path(args.data_dir, '{}.txt'.format(args.dataset))
        sample_dir = resources_path(config['sample_dir'])
        oracle_file = os.path.join(sample_dir, 'oracle_{}.txt'.format(args.dataset))

        data_dir = resources_path(config['data_dir'])
        if args.dataset == 'image_coco':
            test_file = os.path.join(data_dir, 'testdata/test_coco.txt')
        elif args.dataset == 'emnlp_news':
            test_file = os.path.join(data_dir, 'testdata/test_emnlp.txt')
        else:
            raise NotImplementedError('Unknown dataset!')

        if args.dataset == 'emnlp_news':
            data_file, lda_file = create_subsample_data_file(data_file)
        else:
            lda_file = data_file

        seq_len, vocab_size, word_index_dict, index_word_dict = text_precess(
            data_file, test_file, oracle_file=oracle_file)
        config['seq_len'] = seq_len
        config['vocab_size'] = vocab_size
        print('seq_len: %d, vocab_size: %d' % (seq_len, vocab_size))
        config['topic_loss_weight'] = args.topic_loss_weight

        if config['LSTM']:
            if config['topic']:
                topic_number = config['topic_number']
                oracle_loader = RealDataTopicLoader(args.batch_size, args.seq_len)
                oracle_loader.set_dataset(args.dataset)
                oracle_loader.set_files(data_file, lda_file)
                oracle_loader.topic_num = topic_number
                oracle_loader.set_dictionaries(word_index_dict, index_word_dict)

                generator = models.get_generator(args.g_architecture,
                                                 vocab_size=vocab_size,
                                                 batch_size=args.batch_size,
                                                 seq_len=seq_len,
                                                 gen_emb_dim=args.gen_emb_dim,
                                                 mem_slots=args.mem_slots,
                                                 head_size=args.head_size,
                                                 num_heads=args.num_heads,
                                                 hidden_dim=args.hidden_dim,
                                                 start_token=args.start_token,
                                                 TopicInMemory=args.topic_in_memory,
                                                 NoTopic=args.no_topic)

                from real.real_gan.real_topic_train_NoDiscr import real_topic_train_NoDiscr
                real_topic_train_NoDiscr(generator, oracle_loader, config, args)
            else:
                generator = models.get_generator(args.g_architecture,
                                                 vocab_size=vocab_size,
                                                 batch_size=args.batch_size,
                                                 seq_len=seq_len,
                                                 gen_emb_dim=args.gen_emb_dim,
                                                 mem_slots=args.mem_slots,
                                                 head_size=args.head_size,
                                                 num_heads=args.num_heads,
                                                 hidden_dim=args.hidden_dim,
                                                 start_token=args.start_token)

                oracle_loader = RealDataLoader(args.batch_size, args.seq_len)
                oracle_loader.set_dictionaries(word_index_dict, index_word_dict)
                oracle_loader.set_dataset(args.dataset)
                oracle_loader.set_files(data_file, lda_file)
                oracle_loader.topic_num = config['topic_number']

                from real.real_gan.real_train_NoDiscr import real_train_NoDiscr
                real_train_NoDiscr(generator, oracle_loader, config, args)
        else:
            if config['topic']:
                topic_number = config['topic_number']
                oracle_loader = RealDataTopicLoader(args.batch_size, args.seq_len)
                oracle_loader.set_dataset(args.dataset)
                oracle_loader.set_files(data_file, lda_file)
                oracle_loader.topic_num = topic_number
                oracle_loader.set_dictionaries(word_index_dict, index_word_dict)

                generator = models.get_generator(args.g_architecture,
                                                 vocab_size=vocab_size,
                                                 batch_size=args.batch_size,
                                                 seq_len=seq_len,
                                                 gen_emb_dim=args.gen_emb_dim,
                                                 mem_slots=args.mem_slots,
                                                 head_size=args.head_size,
                                                 num_heads=args.num_heads,
                                                 hidden_dim=args.hidden_dim,
                                                 start_token=args.start_token,
                                                 TopicInMemory=args.topic_in_memory,
                                                 NoTopic=args.no_topic)

                discriminator = models.get_discriminator(args.d_architecture,
                                                         batch_size=args.batch_size,
                                                         seq_len=seq_len,
                                                         vocab_size=vocab_size,
                                                         dis_emb_dim=args.dis_emb_dim,
                                                         num_rep=args.num_rep,
                                                         sn=args.sn)

                if not args.no_topic:
                    topic_discriminator = models.get_topic_discriminator(
                        args.topic_architecture,
                        batch_size=args.batch_size,
                        seq_len=seq_len,
                        vocab_size=vocab_size,
                        dis_emb_dim=args.dis_emb_dim,
                        num_rep=args.num_rep,
                        sn=args.sn,
                        discriminator=discriminator)
                else:
                    topic_discriminator = None

                from real.real_gan.real_topic_train import real_topic_train
                real_topic_train(generator, discriminator, topic_discriminator,
                                 oracle_loader, config, args)
            else:
                generator = models.get_generator(args.g_architecture,
                                                 vocab_size=vocab_size,
                                                 batch_size=args.batch_size,
                                                 seq_len=seq_len,
                                                 gen_emb_dim=args.gen_emb_dim,
                                                 mem_slots=args.mem_slots,
                                                 head_size=args.head_size,
                                                 num_heads=args.num_heads,
                                                 hidden_dim=args.hidden_dim,
                                                 start_token=args.start_token)

                discriminator = models.get_discriminator(args.d_architecture,
                                                         batch_size=args.batch_size,
                                                         seq_len=seq_len,
                                                         vocab_size=vocab_size,
                                                         dis_emb_dim=args.dis_emb_dim,
                                                         num_rep=args.num_rep,
                                                         sn=args.sn)

                oracle_loader = RealDataLoader(args.batch_size, args.seq_len)

                from real.real_gan.real_train import real_train
                real_train(generator, discriminator, oracle_loader, config, args)

    elif args.dataset in ['Amazon_Attribute']:
        # custom dataset selected
        data_dir = resources_path(config['data_dir'], "Amazon_Attribute")
        sample_dir = resources_path(config['sample_dir'])
        oracle_file = os.path.join(sample_dir, 'oracle_{}.txt'.format(args.dataset))
        train_file = os.path.join(data_dir, 'train.csv')
        dev_file = os.path.join(data_dir, 'dev.csv')
        test_file = os.path.join(data_dir, 'test.csv')

        # create_tokens_files(data_files=[train_file, dev_file, test_file])
        config_file = load_json(os.path.join(data_dir, 'config.json'))
        config = {**config, **config_file}  # merge dictionaries

        from real.real_gan.loaders.amazon_loader import RealDataAmazonLoader
        oracle_loader = RealDataAmazonLoader(args.batch_size, args.seq_len)
        oracle_loader.create_batches(data_file=[train_file, dev_file, test_file])
        oracle_loader.model_index_word_dict = load_json(join(data_dir, 'index_word_dict.json'))
        oracle_loader.model_word_index_dict = load_json(join(data_dir, 'word_index_dict.json'))

        generator = models.get_generator("amazon_attribute",
                                         vocab_size=config['vocabulary_size'],
                                         batch_size=args.batch_size,
                                         seq_len=config['seq_len'],
                                         gen_emb_dim=args.gen_emb_dim,
                                         mem_slots=args.mem_slots,
                                         head_size=args.head_size,
                                         num_heads=args.num_heads,
                                         hidden_dim=args.hidden_dim,
                                         start_token=args.start_token,
                                         user_num=config['user_num'],
                                         product_num=config['product_num'],
                                         rating_num=5)

        discriminator = models.get_discriminator("amazon_attribute",
                                                 batch_size=args.batch_size,
                                                 seq_len=config['seq_len'],
                                                 vocab_size=config['vocabulary_size'],
                                                 dis_emb_dim=args.dis_emb_dim,
                                                 num_rep=args.num_rep,
                                                 sn=args.sn)

        from real.real_gan.amazon_attribute_train import amazon_attribute_train
        amazon_attribute_train(generator, discriminator, oracle_loader, config, args)

    elif args.dataset in ['CustomerReviews', 'imdb']:
        from real.real_gan.loaders.custom_reviews_loader import RealDataCustomerReviewsLoader
        from real.real_gan.customer_reviews_train import customer_reviews_train

        # custom dataset selected
        if args.dataset == 'CustomerReviews':
            data_dir = resources_path(config['data_dir'], "MovieReviews", "cr")
        elif args.dataset == 'imdb':
            data_dir = resources_path(config['data_dir'], "MovieReviews", 'movie', 'sstb')
        else:
            raise ValueError
        sample_dir = resources_path(config['sample_dir'])
        oracle_file = os.path.join(sample_dir, 'oracle_{}.txt'.format(args.dataset))
        train_file = os.path.join(data_dir, 'train.csv')

        # create_tokens_files(data_files=[train_file, dev_file, test_file])
        config_file = load_json(os.path.join(data_dir, 'config.json'))
        config = {**config, **config_file}  # merge dictionaries

        oracle_loader = RealDataCustomerReviewsLoader(args.batch_size, args.seq_len)
        oracle_loader.create_batches(data_file=[train_file])
        oracle_loader.model_index_word_dict = load_json(join(data_dir, 'index_word_dict.json'))
        oracle_loader.model_word_index_dict = load_json(join(data_dir, 'word_index_dict.json'))

        generator = models.get_generator("CustomerReviews",
                                         vocab_size=config['vocabulary_size'],
                                         batch_size=args.batch_size,
                                         start_token=args.start_token,
                                         seq_len=config['seq_len'],
                                         gen_emb_dim=args.gen_emb_dim,
                                         mem_slots=args.mem_slots,
                                         head_size=args.head_size,
                                         num_heads=args.num_heads,
                                         hidden_dim=args.hidden_dim,
                                         sentiment_num=config['sentiment_num'])

        discriminator_positive = models.get_discriminator("CustomerReviews",
                                                          scope="discriminator_positive",
                                                          batch_size=args.batch_size,
                                                          seq_len=config['seq_len'],
                                                          vocab_size=config['vocabulary_size'],
                                                          dis_emb_dim=args.dis_emb_dim,
                                                          num_rep=args.num_rep,
                                                          sn=args.sn)
        discriminator_negative = models.get_discriminator("CustomerReviews",
                                                          scope="discriminator_negative",
                                                          batch_size=args.batch_size,
                                                          seq_len=config['seq_len'],
                                                          vocab_size=config['vocabulary_size'],
                                                          dis_emb_dim=args.dis_emb_dim,
                                                          num_rep=args.num_rep,
                                                          sn=args.sn)

        customer_reviews_train(generator, discriminator_positive, discriminator_negative,
                               oracle_loader, config, args)
    else:
        raise NotImplementedError('{}: unknown dataset!'.format(args.dataset))

    print("RUN FINISHED")
    return
def __init__(self, config):
    self.ds_train, self.config = get_dataset_and_info(config)

    # ["/gpu:{}".format(i) for i in range(self.config['num_gpu'])]
    self.strategy = tf.distribute.MirroredStrategy() \
        if len(self.config['gpu']) > 1 \
        else tf.distribute.OneDeviceStrategy(device="/gpu:0")

    self.steps_per_epoch = self.config['num_records'] // self.config['global_batch_size']
    print("total steps: ", self.steps_per_epoch * self.config['epoch'])

    self.ds_train = self.strategy.experimental_distribute_dataset(self.ds_train)

    with self.strategy.scope():
        if self.config['model'] == 'vanilla':
            self.generator = get_generator(self.config)
            self.discriminator = get_discriminator(self.config)
        # TODO: fix resnet model
        # elif config['model'] == 'resnet':
        #     self.generator = get_res_generator(config)
        #     self.discriminator = get_res_discriminator(config)
        else:
            raise ValueError('Unsupported model type')

        lr_fn_G = ExponentialDecay(self.config['lr_g'], self.steps_per_epoch,
                                   decay_rate=self.config['decay_rate'], staircase=True)
        lr_fn_D = ExponentialDecay(self.config['lr_d'],
                                   self.steps_per_epoch * self.config['update_ratio'],
                                   decay_rate=self.config['decay_rate'], staircase=True)
        self.optimizer_G = optimizers.Adam(learning_rate=lr_fn_G, beta_1=0.)
        self.optimizer_D = optimizers.Adam(learning_rate=lr_fn_D, beta_1=0.)

        if self.config['loss'] == "cross_entropy":
            print("use ce loss")
            self.gloss_fn = cross_entropy_g
            self.dloss_fn = cross_entropy_d
        elif self.config['loss'] == "hinge_loss":
            print("use hinge loss")
            self.gloss_fn = hinge_loss_g
            self.dloss_fn = hinge_loss_d
        else:
            raise ValueError('Unsupported loss type')

        # build models & get trainable variables
        self.generator.build(
            input_shape=[(self.config['batch_size'], self.config['z_dim']),
                         (self.config['batch_size'])])
        self.discriminator.build(
            input_shape=[(self.config['batch_size'], config['img_size'], config['img_size'], 3),
                         (self.config['batch_size'])])
        self.generator.summary()
        self.discriminator.summary()

        self.var_G = [var.name for var in self.generator.variables]
        self.Train_var_G = [var.name for var in self.generator.trainable_variables]
        self.Train_var_D = [var.name for var in self.discriminator.trainable_variables]
        print("-" * 20, "generator weights", "-" * 20)
        pprint(self.Train_var_G)
        print("-" * 20, "discriminator weights", "-" * 20)
        pprint(self.Train_var_D)

        # checkpoints
        self.ckpt_G = tf.train.Checkpoint(step=tf.Variable(1),
                                          optimizer=self.optimizer_G,
                                          net=self.generator)
        self.ckpt_D = tf.train.Checkpoint(step=tf.Variable(1),
                                          optimizer=self.optimizer_D,
                                          net=self.discriminator)
        self.CkptManager_G = tf.train.CheckpointManager(
            self.ckpt_G, '{}/G'.format(self.config['ckpt_dir']),
            max_to_keep=10, checkpoint_name='epoch')
        self.CkptManager_D = tf.train.CheckpointManager(
            self.ckpt_D, '{}/D'.format(self.config['ckpt_dir']),
            max_to_keep=10, checkpoint_name='epoch')

        # metrics
        self.metrics = {}
        self.metrics['G_loss'] = tf.keras.metrics.Mean('generator_loss', dtype=tf.float32)
        self.metrics['D_loss'] = tf.keras.metrics.Mean('discriminator_loss', dtype=tf.float32)
        self.metrics.update({
            name: tf.keras.metrics.Mean(name, dtype=tf.float32)
            for name in self.var_G
        })
        self.metrics.update({
            name + '/norm': tf.keras.metrics.Mean(name + '/norm', dtype=tf.float32)
            for name in self.Train_var_G
        })

        self.fixed_vector = tf.random.normal([config['batch_size'], config['z_dim']])
        self.fixed_label = tf.random.uniform((self.config['batch_size'],), 0,
                                             self.config['num_classes'], dtype=tf.int32)
def main():
    args = parser.parse_args()
    # pp.pprint(vars(args))
    config = vars(args)

    # train with different datasets
    if args.dataset == 'oracle':
        oracle_model = OracleLstm(num_vocabulary=args.vocab_size, batch_size=args.batch_size,
                                  emb_dim=args.gen_emb_dim, hidden_dim=args.hidden_dim,
                                  sequence_length=args.seq_len, start_token=args.start_token)
        oracle_loader = OracleDataLoader(args.batch_size, args.seq_len)
        gen_loader = OracleDataLoader(args.batch_size, args.seq_len)

        generator = models.get_generator(args.g_architecture, vocab_size=args.vocab_size,
                                         batch_size=args.batch_size, seq_len=args.seq_len,
                                         gen_emb_dim=args.gen_emb_dim, mem_slots=args.mem_slots,
                                         head_size=args.head_size, num_heads=args.num_heads,
                                         hidden_dim=args.hidden_dim, start_token=args.start_token)
        discriminator = models.get_discriminator(args.d_architecture, batch_size=args.batch_size,
                                                 seq_len=args.seq_len, vocab_size=args.vocab_size,
                                                 dis_emb_dim=args.dis_emb_dim, num_rep=args.num_rep,
                                                 sn=args.sn)
        oracle_train(generator, discriminator, oracle_model, oracle_loader, gen_loader, config)

    elif args.dataset in ['image_coco', 'emnlp_news', 'emnlp_news_small']:
        data_file = os.path.join(args.data_dir, '{}.txt'.format(args.dataset))
        seq_len, vocab_size, word_index_dict, index_word_dict = text_precess(data_file)
        config['seq_len'] = seq_len
        config['vocab_size'] = vocab_size
        # print('seq_len: %d, vocab_size: %d' % (seq_len, vocab_size))

        oracle_loader = RealDataLoader(args.batch_size, args.seq_len)

        generator = models.get_generator(args.g_architecture, vocab_size=vocab_size,
                                         batch_size=args.batch_size, seq_len=seq_len,
                                         gen_emb_dim=args.gen_emb_dim, mem_slots=args.mem_slots,
                                         head_size=args.head_size, num_heads=args.num_heads,
                                         hidden_dim=args.hidden_dim, start_token=args.start_token)
        discriminator = models.get_discriminator(args.d_architecture, batch_size=args.batch_size,
                                                 seq_len=seq_len, vocab_size=vocab_size,
                                                 dis_emb_dim=args.dis_emb_dim, num_rep=args.num_rep,
                                                 sn=args.sn)

        # print("gen params = ", count_params(generator.trainable_variables))
        # print("disc params = ", count_params(discriminator.trainable_variables))
        # sys.stdout.flush()

        load_model = False
        if config['load_saved_model'] != "":
            log_dir_path = os.path.dirname(config['load_saved_model'])
            config['log_dir'] = log_dir_path
            config['sample_dir'] = os.path.join(os.path.split(log_dir_path)[0], 'samples')
            index_word_dict = load_index_to_word_dict(
                os.path.join(config['log_dir'], "index_to_word_dict.json"))
            word_index_dict = {v: k for k, v in index_word_dict.items()}
            load_model = True
        else:
            if not os.path.exists(config['log_dir']):
                os.makedirs(config['log_dir'])
            json.dump(index_word_dict,
                      open(os.path.join(config['log_dir'], "index_to_word_dict.json"), 'w'))
            json.dump(word_index_dict,
                      open(os.path.join(config['log_dir'], "word_to_index_dict.json"), 'w'))

        pp.pprint(config)
        print('seq_len: %d, vocab_size: %d' % (seq_len, vocab_size))
        sys.stdout.flush()

        real_train(generator, discriminator, oracle_loader, config,
                   word_index_dict, index_word_dict, load_model=load_model)

        if args.dataset == "emnlp_news" or args.dataset == "emnlp_news_small":
            call(["python", 'bleu_post_training_emnlp.py',
                  os.path.join(os.path.split(config['log_dir'])[0], 'samples'), 'na'],
                 cwd=".")
        elif args.dataset == "image_coco":
            call(["python", 'bleu_post_training.py',
                  os.path.join(os.path.split(config['log_dir'])[0], 'samples'), 'na'],
                 cwd=".")

    elif args.dataset in ['ace0_small']:
        # data_file = os.path.join(args.data_dir, '{}.txt'.format(args.dataset))
        # seq_len, vocab_size, word_index_dict, index_word_dict = text_precess(data_file)
        seq_len = config['seq_len']
        vocab_size = config['vocab_size']
        # print('seq_len: %d, vocab_size: %d' % (seq_len, vocab_size))
        # oracle_loader = RealDataLoader(args.batch_size, args.seq_len)

        generator = models.get_generator(args.g_architecture, vocab_size=config['vocab_size'],
                                         batch_size=args.batch_size, seq_len=config['seq_len'],
                                         gen_emb_dim=args.gen_emb_dim, mem_slots=args.mem_slots,
                                         head_size=args.head_size, num_heads=args.num_heads,
                                         hidden_dim=args.hidden_dim, start_token=args.start_token)
        discriminator = models.get_discriminator(args.d_architecture, batch_size=args.batch_size,
                                                 seq_len=config['seq_len'],
                                                 vocab_size=config['vocab_size'],
                                                 dis_emb_dim=args.dis_emb_dim, num_rep=args.num_rep,
                                                 sn=args.sn)

        # print("gen params = ", count_params(generator.trainable_variables))
        # print("disc params = ", count_params(discriminator.trainable_variables))
        # sys.stdout.flush()

        load_model = False
        if config['load_saved_model'] != "":
            log_dir_path = os.path.dirname(config['load_saved_model'])
            config['log_dir'] = log_dir_path
            config['sample_dir'] = os.path.join(os.path.split(log_dir_path)[0], 'samples')
            index_word_dict = load_index_to_word_dict(
                os.path.join(config['log_dir'], "index_to_word_dict.json"))
            word_index_dict = {v: k for k, v in index_word_dict.items()}
            load_model = True
        else:
            if not os.path.exists(config['log_dir']):
                os.makedirs(config['log_dir'])
            # json.dump(index_word_dict, open(os.path.join(config['log_dir'], "index_to_word_dict.json"), 'w'))
            # json.dump(word_index_dict, open(os.path.join(config['log_dir'], "word_to_index_dict.json"), 'w'))

        pp.pprint(config)
        print('seq_len: %d, vocab_size: %d' % (seq_len, vocab_size))
        sys.stdout.flush()

        real_train_traj(generator, discriminator, None, config, None, None, load_model=load_model)

        # if args.dataset == "emnlp_news" or args.dataset == "emnlp_news_small":
        #     call(["python", 'bleu_post_training_emnlp.py',
        #           os.path.join(os.path.split(config['log_dir'])[0], 'samples'), 'na'], cwd=".")
        # elif args.dataset == "image_coco":
        #     call(["python", 'bleu_post_training.py',
        #           os.path.join(os.path.split(config['log_dir'])[0], 'samples'), 'na'], cwd=".")
    else:
        raise NotImplementedError('{}: unknown dataset!'.format(args.dataset))
def train(batch_size, learning_rate, beta_1, epochs, data_path):
    """
    Train the generator and discriminator

    :param batch_size: Batch size
    :param learning_rate: Learning rate
    :param beta_1: beta_1 for Adam optimizer
    :param epochs: Number of epochs
    :param data_path: Path of directory
    """
    input_data = load_data(data_path, constants.IMAGE_SIZE)
    # normalize data to (-1, 1), the same output range as tanh
    input_data = (input_data.astype(numpy.float32) - 127.5) / 127.5

    # Get generator, discriminator and composed network
    generator = get_generator()
    discriminator = get_discriminator()
    generative_adversarial_network = get_generative_adversarial_network(generator, discriminator)

    generator_optimizer = Adam(lr=learning_rate, beta_1=beta_1)
    discriminator_optimizer = Adam(lr=learning_rate, beta_1=beta_1)

    # Compile all networks
    generator.compile(loss='binary_crossentropy', optimizer=generator_optimizer)
    generative_adversarial_network.compile(loss='binary_crossentropy',
                                           optimizer=generator_optimizer)
    discriminator.trainable = True
    discriminator.compile(loss='binary_crossentropy', optimizer=discriminator_optimizer)

    for epoch in range(epochs):
        print("Epoch:%d" % epoch)
        for batch_number in range(int(input_data.shape[0] / batch_size)):
            input_batch = input_data[batch_number * batch_size:(batch_number + 1) * batch_size]
            noise = numpy.random.uniform(-1, 1, size=(batch_size, 100))
            generated_images = generator.predict(noise, verbose=0)
            input_batch = numpy.concatenate((input_batch, generated_images))
            output_batch = [1] * batch_size + [0] * batch_size

            # train the discriminator to reject the generated images
            discriminator_loss = discriminator.train_on_batch(input_batch, output_batch)

            noise = numpy.random.uniform(-1, 1, (batch_size, 100))

            # freeze the discriminator while training the generator: here it only judges
            # the generated images, so it should not be updated on them
            discriminator.trainable = False
            # train the generator with the objective of getting the generated images approved
            generator_loss = generative_adversarial_network.train_on_batch(noise,
                                                                           [1] * batch_size)
            discriminator.trainable = True

            print("Batch=%d, Discriminator Loss=%f" % (batch_number, discriminator_loss))
            print("Batch=%d, Generator Loss=%f" % (batch_number, generator_loss))

        if epoch % 10 == 9:
            generator.save_weights('generator_weights.h5', True)
            discriminator.save_weights('discriminator_weights.h5', True)
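
# A minimal usage sketch for the train() function above. The concrete hyperparameter
# values and the 'data/images' directory are illustrative assumptions, not values
# taken from the original script.
if __name__ == '__main__':
    train(batch_size=128, learning_rate=2e-4, beta_1=0.5, epochs=100, data_path='data/images')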
def main():
    generator = get_generator()
    discriminator = get_discriminator()
    gan = get_generator_containing_discriminator(generator, discriminator)

    model_path = 'weights_0219/generator_16600.hdf5'
    generator.load_weights(filepath=model_path)
    model_path = 'weights_0219/discriminator_16600.hdf5'
    discriminator.load_weights(filepath=model_path)

    opt_d = Adam(lr=1e-4, beta_1=0.5)
    discriminator.compile(optimizer=opt_d, loss='binary_crossentropy', metrics=['accuracy'])

    generator_loss = GeneratorLoss()
    opt_g = Adam(lr=2e-4, beta_1=0.5)
    gan.compile(optimizer=opt_g, loss=generator_loss)

    list_all_train_metric = []
    list_all_valid_metric = []
    for epoch in range(100000):
        print('epoch:', epoch)

        list_train_metric = []
        for x_batch, y_batch in train_generator():
            gen_data = generator.predict(x_batch)
            X = np.append(y_batch, gen_data, axis=0)
            y = np.array([1] * y_batch.shape[0] + [0] * gen_data.shape[0])
            loss_d, acc_d = discriminator.train_on_batch(X, y)

            X = x_batch
            y = np.hstack([np.ones((y_batch.shape[0], 1)), y_batch])
            loss_g = gan.train_on_batch(X, y)
            loss_g_mge = generator_loss.mge_loss
            loss_g_adv = generator_loss.adv_loss
            list_train_metric.append([loss_d, acc_d, loss_g, loss_g_adv, loss_g_mge])

        mge_adv_loss_weight = generator_loss.mge_adv_loss_weight
        train_metric = np.mean(list_train_metric, axis=0)
        list_all_train_metric.append(train_metric)
        pd.DataFrame(list_all_train_metric).to_csv('logs/train_metric.csv')
        print('train loss:', train_metric)
        print('mge_adv_loss_weight:', mge_adv_loss_weight, train_metric[4] / train_metric[3])

        list_valid_metric = []
        for x_batch, y_batch in valid_generator():
            generated = generator.predict(x_batch)
            loss_g_mge = mean_squared_error(y_batch, generated)

            X = np.append(y_batch, generated, axis=0)
            y = np.array([0] * len(x_batch) + [1] * len(generated))
            pred = discriminator.predict(X)
            loss_d = log_loss(y, pred)
            acc_d = accuracy_score(y, pred > 0.5)
            roc_d = roc_auc_score(y, pred)
            list_valid_metric.append([loss_d, acc_d, roc_d, loss_g_mge])

        valid_metric = np.mean(list_valid_metric, axis=0)
        list_all_valid_metric.append(valid_metric)
        pd.DataFrame(list_all_valid_metric).to_csv('logs/valid_metric.csv')
        print('valid loss: ', valid_metric)
        print('==============')

        if epoch % 100 == 0:
            print('save model')
            generator.save_weights('weights/generator_{}.hdf5'.format(epoch), True)
            discriminator.save_weights('weights/discriminator_{}.hdf5'.format(epoch), True)
# The enclosing train() wrapper with its epoch (i) and batch (j) loops is assumed from
# the variables referenced in the loop body and from the call at the bottom of the
# script; the default n_epochs and n_batch values are likewise assumptions.
def train(dataset, generator, discriminator, gan_model, latent_dim, n_epochs=100, n_batch=256):
    bat_per_epo = int(dataset.shape[0] / n_batch)
    half_batch = int(n_batch / 2)
    for i in range(n_epochs):
        for j in range(bat_per_epo):
            start_time = time.time()
            # get randomly selected 'real' samples
            X_real, y_real = utils.generate_real_samples(dataset, half_batch)
            # update discriminator model weights
            d_loss1, _ = discriminator.train_on_batch(X_real, y_real)
            # generate 'fake' examples
            X_fake, y_fake = utils.generate_fake_samples(generator, latent_dim, half_batch)
            # update discriminator model weights
            d_loss2, _ = discriminator.train_on_batch(X_fake, y_fake)
            # prepare points in latent space as input for the generator
            X_gan = utils.generate_latent_points(latent_dim, n_batch)
            # create inverted labels for the fake samples
            y_gan = tf.ones((n_batch, 1))
            # update the generator via the discriminator's error
            g_loss = gan_model.train_on_batch(X_gan, y_gan)
            # summarize loss on this batch
            time_taken = time.time() - start_time
            print('>%d, %d/%d, d1=%.3f, d2=%.3f g=%.3f Time Taken:%.2f seconds' %
                  (i + 1, j + 1, bat_per_epo, d_loss1, d_loss2, g_loss, time_taken))
        # evaluate the model performance, sometimes
        if (i + 1) % 10 == 0:
            summarize_performance(i, generator, discriminator, dataset, latent_dim)


latent_dim = 100
discriminator = models.get_discriminator()
generator = models.get_generator(latent_dim)
gan_model = models.def_gan(generator, discriminator)
dataset = utils.load_real_samples()
train(dataset, generator, discriminator, gan_model, latent_dim)
def main(args):
    if not tf.gfile.Exists(FLAGS.log_dir):
        tf.gfile.MakeDirs(FLAGS.log_dir)
    run_name = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    run_dir = os.path.join(FLAGS.log_dir, run_name)

    mnist = input_data.read_data_sets('./MNIST_data', one_hot=False)

    global_step = tf.Variable(0, trainable=False, name='global_step')
    training_phase = tf.placeholder_with_default(False, [], 'training_phase')

    mnist_batch = tf.placeholder(tf.float32, [None, MNIST_IMG_W * MNIST_IMG_H], 'mnist_batch')
    real_images = tf.reshape(mnist_batch, [-1, MNIST_IMG_W, MNIST_IMG_H, MNIST_IMG_CHAN])
    random_seed = tf.random_uniform([FLAGS.batch_size, RANDOM_SEED_SIZE], -1., 1.)

    with tf.variable_scope(GENERATOR_SCOPE):
        generator = get_generator(random_seed, training_phase)

    epsilon = tf.random_uniform(shape=(FLAGS.batch_size, 1, 1, 1), minval=0., maxval=1.)
    x_hat = epsilon * real_images + (1.0 - epsilon) * generator
    tf.summary.image('generator', generator)

    with tf.variable_scope(DISCRIMINATOR_SCOPE):
        real_logits, _ = get_discriminator(real_images, training_phase)
        fake_logits, _ = get_discriminator(generator, training_phase, True)
        rand_logits, _ = get_discriminator(x_hat, training_phase, True)

    d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, DISCRIMINATOR_SCOPE)
    g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, GENERATOR_SCOPE)

    # gradient penalty
    grads_rand = tf.gradients(rand_logits, [x_hat])
    gradient_penalty = LAMBDA * tf.square(tf.norm(grads_rand[0], ord=2) - 1.0)

    # calculate discriminator's loss
    # d_loss = em_loss(real_labels, fake_logits) - em_loss(real_labels, rand_logits) + _gradient_penalty
    d_loss = tf.reduce_mean(fake_logits) - tf.reduce_mean(real_logits) + gradient_penalty
    g_loss = -tf.reduce_mean(fake_logits)

    tf.summary.scalar('d_loss', d_loss)
    tf.summary.scalar('g_loss', g_loss)

    d_optim = tf.train.AdamOptimizer(LEARNING_RATE, beta1=BETA_1, beta2=BETA_2) \
        .minimize(d_loss, var_list=d_vars)
    g_optim = tf.train.AdamOptimizer(LEARNING_RATE, beta1=BETA_1, beta2=BETA_2) \
        .minimize(g_loss, global_step=global_step, var_list=g_vars)

    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver(var_list=tf.trainable_variables(), pad_step_number=True)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        summary_writer = tf.summary.FileWriter(run_dir, graph=sess.graph)

        for step in range(FLAGS.max_steps):
            feed_dict = {
                mnist_batch: mnist.train.next_batch(FLAGS.batch_size)[0],
                training_phase: True
            }

            t0 = time.time()
            _, d_loss_val = sess.run([d_optim, d_loss], feed_dict=feed_dict)
            if step > 0 and step % DISCRIMINATOR_ITERS == 0:
                _, g_loss_val, summary = sess.run([g_optim, g_loss, summary_op],
                                                  feed_dict=feed_dict)
            t = time.time() - t0
            examples_per_sec = FLAGS.batch_size / t

            if step > 0 and step % 10 == 0:
                summary_writer.add_summary(summary, global_step=step)
                format_str = '{} step: {} d_loss: {:8f} g_loss: {:8f} ({:2f} ex/s)'
                dt = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                print(format_str.format(dt, step, d_loss_val, g_loss_val, examples_per_sec))

            if step > 0 and (step + 1) % 1000 == 0 or step == FLAGS.max_steps - 1:
                checkpoint_path = os.path.join(run_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=global_step)
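
# Side note: the penalty in main() above takes a single L2 norm over the whole batch of
# interpolate gradients. The WGAN-GP paper penalizes the gradient norm per example; the
# helper below is a hedged sketch of that variant, not the original author's code. It
# reuses the same TF1-style ops and assumes the same LAMBDA constant and NHWC images.
def gradient_penalty_per_example(rand_logits, x_hat):
    grads = tf.gradients(rand_logits, [x_hat])[0]
    # L2 norm of the critic gradient for each sample (sum over height, width, channels)
    per_example_norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1, 2, 3]))
    return LAMBDA * tf.reduce_mean(tf.square(per_example_norm - 1.0))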
EMBEDDING_SIZE = 256

parser = get_parser()
config = parser.parse_args()

if not config.force_mode:
    problems = check_sanity(config)
    if problems:
        print("Discrepancies in command line arguments detected:")
        for problem in problems:
            print(" *", problem)
        print("Use flag --force if you really want to run the script with these parameters.")
        exit(2)

if config.mode == 'train':
    disc = get_discriminator()
    gen = get_generator()
    real_dataset = get_CJK(size=32)  # TODO: adjust this
    noise_dataset = get_noise((EMBEDDING_SIZE,))  # TODO (optional): read this parameter from config

    if config.load_dir is not None:
        load_models(disc, gen, directory=config.load_dir, label=config.load_label)

    train(disc, gen, real_dataset, noise_dataset,
          n_iter=config.iter, k=config.disc_steps, batch_size=config.batch,
          verbose=config.verbose,
          histogram_dir=config.histogram_dir, histogram_freq=config.histogram_freq,
          images=config.images,
          sample_dir=config.sample_dir, sample_freq=config.sample_freq)

    if config.save_dir is not None:
        save_models(disc, gen, directory=config.save_dir, label=config.save_label)

elif config.mode == 'sampling':
print(args)

image_size = 32  # Immutable

if args.cuda:
    gpu_index = args.gpu_index
    torch.cuda.set_device(gpu_index)
    device = torch.device('cuda')
else:
    gpu_index = None
    device = torch.device('cpu')

train_data, _, channels, classes = data_loader.get_train_data(
    args.dataset, args.data_root, args.batch_size, normalize=args.data_normalization)

discriminator = models.get_discriminator(channels, args.ndf)
generator = models.get_generator(args.nz, channels, args.ngf)

if gpu_index:
    discriminator = discriminator.cuda(gpu_index)
    generator = generator.cuda(gpu_index)

if args.classifier:
    # classifier = models.get_classifier(channels, classes)
    classifier = models.vgg13()
    classifier.load_state_dict(torch.load(args.classifier))
    if gpu_index:
        classifier = classifier.cuda(gpu_index)
    classifier = classifier.eval()
    print('Using pre-trained classifier. Classifier loaded from:\n%s' % args.classifier)

fixed_noise = torch.randn(args.batch_size, args.nz, 1, 1, device=device)
from dataset import get_dataset
from models import get_generator, get_discriminator, GANModel

BATCH_SIZE = 12
EPOCHS = 100

if __name__ == '__main__':
    gan = GANModel(generator=get_generator(), discriminator=get_discriminator())

    train_dataset, train_steps = get_dataset("dataset/train", batch_size=BATCH_SIZE)
    valid_dataset, valid_steps = get_dataset("dataset/valid", batch_size=BATCH_SIZE)

    gan.fit(train_dataset,
            steps_pre_epoch=train_steps,
            epochs=EPOCHS,
            valid_dataset=valid_dataset,
            valid_steps=valid_steps)