def set_optimizer(self, model_0):
    """Build an Adam optimizer for ``model_0`` with biases exempt from weight decay.

    Returns the configured ``optim.Adam`` instance; also logs the run
    options and every trainable parameter name.
    """
    # Split parameters into two groups: biases get weight_decay=0,
    # everything else uses the configured regularization strength.
    named = list(model_0.named_parameters())
    decayed = [value for name, value in named if '.bias' not in name]
    exempt = [value for name, value in named if '.bias' in name]
    # Sanity check: the partition must cover every parameter exactly once.
    assert len(decayed) + len(exempt) == len(list(model_0.parameters()))

    param_groups = [
        dict(params=decayed),
        dict(params=exempt, weight_decay=.0),
    ]
    optimizer = optim.Adam(param_groups,
                           lr=self.params['lr'],
                           weight_decay=self.params['reg'],
                           amsgrad=True)

    # Log configuration and the names of all trainable parameters.
    print_options(self.params)
    for name, value in model_0.named_parameters():
        if value.requires_grad:
            print(name)
    return optimizer
def question_action(post):
    """The execution loop of a user to take post actions on selected question

    Args:
        post (post_row): The post on which post_actions are being executed
    """
    db.increment_question_view_count(post)
    print_all_post_details(post)
    # Menu entries shown to the user; numbers map 1-based onto this list.
    menu = ["Answer question", "See answers", "Vote"]
    while True:
        print_options(menu)
        choice = request_input()[0]
        if choice == "/back":
            return
        if choice == "1":
            # Answer the question
            post_answer(post)
        elif choice == "2":
            # Browse existing answers; drill into one if selected
            selected = see_question_answers(post)
            if selected is not None:
                answer_action(selected)
        elif choice == "3":
            # Vote on the question
            post_vote(post)
        else:
            print_invalid_option()
        print("")
def logged_in(uid_param):
    """The execution loop for a user

    Args:
        uid_param (str): The uid of the user
    """
    global uid
    uid = uid_param
    print("To exit program type `/exit` at any point")
    print("In any submenu or input, type `/back` to return up a level.")
    while True:
        print_options(["Post a question", "Search for questions"])
        choice = request_input()[0]
        if choice == "/back":
            # Nothing above this menu; just tell the user.
            print("Already at the top-level menu.")
        elif choice == "1":
            post_question()
            print("")
        elif choice == "2":
            selected = search_select_questions()
            if selected is not None:
                question_action(selected)
        else:
            print_invalid_option(max_option=2)
def room(hero, level, game_level):
    """Run a dungeon junction: let the hero pick a room, or enter the boss lair.

    Args:
        hero: the player character (mutated by battle/potion actions)
        level: difficulty level used to spawn monsters
        game_level (str): "room" for the junction menu, "boss" for the boss lair
    """
    junction_map = {1: "battle room", 2: "trap room", 3: "aeon chamber"}
    if game_level == "room":
        room_choice = 0
        # Fixed grammar in the user-facing prompt ("would you to enter").
        print_slow("Which room would you like to enter?\n")
        # NOTE(review): room_choice holds a room *name* while junction_map is
        # keyed by ints, so this condition is always true; the loop only ends
        # via the `break` in the battle-room branch. Preserved as-is — confirm
        # whether trap/aeon rooms are meant to loop back to the menu.
        while room_choice not in junction_map:
            print_options(junction_map)
            choice = input_int()
            room_choice = junction_map.get(choice)
            if room_choice == "battle room":
                enemy = Monster.monster_spawn(level)
                print_slow("You encounter some dangerous looking"
                           " {}\n".format(enemy.name))
                battle_choices(hero, enemy)
                junction_map.pop(choice)  # a cleared room can't be re-entered
                break
            elif room_choice == "trap room":
                junction_map.pop(choice)
                print("Trap room coming soon...")
                continue
                # call trap room function
            elif room_choice == "aeon chamber":
                junction_map.pop(choice)
                print("Aeon Chamber coming soon...")
                # call Aeon chamber function
                continue
            else:
                print("Please choose a valid input...")
    if game_level == "boss":
        print_slow("You've entered the lair of a dangerous boss!")
        print_slow("Boss content coming soon...")
def _options(self, item, category, action):
    """Menu loop for a selected inventory item: equip it, drop it, or go back.

    Args:
        item: the inventory item the user selected
        category: the equipment slot/category the item belongs to
        action: the original view action, forwarded to view_category
    """
    options_map = {1: "equip", 2: "drop", 3: "back"}
    while True:
        old_item = self.get_equipped(category)
        print_slow("Currently equipped: {} \n".format(old_item))
        print_slow("Selected item : {} \n".format(item))
        print_options(options_map)
        # .get() so an out-of-range number reprompts instead of raising
        # KeyError (matches the other menus in this file).
        answer = options_map.get(input_int())
        if answer == "equip":
            self.equip(item, category, old_item)
            self.view_category(action, category)
            break
        elif answer == "drop":
            # Confirmation sub-menu for destructive action.
            while True:
                print_slow("Are you sure you want to drop item? \n")
                print_slow("1. Yes \n" + "2. No \n")
                confirm = input_int()
                if confirm == 1:
                    self.drop(item, category)
                    # Item no longer exists; leave its menu entirely.
                    # (Previously this recursed back into _options for the
                    # dropped item, growing the stack and re-showing a menu
                    # for an item that was already gone.)
                    return
                elif confirm == 2:
                    # Declined: fall back to the item menu.
                    break
                else:
                    print_slow("Please choose a valid option")
        elif answer == "back":
            # Option 3 was listed but previously unhandled, so the menu
            # looped forever; now it exits as labeled.
            break
        else:
            print_slow("Please choose a valid option")
def __init__(self, num_features=2000, do_tf_logging=False):
    """Load the pre-trained LF-Net keypoint/descriptor network (TF1 graph mode).

    Args:
        num_features (int): max keypoints to extract (sets config.top_k).
        do_tf_logging (bool): enable/suppress TensorFlow log output.

    Raises:
        ValueError: if no checkpoint can be loaded from the configured path.
    """
    print('Using LfNetFeature2D')
    self.lock = RLock()  # guards extraction calls from multiple threads
    self.model_base_path = kLfNetBasePath
    self.model_path = kLfNetModelPath
    self.lfnet_config = build_lfnet_config()
    print_options(self.lfnet_config, 'LFNET CONFIG')
    self.num_features = num_features
    # top_k controls how many keypoints the network returns.
    self.lfnet_config.top_k = self.num_features
    set_tf_logging(do_tf_logging)
    print('==> Loading pre-trained network.')
    # Build Networks — placeholders must exist before build_networks wires the graph.
    tf.reset_default_graph()
    self.photo_ph = tf.placeholder(
        tf.float32,
        [1, None, None, 1])  # input grayscale image, normalized by 0~1
    is_training = tf.constant(False)  # Always False in testing
    self.ops = build_networks(self.lfnet_config, self.photo_ph, is_training)
    tf_config = tf.ConfigProto()
    # Grow GPU memory on demand instead of grabbing it all up front.
    tf_config.gpu_options.allow_growth = True
    self.session = tf.Session(config=tf_config)
    self.session.run(tf.global_variables_initializer())
    # load model: config.model may be a checkpoint directory or a checkpoint file.
    saver = tf.train.Saver()
    print('Load trained models...')
    if os.path.isdir(self.lfnet_config.model):
        checkpoint = tf.train.latest_checkpoint(self.lfnet_config.model)
        model_dir = self.lfnet_config.model
    else:
        checkpoint = self.lfnet_config.model
        model_dir = os.path.dirname(self.lfnet_config.model)
    if checkpoint is not None:
        print('Checkpoint', os.path.basename(checkpoint))
        print("[{}] Resuming...".format(time.asctime()))
        saver.restore(self.session, checkpoint)
    else:
        raise ValueError('Cannot load model from {}'.format(model_dir))
    print('==> Successfully loaded pre-trained network.')
    # Per-frame extraction results, filled in by later detect/compute calls.
    self.pts = []
    self.kps = []
    self.des = []
    self.frame = None
    self.keypoint_size = 20.  # just a representative size for visualization and in order to convert extracted points to cv2.KeyPoint
def base_choices():
    """Prompt until the player picks a valid top-level action; return its name."""
    choice_map = {1: "explore", 2: "inventory", 3: "quit"}
    while True:
        print_options(choice_map)
        selection = choice_map.get(input_int())
        # .get() returns None for any number outside 1-3.
        if selection in choice_map.values():
            return selection
        print("Choose a valid option...")
def battle_choices(hero, enemy):
    """Show the battle menu, run the chosen action, and return the hero."""
    battle_map = {1: "attack", 2: "flee", 3: "potion"}
    print_options(battle_map)
    selection = battle_map.get(input_int())
    if selection == "attack":
        fight(hero, enemy)    # launches battle
    elif selection == "flee":
        flee(hero, enemy)     # attempts to flee the battle room
    elif selection == "potion":
        potion(hero, enemy)   # allows hero to consume potions
    else:
        # Invalid number: re-prompt by recursing into the same menu.
        print("Please select a valid input...")
        battle_choices(hero, enemy)
    return hero
def search_tasks():
    """
    Starts the looping over the search actions. Once done, it falls
    back to main menu loop actions
    :return:
    """
    # Dispatch table: menu letter -> search handler.
    handlers = {
        'a': get_by_exact_date,
        'b': get_by_range,
        'c': get_by_string,
        'd': get_by_time,
        'e': get_by_regex,
    }
    while True:
        print("Do you want to search by:" + "\n")
        selection = input(
            utils.print_options(utils.SEARCHING_CRITERIA_ORDER,
                                utils.SEARCHING_CRITERIA))
        if selection == 'f':
            # 'f' exits back to the main menu.
            utils.clean_scr()
            return
        handler = handlers.get(selection)
        if handler is not None:
            utils.clean_scr()
            handler()
        else:
            utils.clean_scr()
            print('Please select a letter option.')
def __init__(
        self,
        num_features=2000,
        num_levels=5,  # The number of downsample levels in the pyramid.
        scale_factor=2,  # The scale factor to extract patches before descriptor.
        scale_factor_levels=np.sqrt(2),  # The scale factor between the pyramid levels.
        do_cuda=True,
        do_tf_logging=False):
    """Configure and load the pre-trained Key.Net detector + descriptor.

    Args:
        num_features (int): max keypoints (config.num_points).
        num_levels (int): pyramid downsample levels.
        scale_factor: patch-extraction scale before the descriptor.
        scale_factor_levels: scale ratio between pyramid levels.
        do_cuda (bool): request GPU; actual use also requires CUDA availability.
        do_tf_logging (bool): enable/suppress TensorFlow log output.
    """
    print('Using KeyNetDescFeature2D')
    self.lock = RLock()  # guards extraction calls from multiple threads
    self.model_base_path = config.cfg.root_folder + '/thirdparty/keynet/'

    set_tf_logging(do_tf_logging)

    # Fixed: use short-circuit `and` instead of bitwise `&` on booleans.
    self.do_cuda = do_cuda and torch.cuda.is_available()
    print('cuda:', self.do_cuda)
    # (removed unused local `device` — nothing in this constructor read it)
    self.session = None

    self.keypoint_size = 8  # just a representative size for visualization and in order to convert extracted points to cv2.KeyPoint

    # Per-frame extraction results, filled in by later detect/compute calls.
    self.pts = []
    self.kps = []
    self.des = []
    self.scales = []
    self.scores = []
    self.frame = None

    keynet_config = build_keynet_config(self.model_base_path)
    self.keynet_config = keynet_config
    keynet_config.num_points = num_features
    keynet_config.pyramid_levels = num_levels
    keynet_config.scale_factor = scale_factor
    keynet_config.scale_factor_levels = scale_factor_levels
    print_options(self.keynet_config, 'KEYNET CONFIG')

    print('==> Loading pre-trained network.')
    self.load_model()
    print('==> Successfully loaded pre-trained network.')
def mark_as_accepted(pid):
    """Allows user to mark a selected answer as accepted

    Args:
        pid (str): The pid of the answer which is being accepted

    Returns:
        (bool): True if the user chooses to logout, None otherwise
    """
    question = db.get_question_of_answer(pid)
    if question is None:
        print("Failed to find the question of this answer")
        return
    current_accepted = question[1]
    if current_accepted is not None:
        if current_accepted == pid:
            print("This answer is already marked as accepted")
            return
        # Another answer is accepted — ask whether to replace it.
        print("Mark answer as accepted")
        print("The answer's question already has an accepted answer.")
        print_options(
            ["Cancel and go back", "Replace current accepted answer"])
        while True:
            choice = request_input()[0]
            if choice == "/logout":
                return True
            if choice == "1" or choice == "/back":
                print("The operation was cancelled")
                return
            if choice == "2":
                break
            print_invalid_option(2)
    # Either no accepted answer existed, or the user chose to replace it.
    if db.mark_accepted(pid, question[0]):
        print("The answer was successfully marked as accepted")
    else:
        print("Failed to mark the answer as accepted")
def logged_in(uid_param, is_privileged_param):
    """The execution loop for a user once logged in

    Args:
        uid_param (str): The uid of the logged in user
        is_privileged_param (bool): True if privileged user, False otherwise
    """
    global uid
    uid = uid_param
    global is_privileged
    is_privileged = is_privileged_param
    print("Now logged in. To log out, type `/logout` at anytime.")
    print("In any submenu or input, type `/back` to return up a level.")
    while True:
        print_options(["Post a question", "Search posts"])
        choice = request_input()[0]
        should_logout = None
        if choice == "/back":
            print("Already at the top-level menu. To logout, type `/logout`.")
        elif choice == "/logout":
            should_logout = True
        elif choice == "1":
            # Sub-flows return truthy when the user logged out inside them.
            should_logout = post_question()
            print("")
        elif choice == "2":
            post, should_logout = search_select_posts()
            if not should_logout and post is not None:
                should_logout = post_action(post)
        else:
            print_invalid_option(max_option=2)
        if should_logout:
            return
def login_or_signup():
    """The execution loop for a user to login or signup

    Returns:
        (str): The uid of the logged in user, None if login/signup failed
        (bool): True if user is priviledged, False if user is not priviledged,
            None if login/signup failed
    """
    print("To exit program type `/exit` at any point")
    print_options(["Login", "Sign-up"])
    while True:
        choice = request_input()[0]
        if choice == '1':
            # login() returns the (uid, privileged) result itself.
            return login()
        if choice == '2':
            # New accounts are never privileged.
            return signup(), False
        print_invalid_option(max_option=2)
def answer_action(post):
    """The execution loop of a user to take answer actions on selected post

    Args:
        post (post_row): The post on which post_actions are being executed
    """
    menu = ["Vote"]
    while True:
        print_options(menu)
        choice = request_input()[0]
        if choice == "/back":
            return
        if choice == "1":
            # Voting is the only action; leave the menu once it is done.
            post_vote(post)
            return
        print_invalid_option()
        print("")
def execute():
    """Main menu loop: add entries, search tasks, or quit with 'c'."""
    while True:
        print("\n" + "What would you like to do: " + "\n")
        selection = input(
            utils.print_options(utils.OPTIONS_ORDER, utils.OPTIONS_TEXT))
        if selection == 'c':
            utils.clean_scr()
            print('Thanks.')
            return
        if selection == 'a':
            utils.clean_scr()
            add_entry_data()
        elif selection == 'b':
            utils.clean_scr()
            search_tasks()
        else:
            print('Please select a letter option.')
def trainer(args, parser):
    """Build the experiment log dir, data iterator, G/D models and trainer
    (all resolved by name via getattr on this module), then train or resave.

    Args:
        args: parsed argparse namespace with all experiment hyperparameters.
        parser: the argparse parser, forwarded to print_options for logging.

    Returns:
        str: the log directory used for this run.
    """
    log_dir = args.logdir
    checkpoint_dir = args.checkpoint_dir
    LAMBDAsal = args.LAMBDAsal
    # NOTE(review): LAMBDAFM, LAMBDAD, LAMBDA_s and nb_blocks are bound here
    # but never read in this function — presumably consumed by the trainer
    # class via `args`; confirm before removing.
    LAMBDAFM = args.LAMBDAFM
    LAMBDAD = args.LAMBDAD
    LAMBDA_p = args.LAMBDA_p
    LAMBDA_s = args.LAMBDA_s
    LAMBDA_r = args.LAMBDA_r
    base_lr = args.lr
    max_step = args.maxstep
    salmodelpath = args.salmodelpath
    batch_size = args.batch_size
    random_s = args.random_s
    nb_blocks = args.nb_blocks
    # Fresh run: derive a log dir that encodes the hyperparameters, keyed on
    # which trainer variant is used (the variants log different lambdas).
    if not args.checkpoint_dir:
        if 'withstyle' in args.trainer:
            log_dir = os.path.join(
                log_dir, args.D, args.dataloader, args.trainer, args.G,
                'Lsal_%.2f_Lp_%.3f_Lr_%.3f_ndf_%d_ngf_%d_lrg_%.5f_lrd_%.5f_donormG_%s_donormD_%s'
                % (LAMBDAsal, LAMBDA_p, LAMBDA_r, args.ndf, args.ngf,
                   args.lr, args.lrd, str(args.donormG), str(args.donormD)),
                datetime.now().strftime("%Y%m%d-%H%M%S"))
        elif 'Ploss' in args.trainer:
            log_dir = os.path.join(
                log_dir, args.D, args.dataloader, args.trainer, args.G,
                'Lsal_%.2f_Lp_%.3f_ndf_%d_ngf_%d_lrg_%.5f_lrd_%.5f_donormG_%s_donormD_%s'
                % (LAMBDAsal, LAMBDA_p, args.ndf, args.ngf,
                   args.lr, args.lrd, str(args.donormG), str(args.donormD)),
                datetime.now().strftime("%Y%m%d-%H%M%S"))
        elif 'neurons' in args.trainer:
            log_dir = os.path.join(
                log_dir, args.D, args.dataloader, args.trainer, args.G,
                'Lsal_%.2f_ndf_%d_ngf_%d_lrg_%.5f_lrd_%.5f_donormG_%s_donormD_%s_fcdim_%d_nbneurons_%d'
                % (LAMBDAsal, args.ndf, args.ngf, args.lr, args.lrd,
                   str(args.donormG), str(args.donormD), args.fc_dim,
                   args.nb_neurons),
                datetime.now().strftime("%Y%m%d-%H%M%S"))
        else:
            log_dir = os.path.join(
                log_dir, args.D, args.dataloader, args.trainer, args.G,
                'Lsal_%.2f_ndf_%d_ngf_%d_lrg_%.5f_lrd_%.5f_donormG_%s_donormD_%s_fcdim_%d'
                % (LAMBDAsal, args.ndf, args.ngf, args.lr, args.lrd,
                   str(args.donormG), str(args.donormD), args.fc_dim),
                datetime.now().strftime("%Y%m%d-%H%M%S"))
        if (not os.path.exists(log_dir)):
            os.makedirs(log_dir)
    else:
        # Resuming: reuse the checkpoint directory as the log directory.
        log_dir = args.checkpoint_dir
    to_restore = checkpoint_dir
    print_freq = args.print_freq
    print_options(parser, args, log_dir, True)
    # Resolve the data-loading function by name from this module.
    get_data_G = getattr(sys.modules[__name__], args.get_data_G)
    cocoiterator, max_images_coco = get_data_G(
        args.dataloader, os.path.join(args.data, 'maskdir'), args.path, True,
        batch_size, random_s, args.shape1, args.shape2, args.drop_remainder)
    # Resolve generator, discriminator and trainer classes by name.
    Gmodel = getattr(sys.modules[__name__], args.G)
    Dmodel = getattr(sys.modules[__name__], args.D)
    model_trainer = getattr(sys.modules[__name__], args.trainer)
    model_trainer = model_trainer(Gmodel, Dmodel, salmodelpath, cocoiterator,
                                  print_freq, log_dir, to_restore, base_lr,
                                  max_step, checkpoint_dir, max_images_coco,
                                  batch_size, args)
    # model_trainer = model_trainer(Gmodel, Dmodel, salmodelpath, cocoiterator, print_freq, log_dir, to_restore, base_lr,
    #                               max_step, checkpoint_dir, max_images_coco, batch_size, args)
    if args.resave:
        model_trainer.resave()
    else:
        model_trainer.train()
    return log_dir
def train(options):
    """Train the vanilla VAE described by ``options`` (TF1 graph mode).

    Opens a set of dashboard CSV logs, builds the model/optimizer graph,
    then runs the epoch/batch loop with periodic logging, checkpointing,
    parameter pickling and validation sampling.

    Args:
        options (dict): experiment configuration (paths, model/layer specs,
            batch_size, lr, DKL_weight, sigma_clip, logging/saving/validation
            frequencies, reload flags, ...).

    Returns:
        (1., 1., 1.) if a NaN/Inf loss aborts training; otherwise None.
    """
    # Get logger
    log = utils.get_logger(os.path.join(options['model_dir'], 'log.txt'))

    # Dump the full options dict to the dashboard for reproducibility.
    options_file = open(os.path.join(options['dashboard_dir'], 'options'), 'w')
    options_file.write(options['description'] + '\n')
    options_file.write(
        'DKL Weight: {}\nLog Sigma^2 clipped to: [{}, {}]\n\n'.format(
            options['DKL_weight'],
            -options['sigma_clip'],
            options['sigma_clip']
        )
    )
    for optn in options:
        options_file.write(optn)
        options_file.write(':\t')
        options_file.write(str(options[optn]))
        options_file.write('\n')
    options_file.close()

    # Dashboard Catalog: index of every CSV the dashboard should render.
    catalog = open(os.path.join(options['dashboard_dir'], 'catalog'), 'w')
    catalog.write(
        """filename,type,name
options,plain,Options
train_loss.csv,csv,Train Loss
ll.csv,csv,Neg. Log-Likelihood
dec_log_sig_sq.csv,csv,Decoder Log Simga^2
dec_std_log_sig_sq.csv,csv,STD of Decoder Log Simga^2
dec_mean.csv,csv,Decoder Mean
dkl.csv,csv,DKL
enc_log_sig_sq.csv,csv,Encoder Log Sigma^2
enc_std_log_sig_sq.csv,csv,STD of Encoder Log Sigma^2
enc_mean.csv,csv,Encoder Mean
val_loss.csv,csv,Validation Loss
"""
    )
    catalog.flush()

    # One CSV per tracked statistic; headers written below.
    train_log = open(os.path.join(options['dashboard_dir'], 'train_loss.csv'), 'w')
    val_log = open(os.path.join(options['dashboard_dir'], 'val_loss.csv'), 'w')
    dkl_log = open(os.path.join(options['dashboard_dir'], 'dkl.csv'), 'w')
    ll_log = open(os.path.join(options['dashboard_dir'], 'll.csv'), 'w')
    dec_sig_log = open(os.path.join(options['dashboard_dir'], 'dec_log_sig_sq.csv'), 'w')
    enc_sig_log = open(os.path.join(options['dashboard_dir'], 'enc_log_sig_sq.csv'), 'w')
    dec_std_sig_log = open(os.path.join(options['dashboard_dir'], 'dec_std_log_sig_sq.csv'), 'w')
    enc_std_sig_log = open(os.path.join(options['dashboard_dir'], 'enc_std_log_sig_sq.csv'), 'w')
    dec_mean_log = open(os.path.join(options['dashboard_dir'], 'dec_mean.csv'), 'w')
    enc_mean_log = open(os.path.join(options['dashboard_dir'], 'enc_mean.csv'), 'w')
    # val_sig_log = open(os.path.join(options['dashboard_dir'], 'val_log_sig_sq.csv'), 'w')

    train_log.write('step,time,Train Loss\n')
    val_log.write('step,time,Validation Loss\n')
    dkl_log.write('step,time,DKL\n')
    ll_log.write('step,time,-LL\n')
    dec_sig_log.write('step,time,Decoder Log Sigma^2\n')
    enc_sig_log.write('step,time,Encoder Log Sigma^2\n')
    dec_std_sig_log.write('step,time,STD of Decoder Log Sigma^2\n')
    enc_std_sig_log.write('step,time,STD of Encoder Log Sigma^2\n')
    dec_mean_log.write('step,time,Decoder Mean\n')
    enc_mean_log.write('step,time,Encoder Mean\n')

    # Print options
    utils.print_options(options, log)

    # Load dataset ----------------------------------------------------------------------
    # Train provider
    train_provider, val_provider, test_provider = get_providers(options, log, flat=True)

    # Initialize model ------------------------------------------------------------------
    with tf.device('/gpu:0'):
        model = cupboard(options['model'])(
            options['p_layers'],
            options['q_layers'],
            np.prod(options['img_shape']),
            options['latent_dims'],
            options['DKL_weight'],
            options['sigma_clip'],
            'vanilla_vae'
        )
        log.info('Model initialized')

        # Define inputs: flattened images for the encoder, latent samples
        # for the stand-alone decoder/sampler.
        model_input_batch = tf.placeholder(
            tf.float32,
            shape = [options['batch_size'], np.prod(np.array(options['img_shape']))],
            name = 'enc_inputs'
        )
        sampler_input_batch = tf.placeholder(
            tf.float32,
            shape = [options['batch_size'], options['latent_dims']],
            name = 'dec_inputs'
        )
        log.info('Inputs defined')

        # Define forward pass
        cost_function = model(model_input_batch)
        log.info('Forward pass graph built')

        # Define sampler
        sampler = model.build_sampler(sampler_input_batch)
        log.info('Sampler graph built')

        # Define optimizer
        optimizer = tf.train.AdamOptimizer(
            learning_rate=options['lr']
        )
        # optimizer = tf.train.GradientDescentOptimizer(learning_rate=options['lr'])
        # NOTE(review): train_step is built but never run — the loop uses
        # `backpass` (clipped gradients) instead; confirm train_step is dead.
        train_step = optimizer.minimize(cost_function)

        # Get gradients, clip each to norm 5.0, and build the actual update op.
        grads = optimizer.compute_gradients(cost_function)
        grad_tensors = [gv[0] for gv in grads]
        clip_grads = [(tf.clip_by_norm(gv[0], 5.0, name='grad_clipping'), gv[1]) for gv in grads]
        # Update op
        backpass = optimizer.apply_gradients(clip_grads)
        log.info('Optimizer graph built')

    # Define init operation
    init_op = tf.initialize_all_variables()
    log.info('Variable initialization graph built')

    # Define op to save and restore variables
    saver = tf.train.Saver()
    log.info('Save operation built')
    # --------------------------------------------------------------------------

    # Train loop ---------------------------------------------------------------
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        log.info('Session started')

        # Initialize shared variables or restore
        if options['reload']:
            saver.restore(sess, options['reload_file'])
            log.info('Shared variables restored')
            # (legacy commented-out test/visualization hooks removed here)
        else:
            sess.run(init_op)
            log.info('Shared variables initialized')

        # Running average over the last 10 batch losses.
        last_losses = np.zeros((10))
        batch_abs_idx = 0
        for epoch_idx in xrange(options['n_epochs']):
            batch_rel_idx = 0
            log.info('Epoch {}'.format(epoch_idx + 1))

            for inputs in train_provider:
                # Some providers yield (data, label) tuples; keep only data.
                if isinstance(inputs, tuple):
                    inputs = inputs[0]

                batch_abs_idx += 1
                batch_rel_idx += 1

                # One optimization step; fetch stats alongside the update.
                # result indices: 0 cost, 1 backpass, 2 DKL, 3 rec_loss,
                # 4 dec_log_std_sq, 5 enc_log_std_sq, 6 enc_mean, 7 dec_mean
                result = sess.run(
                    [cost_function, backpass, model.DKL, model.rec_loss, model.dec_log_std_sq, model.enc_log_std_sq, model.enc_mean, model.dec_mean],
                    feed_dict = {
                        model_input_batch: inputs
                    }
                )

                cost = result[0]

                # Dashboard logging every 10 batches.
                # NOTE(review): '2016-04-22' is a hard-coded timestamp
                # placeholder in every CSV row — confirm intended.
                if batch_abs_idx % 10 == 0:
                    train_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(last_losses)))
                    dkl_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', -np.mean(result[2])))
                    ll_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', -np.mean(result[3])))
                    train_log.flush()
                    dkl_log.flush()
                    ll_log.flush()
                    dec_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[4])))
                    enc_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[5])))
                    # val_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[6])))
                    dec_sig_log.flush()
                    enc_sig_log.flush()
                    dec_std_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.std(result[4])))
                    enc_std_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.std(result[5])))
                    dec_mean_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[7])))
                    enc_mean_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[6])))
                    dec_std_sig_log.flush()
                    enc_std_sig_log.flush()
                    dec_mean_log.flush()
                    enc_mean_log.flush()

                # Check cost: abort the whole run on NaN/Inf, dumping debug
                # info first.
                if np.isnan(cost) or np.isinf(cost):
                    log.info('NaN detected')
                    for i in range(len(result)):
                        print("\n\nresult[%d]:" % i)
                        try:
                            print(np.any(np.isnan(result[i])))
                        except:
                            pass
                        print(result[i])
                    print(result[3].shape)
                    print(model._encoder.layers[0].weights['w'].eval())
                    print('\n\nAny:')
                    # NOTE(review): result has only 8 entries, so these
                    # indices (8..10) will raise IndexError inside this
                    # debug path — stale indices from an older fetch list.
                    print(np.any(np.isnan(result[8])))
                    print(np.any(np.isnan(result[9])))
                    print(np.any(np.isnan(result[10])))
                    print(inputs)
                    return 1., 1., 1.

                # Update last losses
                last_losses = np.roll(last_losses, 1)
                last_losses[0] = cost

                # Display training information
                if np.mod(epoch_idx, options['freq_logging']) == 0:
                    log.info('Epoch {:02}/{:02} Batch {:03} Current Loss: {:0>15.4f} Mean last losses: {:0>15.4f}'.format(
                        epoch_idx + 1,
                        options['n_epochs'],
                        batch_abs_idx,
                        float(cost),
                        np.mean(last_losses)
                    ))
                    log.info('Batch Mean LL: {:0>15.4f}'.format(np.mean(result[3], axis=0)))
                    log.info('Batch Mean -DKL: {:0>15.4f}'.format(np.mean(result[2], axis=0)))

                # Save model
                if np.mod(batch_abs_idx, options['freq_saving']) == 0:
                    saver.save(sess, os.path.join(options['model_dir'], 'model_at_%d.ckpt' % batch_abs_idx))
                    log.info('Model saved')

                    # Pickle the layer parameters alongside the checkpoint.
                    save_dict = {}
                    # Save encoder params ------------------------------------------------------------------
                    # NOTE(review): the loop overwrites save_dict['encoder']
                    # each iteration, so only the LAST encoder layer is
                    # pickled (same for 'decoder' below) — confirm intended.
                    for i in range(len(model._encoder.layers)):
                        layer_dict = {
                            'input_dim':model._encoder.layers[i].input_dim,
                            'output_dim':model._encoder.layers[i].output_dim,
                            'act_fn':model._encoder.layers[i].activation,
                            'W':model._encoder.layers[i].weights['w'].eval(),
                            'b':model._encoder.layers[i].weights['b'].eval()
                        }
                        save_dict['encoder'] = layer_dict
                    layer_dict = {
                        'input_dim':model._enc_mean.input_dim,
                        'output_dim':model._enc_mean.output_dim,
                        'act_fn':model._enc_mean.activation,
                        'W':model._enc_mean.weights['w'].eval(),
                        'b':model._enc_mean.weights['b'].eval()
                    }
                    save_dict['enc_mean'] = layer_dict
                    layer_dict = {
                        'input_dim':model._enc_log_std_sq.input_dim,
                        'output_dim':model._enc_log_std_sq.output_dim,
                        'act_fn':model._enc_log_std_sq.activation,
                        'W':model._enc_log_std_sq.weights['w'].eval(),
                        'b':model._enc_log_std_sq.weights['b'].eval()
                    }
                    save_dict['enc_log_std_sq'] = layer_dict

                    # Save decoder params ------------------------------------------------------------------
                    for i in range(len(model._decoder.layers)):
                        layer_dict = {
                            'input_dim':model._decoder.layers[i].input_dim,
                            'output_dim':model._decoder.layers[i].output_dim,
                            'act_fn':model._decoder.layers[i].activation,
                            'W':model._decoder.layers[i].weights['w'].eval(),
                            'b':model._decoder.layers[i].weights['b'].eval()
                        }
                        save_dict['decoder'] = layer_dict
                    layer_dict = {
                        'input_dim':model._dec_mean.input_dim,
                        'output_dim':model._dec_mean.output_dim,
                        'act_fn':model._dec_mean.activation,
                        'W':model._dec_mean.weights['w'].eval(),
                        'b':model._dec_mean.weights['b'].eval()
                    }
                    save_dict['dec_mean'] = layer_dict
                    layer_dict = {
                        'input_dim':model._dec_log_std_sq.input_dim,
                        'output_dim':model._dec_log_std_sq.output_dim,
                        'act_fn':model._dec_log_std_sq.activation,
                        'W':model._dec_log_std_sq.weights['w'].eval(),
                        'b':model._dec_log_std_sq.weights['b'].eval()
                    }
                    save_dict['dec_log_std_sq'] = layer_dict

                    pickle.dump(save_dict, open(os.path.join(options['model_dir'], 'vae_dict_%d' % batch_abs_idx), 'wb'))

                # Validate model
                if np.mod(batch_abs_idx, options['freq_validation']) == 0:
                    # NOTE(review): this expression's value is discarded —
                    # looks like a leftover debug statement.
                    model._decoder.layers[0].weights['w'].eval()[:5,:5]

                    valid_costs = []
                    seen_batches = 0
                    for val_batch in val_provider:
                        if isinstance(val_batch, tuple):
                            val_batch = val_batch[0]
                        val_cost = sess.run(
                            cost_function,
                            feed_dict = {
                                model_input_batch: val_batch
                            }
                        )
                        valid_costs.append(val_cost)
                        seen_batches += 1
                        if seen_batches == options['valid_batches']:
                            break

                    # Print results
                    log.info('Validation loss: {:0>15.4f}'.format(
                        float(np.mean(valid_costs))
                    ))

                    # Draw samples from the prior N(0, I) and decode them.
                    val_samples = sess.run(
                        sampler,
                        feed_dict = {
                            sampler_input_batch: MVN(
                                np.zeros(options['latent_dims']),
                                np.diag(np.ones(options['latent_dims'])),
                                size = options['batch_size']
                            )
                        }
                    )

                    val_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(valid_costs)))
                    val_log.flush()

                    # Save reconstructions, inputs and prior samples for the dashboard.
                    save_ae_samples(
                        catalog,
                        np.reshape(result[7], [options['batch_size']]+options['img_shape']),
                        np.reshape(inputs, [options['batch_size']]+options['img_shape']),
                        np.reshape(val_samples, [options['batch_size']]+options['img_shape']),
                        batch_abs_idx,
                        options['dashboard_dir'],
                        num_to_save=5,
                        save_gray=True
                    )
                    # (legacy commented-out save_dash_samples / save_samples calls removed)

            log.info('End of epoch {}'.format(epoch_idx + 1))
def train(options):
    """Train the CIFAR CNN classifier described by ``options`` (TF1 graph mode).

    Opens dashboard CSV logs, builds the classifier graph and Adam
    optimizer, then runs the epoch/batch loop with periodic logging,
    checkpointing (plus pickled conv-layer metadata) and validation.

    Args:
        options (dict): experiment configuration (paths, img_shape,
            input_channels, num_classes, conv/fc layer specs, batch_size,
            lr, logging/saving/validation frequencies, reload flag, ...).

    Returns:
        (1., 1., 1.) if a NaN/Inf loss aborts training; otherwise None.
    """
    # Get logger
    log = utils.get_logger(os.path.join(options['model_dir'], 'log.txt'))

    # Dump the full options dict to the dashboard for reproducibility.
    options_file = open(os.path.join(options['dashboard_dir'], 'options'), 'w')
    options_file.write(options['description'] + '\n')
    for optn in options:
        options_file.write(optn)
        options_file.write(':\t')
        options_file.write(str(options[optn]))
        options_file.write('\n')
    options_file.close()

    # Dashboard Catalog: index of every CSV the dashboard should render.
    catalog = open(os.path.join(options['dashboard_dir'], 'catalog'), 'w')
    catalog.write(
        """filename,type,name
options,plain,Options
train_loss.csv,csv,Train Loss
train_acc.csv,csv,Train Accuracy
val_loss.csv,csv,Validation Loss
val_acc.csv,csv,Validation Accuracy
"""
    )
    catalog.flush()

    train_log = open(os.path.join(options['dashboard_dir'], 'train_loss.csv'), 'w')
    train_acc_log = open(os.path.join(options['dashboard_dir'], 'train_acc.csv'), 'w')
    val_log = open(os.path.join(options['dashboard_dir'], 'val_loss.csv'), 'w')
    val_acc_log = open(os.path.join(options['dashboard_dir'], 'val_acc.csv'), 'w')

    train_log.write('step,time,Train Loss\n')
    val_log.write('step,time,Validation Loss\n')
    train_acc_log.write('step,time,Train Accuracy\n')
    val_acc_log.write('step,time,Validation Accuracy\n')

    # Print options
    utils.print_options(options, log)

    # Load dataset ----------------------------------------------------------------------
    # Train provider: number of examples = files in train/info minus 2
    # bookkeeping files (mean/std presumably — TODO confirm).
    num_data_points = len(
        os.listdir(
            os.path.join(options['data_dir'], 'train', 'info')
        )
    )
    num_data_points -= 2

    train_provider = DataProvider(
        num_data_points,
        options['batch_size'],
        toolbox.CIFARLoader(
            data_dir = os.path.join(options['data_dir'], 'train'),
            flat=False
        )
    )

    # Valid provider
    num_data_points = len(
        os.listdir(
            os.path.join(options['data_dir'], 'valid', 'info')
        )
    )
    num_data_points -= 2
    print(num_data_points)

    val_provider = DataProvider(
        num_data_points,
        options['batch_size'],
        toolbox.CIFARLoader(
            data_dir = os.path.join(options['data_dir'], 'valid'),
            flat=False
        )
    )

    log.info('Data providers initialized.')

    # Initialize model ------------------------------------------------------------------
    with tf.device('/gpu:0'):
        model = cupboard(options['model'])(
            options['img_shape'],
            options['input_channels'],
            options['num_classes'],
            options['conv_params'],
            options['fc_params'],
            'CIFAR_classifier'
        )
        log.info('Model initialized')

        # Define inputs: NHWC image batch and one-hot label batch.
        input_batch = tf.placeholder(
            tf.float32,
            shape = [options['batch_size']] + options['img_shape'] + [options['input_channels']],
            name = 'inputs'
        )
        label_batch = tf.placeholder(
            tf.float32,
            shape = [options['batch_size'], options['num_classes']],
            name = 'labels'
        )
        log.info('Inputs defined')

        # Define forward pass
        cost_function, classifier = model(input_batch, label_batch)
        log.info('Forward pass graph built')

        # Define optimizer (no gradient clipping here, unlike the VAE trainer).
        optimizer = tf.train.AdamOptimizer(
            learning_rate=options['lr']
        )
        train_step = optimizer.minimize(cost_function)
        log.info('Optimizer graph built')

    # Define init operation
    init_op = tf.initialize_all_variables()
    log.info('Variable initialization graph built')

    # Define op to save and restore variables
    saver = tf.train.Saver()
    log.info('Save operation built')
    # --------------------------------------------------------------------------

    # Train loop ---------------------------------------------------------------
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        log.info('Session started')

        # Initialize shared variables or restore
        if options['reload']:
            saver.restore(sess, os.path.join(options['model_dir'], 'model.ckpt'))
            log.info('Shared variables restored')
        else:
            sess.run(init_op)
            log.info('Shared variables initialized')

        # Running averages over the last 10 batch losses/accuracies.
        last_losses = np.zeros((10))
        last_accs = np.zeros((10))
        batch_abs_idx = 0
        for epoch_idx in xrange(options['n_epochs']):
            batch_rel_idx = 0
            log.info('Epoch {}'.format(epoch_idx + 1))

            for inputs, labels in train_provider:
                batch_abs_idx += 1
                batch_rel_idx += 1

                # One optimization step; results: 0 cost, 1 logits, 2 train op.
                results = sess.run(
                    [cost_function, classifier, train_step],
                    feed_dict = {
                        input_batch: inputs,
                        label_batch: labels
                    }
                )
                cost = results[0]

                # Check cost: abort the whole run on NaN/Inf.
                if np.isnan(cost) or np.isinf(cost):
                    log.info('NaN detected')
                    return 1., 1., 1.

                # Batch accuracy from argmax over logits vs one-hot labels.
                accuracy = np.mean(np.argmax(results[1], axis=1) == np.argmax(labels, axis=1))

                # Update last losses
                last_losses = np.roll(last_losses, 1)
                last_losses[0] = cost
                last_accs = np.roll(last_accs, 1)
                last_accs[0] = accuracy

                # Dashboard logging every 10 batches.
                # NOTE(review): '2016-04-22' is a hard-coded timestamp
                # placeholder in every CSV row — confirm intended.
                if batch_abs_idx % 10 == 0:
                    train_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(last_losses)))
                    train_acc_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(last_accs)))
                    train_log.flush()
                    train_acc_log.flush()

                # Display training information
                if np.mod(epoch_idx, options['freq_logging']) == 0:
                    log.info('Epoch {:02}/{:02} Batch {:03} Current Loss: {:0>15.4f} Mean last losses: {:0>15.4f}'.format(
                        epoch_idx + 1,
                        options['n_epochs'],
                        batch_abs_idx,
                        float(cost),
                        np.mean(last_losses)
                    ))
                    log.info('Epoch {:02}/{:02} Batch {:03} Current Accuracy: {:0>15.4f}'.format(
                        epoch_idx + 1,
                        options['n_epochs'],
                        batch_abs_idx,
                        np.mean(last_accs)
                    ))

                # Save model
                if np.mod(batch_abs_idx, options['freq_saving']) == 0:
                    saver.save(sess, os.path.join(options['model_dir'], 'model_at_%d.ckpt' % batch_abs_idx))

                    # Pickle conv-layer hyperparameters (every other layer —
                    # stride 2 presumably skips activation/pool entries;
                    # TODO confirm layer layout).
                    save_dict = []
                    for c_ind in xrange(0, len(model._classifier_conv.layers), 2):
                        layer_dict = {
                            'n_filters_in': model._classifier_conv.layers[c_ind].n_filters_in,
                            'n_filters_out': model._classifier_conv.layers[c_ind].n_filters_out,
                            'input_dim': model._classifier_conv.layers[c_ind].input_dim,
                            'filter_dim': model._classifier_conv.layers[c_ind].filter_dim,
                            'strides': model._classifier_conv.layers[c_ind].strides,
                            'padding': model._classifier_conv.layers[c_ind].padding,
                        }
                        save_dict.append(layer_dict)
                    pickle.dump(save_dict, open(os.path.join(options['model_dir'], 'class_dict_%d' % batch_abs_idx), 'wb'))
                    log.info('Model saved')
                    # (legacy commented-out feature-VAE export code removed here)

                # Validate model
                if np.mod(batch_abs_idx, options['freq_validation']) == 0:

                    valid_costs = []
                    val_accuracies = []
                    seen_batches = 0
                    for val_batch, val_label in val_provider:
                        # Break if 10 batches seen for now
                        if seen_batches == options['valid_batches']:
                            break
                        val_results = sess.run(
                            [cost_function, classifier],
                            feed_dict = {
                                input_batch: val_batch,
                                label_batch: val_label
                            }
                        )
                        val_cost = val_results[0]
                        valid_costs.append(val_cost)
                        seen_batches += 1
                        val_accuracies.append(np.mean(np.argmax(val_results[1], axis=1) == np.argmax(val_label, axis=1)))

                    # Print results
                    log.info('Validation loss: {:0>15.4f}'.format(
                        float(np.mean(valid_costs))
                    ))
                    log.info('Validation Accuracy: {:0>15.4f}'.format(
                        np.mean(val_accuracies)
                    ))

                    val_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', float(np.mean(valid_costs))))
                    val_acc_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(val_accuracies)))
                    val_log.flush()
                    val_acc_log.flush()

            log.info('End of epoch {}'.format(epoch_idx + 1))
from get_data import get_df_huzhu from utils import print_options help_message = '三组-0,' + '九组-1,' + '十四组-2,' + '十一组-3,' + '四组-4,' + '十三组-5,' + '六组-6,' + '七组-7,' + '八组-8, 二组-9, 一组-10' parser = argparse.ArgumentParser(description=help_message) parser.add_argument('--file', '-f', default=7, type=int, help='the file to solve') parser.add_argument('--testing', '-T', action='store_true', help='weather to testing') args = parser.parse_args() print_options(args, parser) pd.set_option('expand_frame_repr', False) pd.set_option('display.max_rows', 20) pd.set_option('precision', 2) testing = args.testing sheet_names = [ '三组', # 0 '九组', # 1 '十四组', # 2 '十一组', # 3 '四组', # 4 '十三组', # 5 '六组', # 6
help='The ID of the specified GPU') parser.add_argument('--lambda_T', type=float, default=0.01) parser.add_argument('--lambda_T_decay', type=float, default=0) parser.add_argument('--label_rotation', action='store_true') parser.add_argument('--disable_cudnn_benchmark', action='store_true') parser.add_argument('--feature_save', action='store_true') parser.add_argument('--feature_save_every', type=int, default=1) parser.add_argument('--feature_num_batches', type=int, default=1) parser.add_argument('--store_linear', action='store_true') parser.add_argument('--sample_trunc_normal', action='store_true') parser.add_argument('--separate', action='store_true') parser.add_argument('--mi_type_p', type=str, default='ce') parser.add_argument('--mi_type_q', type=str, default='ce') opt = parser.parse_args() print_options(parser, opt) # specify the gpu id if using only 1 gpu # if opt.ngpu == 1: # os.environ['CUDA_VISIBLE_DEVICES'] = str(opt.gpu_id) try: os.makedirs(opt.outf) except OSError: pass outff = os.path.join(opt.outf, 'features') if opt.feature_save or opt.store_linear: utils.mkdirs(outff) if opt.manualSeed is None:
def train(options):
    """Adversarially train a small fully-connected generator against a
    pre-trained, fixed convolutional discriminator.

    Writes dashboard CSV logs, periodically saves checkpoints, and runs
    validation passes. Training alternates between generator and
    discriminator updates according to ``options['D_to_G']`` after an
    initial warm-up phase for each.

    Args:
        options (dict): configuration; keys read here include
            'model_dir', 'dashboard_dir', 'description', 'sigma_clip',
            'batch_size', 'img_shape', 'input_channels', 'latent_dims',
            'disc_params_path', 'num_feat_layers', 'lr', 'reload_all',
            'reload_file', 'D_to_G', 'initial_G_iters',
            'initial_D_iters', 'n_epochs', 'freq_logging',
            'freq_saving', 'freq_validation', 'valid_batches'.

    NOTE(review): this block was reconstructed from whitespace-mangled
    source; indentation of a few statements is inferred — see inline
    notes.
    """
    # Get logger
    log = utils.get_logger(os.path.join(options['model_dir'], 'log.txt'))

    # Dump every option to a plain-text file for the dashboard.
    options_file = open(os.path.join(options['dashboard_dir'], 'options'), 'w')
    options_file.write(options['description'] + '\n')
    options_file.write(
        'Log Sigma^2 clipped to: [{}, {}]\n\n'.format(
            -options['sigma_clip'],
            options['sigma_clip']
        )
    )
    for optn in options:
        options_file.write(optn)
        options_file.write(':\t')
        options_file.write(str(options[optn]))
        options_file.write('\n')
    options_file.close()

    # Dashboard Catalog: declares which CSV logs the dashboard should render.
    catalog = open(os.path.join(options['dashboard_dir'], 'catalog'), 'w')
    catalog.write(
        """filename,type,name
options,plain,Options
train_loss.csv,csv,Discriminator Cross-Entropy
train_acc.csv,csv,Discriminator Accuracy
val_loss.csv,csv,Validation Cross-Entropy
val_acc.csv,csv,Validation Accuracy
"""
    )
    catalog.flush()

    train_log = open(os.path.join(options['dashboard_dir'], 'train_loss.csv'), 'w')
    val_log = open(os.path.join(options['dashboard_dir'], 'val_loss.csv'), 'w')
    train_acc = open(os.path.join(options['dashboard_dir'], 'train_acc.csv'), 'w')
    val_acc = open(os.path.join(options['dashboard_dir'], 'val_acc.csv'), 'w')

    # CSV headers: three value columns so each training phase writes its own column.
    train_log.write('step,time,Train CE (Training Vanilla),Train CE (Training Gen.),Train CE (Training Disc.)\n')
    val_log.write('step,time,Validation CE (Training Vanilla),Validation CE (Training Gen.),Validation CE (Training Disc.)\n')
    train_acc.write('step,time,Train CE (Training Vanilla),Train CE (Training Gen.),Train CE (Training Disc.)\n')
    val_acc.write('step,time,Validation CE (Training Vanilla),Validation CE (Training Gen.),Validation Acc. (Training Disc.)\n')

    # Print options
    utils.print_options(options, log)

    # Load dataset ----------------------------------------------------------------------
    # Train provider
    train_provider, val_provider, test_provider = get_providers(options, log, flat=True)

    # Initialize model ------------------------------------------------------------------
    with tf.device('/gpu:0'):
        # Define inputs -------------------------------------------------------------------------
        real_batch = tf.placeholder(
            tf.float32,
            shape=[options['batch_size'], np.prod(np.array(options['img_shape']))],
            name='real_inputs'
        )
        sampler_input_batch = tf.placeholder(
            tf.float32,
            shape=[options['batch_size'], options['latent_dims']],
            name='noise_channel'
        )

        # Fixed labels for the stacked disc batch: first half real (1), second half fake (0).
        labels = tf.constant(
            np.expand_dims(
                np.concatenate(
                    (
                        np.ones(options['batch_size']),
                        np.zeros(options['batch_size'])
                    ),
                    axis=0
                ).astype(np.float32),
                axis=1
            )
        )
        labels = tf.cast(labels, tf.float32)

        log.info('Inputs defined')

        # Define model --------------------------------------------------------------------------
        with tf.variable_scope('gen_scope'):
            generator = Sequential('generator')
            generator += FullyConnected(options['latent_dims'], 60, tf.nn.tanh, name='fc_1')
            generator += FullyConnected(60, 60, tf.nn.tanh, name='fc_2')
            generator += FullyConnected(60, np.prod(options['img_shape']), tf.nn.tanh, name='fc_3')

            sampler = generator(sampler_input_batch)

        with tf.variable_scope('disc_scope'):
            # Discriminator with convolutional feature layers loaded from disk.
            disc_model = cupboard('fixed_conv_disc')(
                pickle.load(open(options['disc_params_path'], 'rb')),
                options['num_feat_layers'],
                name='disc_model'
            )

            # Real images stacked on generated ones → single discriminator pass.
            disc_inputs = tf.concat(0, [real_batch, sampler])
            disc_inputs = tf.reshape(
                disc_inputs,
                [disc_inputs.get_shape()[0].value] + options['img_shape'] + [options['input_channels']]
            )
            preds = disc_model(disc_inputs)
            # Clip to keep tf.log() finite in the cross-entropy below.
            preds = tf.clip_by_value(preds, 0.00001, 0.99999)

            # Disc Accuracy: fraction of rounded predictions matching the fixed labels.
            disc_accuracy = (1 / float(labels.get_shape()[0].value)) * tf.reduce_sum(
                tf.cast(
                    tf.equal(
                        tf.round(preds),
                        labels
                    ),
                    tf.float32
                )
            )

        # Define Losses -------------------------------------------------------------------------
        # Discrimnator Cross-Entropy (mean binary CE over the stacked batch)
        disc_CE = (1 / float(labels.get_shape()[0].value)) * tf.reduce_sum(
            -tf.add(
                tf.mul(
                    labels,
                    tf.log(preds)
                ),
                tf.mul(
                    1.0 - labels,
                    tf.log(1.0 - preds)
                )
            )
        )

        # Generator loss: only the fake half (labels == 0) contributes.
        # NOTE(review): not reduced to a scalar here; compute_gradients sums it implicitly.
        gen_loss = -tf.mul(
            1.0 - labels,
            tf.log(preds)
        )

        # Define Optimizers ---------------------------------------------------------------------
        optimizer = tf.train.AdamOptimizer(
            learning_rate=options['lr']
        )

        # Get Generator and Disriminator Trainable Variables
        gen_train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'gen_scope')
        disc_train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'disc_scope')

        # Get generator gradients (drop vars with no gradient, clip by norm)
        grads = optimizer.compute_gradients(gen_loss, gen_train_vars)
        grads = [gv for gv in grads if gv[0] != None]
        clip_grads = [(tf.clip_by_norm(gv[0], 5.0, name='gen_grad_clipping'), gv[1]) for gv in grads]
        gen_backpass = optimizer.apply_gradients(clip_grads)

        # Get Dsicriminator gradients
        grads = optimizer.compute_gradients(disc_CE, disc_train_vars)
        grads = [gv for gv in grads if gv[0] != None]
        clip_grads = [(tf.clip_by_norm(gv[0], 5.0, name='disc_grad_clipping'), gv[1]) for gv in grads]
        disc_backpass = optimizer.apply_gradients(clip_grads)

        log.info('Optimizer graph built')
        # --------------------------------------------------------------------------------------
        # Define init operation
        init_op = tf.initialize_all_variables()
        log.info('Variable initialization graph built')

        # Define op to save and restore variables
        saver = tf.train.Saver()
        log.info('Save operation built')
    # --------------------------------------------------------------------------

    # Train loop ---------------------------------------------------------------
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        log.info('Session started')

        # Initialize shared variables or restore
        if options['reload_all']:
            saver.restore(sess, options['reload_file'])
            log.info('Shared variables restored')
        else:
            sess.run(init_op)
            log.info('Variables initialized')

        # Define last losses to compute a running average
        last_losses = np.zeros((10))
        last_accs = np.zeros((10))
        # Rolling window of disc accuracy (window of 5000 batches).
        disc_tracker = np.ones((5000))

        batch_abs_idx = 0
        D_to_G = options['D_to_G']
        total_D2G = sum(D_to_G)
        base = options['initial_G_iters'] + options['initial_D_iters']

        feat_params = pickle.load(open(options['disc_params_path'], 'rb'))

        for epoch_idx in xrange(options['n_epochs']):
            batch_rel_idx = 0
            log.info('Epoch {}'.format(epoch_idx + 1))

            for inputs in train_provider:
                if isinstance(inputs, tuple):
                    inputs = inputs[0]

                batch_abs_idx += 1
                batch_rel_idx += 1

                # Schedule: G-only warm-up, then D-only warm-up, then alternate D/G
                # in a cycle of length sum(D_to_G) with D_to_G[0] disc steps first.
                if batch_abs_idx < options['initial_G_iters']:
                    backpass = gen_backpass
                    log_format_string = '{},{},{},,\n'
                elif options['initial_G_iters'] <= batch_abs_idx < base:
                    backpass = disc_backpass
                    log_format_string = '{},{},,,{}\n'
                else:
                    if (batch_abs_idx - base) % total_D2G < D_to_G[0]:
                        backpass = disc_backpass
                        log_format_string = '{},{},,,{}\n'
                    else:
                        backpass = gen_backpass
                        log_format_string = '{},{},,{},\n'
                        # NOTE(review): the next line immediately clobbers the
                        # format string chosen above, so gen-phase rows log into
                        # the "Vanilla" CSV column — looks like a debugging
                        # leftover; confirm intended column before relying on logs.
                        log_format_string = '{},{},{},,\n'

                # MVN = multivariate normal sampler; standard normal noise input.
                result = sess.run(
                    [
                        disc_CE,
                        backpass,
                        disc_accuracy
                    ],
                    feed_dict={
                        real_batch: inputs,
                        sampler_input_batch: MVN(
                            np.zeros(options['latent_dims']),
                            np.diag(np.ones(options['latent_dims'])),
                            size=options['batch_size']
                        )
                    }
                )

                cost = result[0]

                # Log running means every 10 batches.
                if batch_abs_idx % 10 == 0:
                    train_log.write(log_format_string.format(batch_abs_idx, '2016-04-22', np.mean(last_losses)))
                    train_acc.write(log_format_string.format(batch_abs_idx, '2016-04-22', np.mean(last_accs)))
                    train_log.flush()
                    train_acc.flush()

                # Check cost
                # NOTE(review): unlike the sibling train() variants, this one
                # only logs on NaN/Inf and keeps training — confirm intended.
                if np.isnan(cost) or np.isinf(cost):
                    log.info('NaN detected')

                # Update last losses (newest value at index 0)
                last_losses = np.roll(last_losses, 1)
                last_losses[0] = cost
                last_accs = np.roll(last_accs, 1)
                last_accs[0] = result[-1]
                disc_tracker = np.roll(disc_tracker, 1)
                disc_tracker[0] = result[-1]

                # Display training information
                if np.mod(epoch_idx, options['freq_logging']) == 0:
                    log.info('Epoch {:02}/{:02} Batch {:03} Current Loss: {:0>15.4f} Mean last losses: {:0>15.4f}'.format(
                        epoch_idx + 1,
                        options['n_epochs'],
                        batch_abs_idx,
                        float(cost),
                        np.mean(last_losses)
                    ))
                    log.info('Epoch {:02}/{:02} Batch {:03} Current Loss: {:0>15.4f} Mean last accuracies: {:0>15.4f}'.format(
                        epoch_idx + 1,
                        options['n_epochs'],
                        batch_abs_idx,
                        float(cost),
                        np.mean(last_accs)
                    ))

                # Save model
                if np.mod(batch_abs_idx, options['freq_saving']) == 0:
                    saver.save(sess, os.path.join(options['model_dir'], 'model_at_%d.ckpt' % batch_abs_idx))
                    log.info('Model saved')

                # Validate model
                if np.mod(batch_abs_idx, options['freq_validation']) == 0:
                    valid_costs = []
                    valid_accs = []
                    seen_batches = 0
                    for val_batch in val_provider:
                        if isinstance(val_batch, tuple):
                            val_batch = val_batch[0]
                        result = sess.run(
                            [
                                disc_CE,
                                disc_accuracy
                            ],
                            feed_dict={
                                real_batch: val_batch,
                                sampler_input_batch: MVN(
                                    np.zeros(options['latent_dims']),
                                    np.diag(np.ones(options['latent_dims'])),
                                    size=options['batch_size']
                                )
                            }
                        )
                        valid_costs.append(result[0])
                        valid_accs.append(result[-1])
                        seen_batches += 1
                        # Cap validation at a fixed number of batches.
                        if seen_batches == options['valid_batches']:
                            break

                    # Print results
                    log.info('Validation loss: {:0>15.4f}'.format(
                        float(np.mean(valid_costs))
                    ))
                    log.info('Validation accuracies: {:0>15.4f}'.format(
                        float(np.mean(valid_accs))
                    ))

                    # Draw fresh samples from the generator for the dashboard.
                    val_samples = sess.run(
                        sampler,
                        feed_dict={
                            sampler_input_batch: MVN(
                                np.zeros(options['latent_dims']),
                                np.diag(np.ones(options['latent_dims'])),
                                size=options['batch_size']
                            )
                        }
                    )

                    val_log.write(log_format_string.format(batch_abs_idx, '2016-04-22', np.mean(valid_costs)))
                    val_acc.write(log_format_string.format(batch_abs_idx, '2016-04-22', np.mean(valid_accs)))
                    val_log.flush()
                    val_acc.flush()

                    # First panel is a placeholder (all-ones image).
                    save_ae_samples(
                        catalog,
                        np.ones([options['batch_size']] + options['img_shape']),
                        np.reshape(inputs, [options['batch_size']] + options['img_shape']),
                        np.reshape(val_samples, [options['batch_size']] + options['img_shape']),
                        batch_abs_idx,
                        options['dashboard_dir'],
                        num_to_save=5,
                        save_gray=True
                    )

            log.info('End of epoch {}'.format(epoch_idx + 1))
# create directories output_dir = os.path.join("output", opt.folder_name) if not os.path.exists(output_dir): os.makedirs(output_dir) ckpt_dir = os.path.join(output_dir, "checkpoint") if not os.path.exists(ckpt_dir): os.makedirs(ckpt_dir) loss_plot_dir = os.path.join(output_dir, "loss_plot") if not os.path.exists(loss_plot_dir): os.makedirs(loss_plot_dir) eval_dir = os.path.join(output_dir, "train_evaluation") if not os.path.exists(eval_dir): os.makedirs(eval_dir) # save opt print_options(opt, parser, ckpt_dir) if opt.cuda and not torch.cuda.is_available(): raise Exception("No GPU found, please run without --cuda") cudnn.benchmark = True torch.manual_seed(opt.seed) if opt.cuda: torch.cuda.manual_seed(opt.seed) # load datasets print('===> Loading datasets') root_path = "./" train_set = DatasetFromFolder(root_path + os.path.join(opt.dataset, opt.training_set))
def train(options):
    """Train a VAE-GAN: a vanilla VAE whose decoder doubles as a GAN
    generator, judged by a fixed convolutional discriminator.

    Alternates vanilla-VAE / discriminator / VAE-adversarial updates per
    ``options['D_to_G']``, streams statistics to dashboard CSV files,
    checkpoints the graph and pickles encoder/decoder layer parameters.

    Args:
        options (dict): configuration dictionary (model/dashboard dirs,
            VAE and discriminator hyper-parameters, logging/saving/
            validation frequencies — see key accesses below).

    Returns:
        tuple: ``(1., 1., 1.)`` if a NaN/Inf loss is detected (after
        dumping debug info); otherwise ``None`` after all epochs.

    NOTE(review): reconstructed from whitespace-mangled source; a few
    statement placements are inferred — see inline notes.
    """
    # Get logger
    log = utils.get_logger(os.path.join(options['model_dir'], 'log.txt'))

    # Dump every option to a plain-text file for the dashboard.
    options_file = open(os.path.join(options['dashboard_dir'], 'options'), 'w')
    options_file.write(options['description'] + '\n')
    options_file.write(
        'DKL Weight: {}\nLog Sigma^2 clipped to: [{}, {}]\n\n'.format(
            options['DKL_weight'], -options['sigma_clip'],
            options['sigma_clip']))
    for optn in options:
        options_file.write(optn)
        options_file.write(':\t')
        options_file.write(str(options[optn]))
        options_file.write('\n')
    options_file.close()

    # Dashboard Catalog: declares which CSV logs the dashboard should render.
    catalog = open(os.path.join(options['dashboard_dir'], 'catalog'), 'w')
    catalog.write("""filename,type,name
options,plain,Options
train_loss.csv,csv,Discriminator Cross-Entropy
ll.csv,csv,Neg. Log-Likelihood
dec_log_sig_sq.csv,csv,Decoder Log Simga^2
dec_std_log_sig_sq.csv,csv,STD of Decoder Log Simga^2
dec_mean.csv,csv,Decoder Mean
dkl.csv,csv,DKL
enc_log_sig_sq.csv,csv,Encoder Log Sigma^2
enc_std_log_sig_sq.csv,csv,STD of Encoder Log Sigma^2
enc_mean.csv,csv,Encoder Mean
val_loss.csv,csv,Validation Loss
""")
    catalog.flush()

    train_log = open(os.path.join(options['dashboard_dir'], 'train_loss.csv'), 'w')
    val_log = open(os.path.join(options['dashboard_dir'], 'val_loss.csv'), 'w')
    dkl_log = open(os.path.join(options['dashboard_dir'], 'dkl.csv'), 'w')
    ll_log = open(os.path.join(options['dashboard_dir'], 'll.csv'), 'w')
    dec_sig_log = open(
        os.path.join(options['dashboard_dir'], 'dec_log_sig_sq.csv'), 'w')
    enc_sig_log = open(
        os.path.join(options['dashboard_dir'], 'enc_log_sig_sq.csv'), 'w')
    dec_std_sig_log = open(
        os.path.join(options['dashboard_dir'], 'dec_std_log_sig_sq.csv'), 'w')
    enc_std_sig_log = open(
        os.path.join(options['dashboard_dir'], 'enc_std_log_sig_sq.csv'), 'w')
    dec_mean_log = open(os.path.join(options['dashboard_dir'], 'dec_mean.csv'),
                        'w')
    enc_mean_log = open(os.path.join(options['dashboard_dir'], 'enc_mean.csv'),
                        'w')
    # val_sig_log = open(os.path.join(options['dashboard_dir'], 'val_log_sig_sq.csv'), 'w')

    # CSV headers: three value columns, one per training phase.
    train_log.write(
        'step,time,Train CE (Training Vanilla),Train CE (Training Gen.),Train CE (Training Disc.)\n'
    )
    val_log.write(
        'step,time,Validation CE (Training Vanilla),Validation CE (Training Gen.),Validation CE (Training Disc.)\n'
    )
    dkl_log.write(
        'step,time,DKL (Training Vanilla),DKL (Training Gen.),DKL (Training Disc.)\n'
    )
    ll_log.write(
        'step,time,-LL (Training Vanilla),-LL (Training Gen.),-LL (Training Disc.)\n'
    )
    dec_sig_log.write(
        'step,time,Decoder Log Sigma^2 (Training Vanilla),Decoder Log Sigma^2 (Training Gen.),Decoder Log Sigma^2 (Training Disc.)\n'
    )
    enc_sig_log.write(
        'step,time,Encoder Log Sigma^2 (Training Vanilla),Encoder Log Sigma^2 (Training Gen.),Encoder Log Sigma^2 (Training Disc.)\n'
    )
    dec_std_sig_log.write(
        'step,time,STD of Decoder Log Sigma^2 (Training Vanilla),STD of Decoder Log Sigma^2 (Training Gen.),STD of Decoder Log Sigma^2 (Training Disc.)\n'
    )
    enc_std_sig_log.write(
        'step,time,STD of Encoder Log Sigma^2 (Training Vanilla),STD of Encoder Log Sigma^2 (Training Gen.),STD of Encoder Log Sigma^2 (Training Disc.)\n'
    )
    dec_mean_log.write(
        'step,time,Decoder Mean (Training Vanilla),Decoder Mean (Training Gen.),Decoder Mean (Training Disc.)\n'
    )
    enc_mean_log.write(
        'step,time,Encoder Mean (Training Vanilla),Encoder Mean (Training Gen.),Encoder Mean (Training Disc.)\n'
    )

    # Print options
    utils.print_options(options, log)

    # Load dataset ----------------------------------------------------------------------
    # Train provider
    train_provider, val_provider, test_provider = get_providers(options, log, flat=True)

    # Initialize model ------------------------------------------------------------------
    with tf.device('/gpu:0'):
        # Define inputs
        model_input_batch = tf.placeholder(
            tf.float32,
            shape=[
                options['batch_size'],
                np.prod(np.array(options['img_shape']))
            ],
            name='enc_inputs')
        sampler_input_batch = tf.placeholder(
            tf.float32,
            shape=[options['batch_size'], options['latent_dims']],
            name='dec_inputs')
        log.info('Inputs defined')

        # Define model
        with tf.variable_scope('vae_scope'):
            vae_model = cupboard('vanilla_vae')(
                options['p_layers'], options['q_layers'],
                np.prod(options['img_shape']), options['latent_dims'],
                options['DKL_weight'], options['sigma_clip'], 'vae_model')

        with tf.variable_scope('disc_scope'):
            # Discriminator with conv feature layers loaded from disk.
            disc_model = cupboard('fixed_conv_disc')(
                pickle.load(open(options['disc_params_path'], 'rb')),
                options['num_feat_layers'], name='disc_model')

        vae_gan = cupboard('vae_gan')(vae_model, disc_model,
                                      options['disc_weight'],
                                      options['img_shape'],
                                      options['input_channels'], 'vae_scope',
                                      'disc_scope', name='vae_gan_model')

        # Define Optimizers ---------------------------------------------------------------------
        optimizer = tf.train.AdamOptimizer(learning_rate=options['lr'])

        # vae_gan builds the three update ops: adversarial-VAE, disc, vanilla-VAE.
        vae_backpass, disc_backpass, vanilla_backpass = vae_gan(
            model_input_batch, sampler_input_batch, optimizer)

        log.info('Optimizer graph built')
        # --------------------------------------------------------------------------------------
        # Define init operation
        init_op = tf.initialize_all_variables()
        log.info('Variable initialization graph built')

        # Define op to save and restore variables
        saver = tf.train.Saver()
        log.info('Save operation built')
    # --------------------------------------------------------------------------

    # Train loop ---------------------------------------------------------------
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        log.info('Session started')

        # Initialize shared variables or restore
        if options['reload_all']:
            saver.restore(sess, options['reload_file'])
            log.info('Shared variables restored')
        else:
            sess.run(init_op)
            log.info('Variables initialized')

        # NOTE(review): placement relative to the if/else above is inferred
        # from mangled source — confirm whether this should run only when
        # not reloading all variables.
        if options['reload_vae']:
            vae_model.reload_vae(options['vae_params_path'])

        # Define last losses to compute a running average
        last_losses = np.zeros((10))

        batch_abs_idx = 0
        D_to_G = options['D_to_G']
        total_D2G = sum(D_to_G)
        base = options['initial_G_iters'] + options['initial_D_iters']

        for epoch_idx in xrange(options['n_epochs']):
            batch_rel_idx = 0
            log.info('Epoch {}'.format(epoch_idx + 1))

            for inputs in train_provider:
                if isinstance(inputs, tuple):
                    inputs = inputs[0]

                batch_abs_idx += 1
                batch_rel_idx += 1

                # Schedule: vanilla-VAE warm-up, then disc warm-up, then
                # alternate disc / adversarial-VAE per D_to_G cycle. The
                # format string routes the value to the matching CSV column.
                if batch_abs_idx < options['initial_G_iters']:
                    backpass = vanilla_backpass
                    log_format_string = '{},{},{},,\n'
                elif options['initial_G_iters'] <= batch_abs_idx < base:
                    backpass = disc_backpass
                    log_format_string = '{},{},,,{}\n'
                else:
                    if (batch_abs_idx - base) % total_D2G < D_to_G[0]:
                        backpass = disc_backpass
                        log_format_string = '{},{},,,{}\n'
                    else:
                        backpass = vae_backpass
                        log_format_string = '{},{},,{},\n'

                # MVN = multivariate normal sampler; standard normal noise.
                result = sess.run(
                    [
                        vae_gan.disc_CE, backpass, vae_gan._vae.DKL,
                        vae_gan._vae.rec_loss, vae_gan._vae.dec_log_std_sq,
                        vae_gan._vae.enc_log_std_sq, vae_gan._vae.enc_mean,
                        vae_gan._vae.dec_mean
                    ],
                    feed_dict={
                        model_input_batch:
                        inputs,
                        sampler_input_batch:
                        MVN(np.zeros(options['latent_dims']),
                            np.diag(np.ones(options['latent_dims'])),
                            size=options['batch_size'])
                    })

                cost = result[0]

                # Stream statistics to the dashboard every 10 batches.
                if batch_abs_idx % 10 == 0:
                    train_log.write(
                        log_format_string.format(batch_abs_idx, '2016-04-22',
                                                 np.mean(last_losses)))
                    dkl_log.write(
                        log_format_string.format(batch_abs_idx, '2016-04-22',
                                                 -np.mean(result[2])))
                    ll_log.write(
                        log_format_string.format(batch_abs_idx, '2016-04-22',
                                                 -np.mean(result[3])))
                    train_log.flush()
                    dkl_log.flush()
                    ll_log.flush()
                    dec_sig_log.write(
                        log_format_string.format(batch_abs_idx, '2016-04-22',
                                                 np.mean(result[4])))
                    enc_sig_log.write(
                        log_format_string.format(batch_abs_idx, '2016-04-22',
                                                 np.mean(result[5])))
                    # val_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[6])))
                    dec_sig_log.flush()
                    enc_sig_log.flush()
                    dec_std_sig_log.write(
                        log_format_string.format(batch_abs_idx, '2016-04-22',
                                                 np.std(result[4])))
                    enc_std_sig_log.write(
                        log_format_string.format(batch_abs_idx, '2016-04-22',
                                                 np.std(result[5])))
                    dec_mean_log.write(
                        log_format_string.format(batch_abs_idx, '2016-04-22',
                                                 np.mean(result[7])))
                    enc_mean_log.write(
                        log_format_string.format(batch_abs_idx, '2016-04-22',
                                                 np.mean(result[6])))
                    dec_std_sig_log.flush()
                    enc_std_sig_log.flush()
                    dec_mean_log.flush()
                    enc_mean_log.flush()

                # Check cost: on NaN/Inf, dump everything fetched and abort.
                if np.isnan(cost) or np.isinf(cost):
                    log.info('NaN detected')
                    for i in range(len(result)):
                        print("\n\nresult[%d]:" % i)
                        try:
                            print(np.any(np.isnan(result[i])))
                        except:
                            pass
                        print(result[i])
                    print(result[3].shape)
                    print(vae_gan._vae._encoder.layers[0].weights['w'].eval())
                    print('\n\nAny:')
                    # NOTE(review): result has only 8 elements (indices 0-7);
                    # the three reads below would raise IndexError — looks like
                    # a leftover from a variant with a longer fetch list.
                    print(np.any(np.isnan(result[8])))
                    print(np.any(np.isnan(result[9])))
                    print(np.any(np.isnan(result[10])))
                    print(inputs)
                    return 1., 1., 1.

                # Update last losses (newest value at index 0)
                last_losses = np.roll(last_losses, 1)
                last_losses[0] = cost

                # Display training information
                if np.mod(epoch_idx, options['freq_logging']) == 0:
                    log.info(
                        'Epoch {:02}/{:02} Batch {:03} Current Loss: {:0>15.4f} Mean last losses: {:0>15.4f}'
                        .format(epoch_idx + 1, options['n_epochs'],
                                batch_abs_idx, float(cost),
                                np.mean(last_losses)))
                    log.info('Batch Mean LL: {:0>15.4f}'.format(
                        np.mean(result[3], axis=0)))
                    log.info('Batch Mean -DKL: {:0>15.4f}'.format(
                        np.mean(result[2], axis=0)))

                # Save model
                if np.mod(batch_abs_idx, options['freq_saving']) == 0:
                    saver.save(
                        sess,
                        os.path.join(options['model_dir'],
                                     'model_at_%d.ckpt' % batch_abs_idx))
                    log.info('Model saved')

                    save_dict = {}
                    # Save encoder params ------------------------------------------------------------------
                    for i in range(len(vae_gan._vae._encoder.layers)):
                        layer_dict = {
                            'input_dim':
                            vae_gan._vae._encoder.layers[i].input_dim,
                            'output_dim':
                            vae_gan._vae._encoder.layers[i].output_dim,
                            'act_fn':
                            vae_gan._vae._encoder.layers[i].activation,
                            'W':
                            vae_gan._vae._encoder.layers[i].weights['w'].eval(
                            ),
                            'b':
                            vae_gan._vae._encoder.layers[i].weights['b'].eval(
                            )
                        }
                        # NOTE(review): overwritten every iteration, so only the
                        # LAST encoder layer is pickled — verify against the
                        # consumer of these dict files.
                        save_dict['encoder'] = layer_dict

                    layer_dict = {
                        'input_dim': vae_gan._vae._enc_mean.input_dim,
                        'output_dim': vae_gan._vae._enc_mean.output_dim,
                        'act_fn': vae_gan._vae._enc_mean.activation,
                        'W': vae_gan._vae._enc_mean.weights['w'].eval(),
                        'b': vae_gan._vae._enc_mean.weights['b'].eval()
                    }
                    save_dict['enc_mean'] = layer_dict

                    layer_dict = {
                        'input_dim': vae_gan._vae._enc_log_std_sq.input_dim,
                        'output_dim': vae_gan._vae._enc_log_std_sq.output_dim,
                        'act_fn': vae_gan._vae._enc_log_std_sq.activation,
                        'W': vae_gan._vae._enc_log_std_sq.weights['w'].eval(),
                        'b': vae_gan._vae._enc_log_std_sq.weights['b'].eval()
                    }
                    save_dict['enc_log_std_sq'] = layer_dict

                    # Save decoder params ------------------------------------------------------------------
                    for i in range(len(vae_gan._vae._decoder.layers)):
                        layer_dict = {
                            'input_dim':
                            vae_gan._vae._decoder.layers[i].input_dim,
                            'output_dim':
                            vae_gan._vae._decoder.layers[i].output_dim,
                            'act_fn':
                            vae_gan._vae._decoder.layers[i].activation,
                            'W':
                            vae_gan._vae._decoder.layers[i].weights['w'].eval(
                            ),
                            'b':
                            vae_gan._vae._decoder.layers[i].weights['b'].eval(
                            )
                        }
                        # NOTE(review): same overwrite issue as 'encoder' above.
                        save_dict['decoder'] = layer_dict

                    layer_dict = {
                        'input_dim': vae_gan._vae._dec_mean.input_dim,
                        'output_dim': vae_gan._vae._dec_mean.output_dim,
                        'act_fn': vae_gan._vae._dec_mean.activation,
                        'W': vae_gan._vae._dec_mean.weights['w'].eval(),
                        'b': vae_gan._vae._dec_mean.weights['b'].eval()
                    }
                    save_dict['dec_mean'] = layer_dict

                    layer_dict = {
                        'input_dim': vae_gan._vae._dec_log_std_sq.input_dim,
                        'output_dim': vae_gan._vae._dec_log_std_sq.output_dim,
                        'act_fn': vae_gan._vae._dec_log_std_sq.activation,
                        'W': vae_gan._vae._dec_log_std_sq.weights['w'].eval(),
                        'b': vae_gan._vae._dec_log_std_sq.weights['b'].eval()
                    }
                    save_dict['dec_log_std_sq'] = layer_dict

                    pickle.dump(
                        save_dict,
                        open(
                            os.path.join(options['model_dir'],
                                         'vae_dict_%d' % batch_abs_idx), 'wb'))

                # Validate model
                if np.mod(batch_abs_idx, options['freq_validation']) == 0:
                    # NOTE(review): the next line evaluates a weight slice and
                    # discards it — a no-op left over from debugging.
                    vae_gan._vae._decoder.layers[0].weights['w'].eval()[:5, :5]

                    valid_costs = []
                    seen_batches = 0
                    for val_batch in val_provider:
                        if isinstance(val_batch, tuple):
                            val_batch = val_batch[0]
                        val_cost = sess.run(
                            vae_gan.disc_CE,
                            feed_dict={
                                model_input_batch:
                                val_batch,
                                sampler_input_batch:
                                MVN(np.zeros(options['latent_dims']),
                                    np.diag(np.ones(options['latent_dims'])),
                                    size=options['batch_size'])
                            })
                        valid_costs.append(val_cost)
                        seen_batches += 1
                        # Cap validation at a fixed number of batches.
                        if seen_batches == options['valid_batches']:
                            break

                    # Print results
                    log.info('Validation loss: {:0>15.4f}'.format(
                        float(np.mean(valid_costs))))

                    # Draw fresh decoder samples for the dashboard.
                    val_samples = sess.run(
                        vae_gan.sampler,
                        feed_dict={
                            sampler_input_batch:
                            MVN(np.zeros(options['latent_dims']),
                                np.diag(np.ones(options['latent_dims'])),
                                size=options['batch_size'])
                        })

                    val_log.write(
                        log_format_string.format(batch_abs_idx, '2016-04-22',
                                                 np.mean(valid_costs)))
                    val_log.flush()

                    # result[7] is the decoder mean (reconstruction) of the last train batch.
                    save_ae_samples(catalog,
                                    np.reshape(result[7],
                                               [options['batch_size']] +
                                               options['img_shape']),
                                    np.reshape(inputs,
                                               [options['batch_size']] +
                                               options['img_shape']),
                                    np.reshape(val_samples,
                                               [options['batch_size']] +
                                               options['img_shape']),
                                    batch_abs_idx,
                                    options['dashboard_dir'],
                                    num_to_save=5,
                                    save_gray=True)

            log.info('End of epoch {}'.format(epoch_idx + 1))
def train(options):
    """Train a softmax classifier on top of a frozen, pre-trained VAE
    encoder (weights loaded from a pickle and frozen via tf.constant).

    Logs train/validation accuracy to dashboard CSVs, checkpoints
    periodically, and reports test accuracy at the end.

    Args:
        options (dict): configuration dictionary (model/dashboard dirs,
            'model', 'enc_params', 'dec_params', 'feat_params_path',
            'latent_dims', 'num_classes', 'lr', 'reload', batch/epoch
            settings, logging/saving/validation frequencies).

    Returns:
        tuple: ``(1., 1., 1.)`` if a NaN/Inf loss is detected (after
        dumping debug info); otherwise ``None`` after testing.

    NOTE(review): reconstructed from whitespace-mangled source;
    indentation of a few statements is inferred.
    """
    # Get logger
    log = utils.get_logger(os.path.join(options['model_dir'], 'log.txt'))

    # Dump every option to a plain-text file for the dashboard.
    options_file = open(os.path.join(options['dashboard_dir'], 'options'), 'w')
    options_file.write(options['description'] + '\n')
    for optn in options:
        options_file.write(optn)
        options_file.write(':\t')
        options_file.write(str(options[optn]))
        options_file.write('\n')
    options_file.close()

    # Dashboard Catalog: declares which CSV logs the dashboard should render.
    catalog = open(os.path.join(options['dashboard_dir'], 'catalog'), 'w')
    catalog.write(
        """filename,type,name
options,plain,Options
train_loss.csv,csv,Train Loss
val_loss.csv,csv,Validation Loss
"""
    )
    catalog.flush()

    train_log = open(os.path.join(options['dashboard_dir'], 'train_loss.csv'), 'w')
    val_log = open(os.path.join(options['dashboard_dir'], 'val_loss.csv'), 'w')

    train_log.write('step,time,Train Loss\n')
    val_log.write('step,time,Validation Loss\n')

    # Print options
    utils.print_options(options, log)

    # Load dataset ----------------------------------------------------------------------
    # Train provider
    train_provider, val_provider, test_provider = get_providers(options, log, flat=True)

    # Initialize model ------------------------------------------------------------------
    with tf.device('/gpu:0'):
        model = cupboard(options['model'])(
            np.prod(options['img_shape']) * options['input_channels'],
            options['enc_params'],
            options['dec_params'],
            'ae'
        )
        log.info('Model initialized')

        # Define inputs
        model_input_batch = tf.placeholder(
            tf.float32,
            shape=[options['batch_size'], np.prod(np.array(options['img_shape']))],
            name='inputs'
        )
        model_label_batch = tf.placeholder(
            tf.float32,
            shape=[options['batch_size'], options['num_classes']],
            name='labels'
        )
        log.info('Inputs defined')

        # Load VAE: build the autoencoder graph so model.encoder exists.
        model(model_input_batch, model_input_batch)

        # Freeze encoder weights with the pre-trained values (tf.constant
        # keeps them out of the trainable set).
        feat_params = pickle.load(open(options['feat_params_path'], 'rb'))
        for i in range(len(model._encoder.layers)):
            model._encoder.layers[i].weights['w'] = tf.constant(feat_params['enc_W'][i])
            model._encoder.layers[i].weights['b'] = tf.constant(feat_params['enc_b'][i])

        # Linear classifier head on the encoder output, then softmax.
        classifier = FC(
            options['latent_dims'],
            options['num_classes'],
            activation=None,
            scale=0.01,
            name='classifier_fc'
        )(model.encoder)
        classifier = tf.nn.softmax(classifier)

        # Mean cross-entropy over the batch.
        # NOTE(review): tf.log(softmax) is unclipped here — can produce the
        # NaNs this loop later checks for; confirm whether clipping was intended.
        cost_function = -tf.mul(model_label_batch, tf.log(classifier))
        cost_function = tf.reduce_sum(cost_function)
        cost_function *= 1 / float(options['batch_size'])

        log.info('Forward pass graph built')

        # Define optimizer
        optimizer = tf.train.AdamOptimizer(
            learning_rate=options['lr']
        )

        # Get gradients (drop vars with no gradient)
        grads = optimizer.compute_gradients(cost_function)
        grads = [gv for gv in grads if gv[0] != None]
        grad_tensors = [gv[0] for gv in grads]

        # Clip gradients
        clip_grads = [(tf.clip_by_norm(gv[0], 5.0, name='grad_clipping'), gv[1]) for gv in grads]

        # Update op
        backpass = optimizer.apply_gradients(clip_grads)

        log.info('Optimizer graph built')

        # Define init operation
        init_op = tf.initialize_all_variables()
        log.info('Variable initialization graph built')

    # Define op to save and restore variables
    saver = tf.train.Saver()
    log.info('Save operation built')
    # --------------------------------------------------------------------------

    # Train loop ---------------------------------------------------------------
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        log.info('Session started')

        # Initialize shared variables or restore
        if options['reload']:
            saver.restore(sess, os.path.join(options['model_dir'], 'model.ckpt'))
            log.info('Shared variables restored')
        else:
            sess.run(init_op)
            log.info('Shared variables initialized')

        # Define last losses to compute a running average
        last_losses = np.zeros((10))
        last_accs = np.zeros((10))

        batch_abs_idx = 0
        for epoch_idx in xrange(options['n_epochs']):
            batch_rel_idx = 0
            log.info('Epoch {}'.format(epoch_idx + 1))

            for inputs, labels in train_provider:
                batch_abs_idx += 1
                batch_rel_idx += 1

                # Fetch loss, update op, predictions, and every gradient tensor
                # (gradients fetched for NaN debugging below).
                result = sess.run(
                    [cost_function, backpass, classifier] + [gv[0] for gv in grads],
                    feed_dict={
                        model_input_batch: inputs,
                        model_label_batch: labels
                    }
                )

                cost = result[0]

                if batch_abs_idx % 10 == 0:
                    train_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(last_losses)))
                    train_log.flush()

                # Check cost: on NaN/Inf, dump everything fetched and abort.
                if np.isnan(cost) or np.isinf(cost):
                    log.info('NaN detected')
                    for i in range(len(result)):
                        print("\n\nresult[%d]:" % i)
                        try:
                            print(np.any(np.isnan(result[i])))
                        except:
                            pass
                        print(result[i])
                    # NOTE(review): indices 3+ are gradient tensors; these fixed
                    # index reads assume at least 11 fetches — verify.
                    print(result[3].shape)
                    print(model._encoder.layers[0].weights['w'].eval())
                    print('\n\nAny:')
                    print(np.any(np.isnan(result[8])))
                    print(np.any(np.isnan(result[9])))
                    print(np.any(np.isnan(result[10])))
                    print(inputs)
                    return 1., 1., 1.

                # Update last losses / accuracies (newest value at index 0)
                last_losses = np.roll(last_losses, 1)
                last_losses[0] = cost
                last_accs = np.roll(last_accs, 1)
                last_accs[0] = np.mean(np.argmax(labels, axis=1) == np.argmax(result[2], axis=1))

                # Display training information
                if np.mod(epoch_idx, options['freq_logging']) == 0:
                    log.info('Epoch {:02}/{:02} Batch {:03} Current Acc.: {:0>15.4f} Mean last accs: {:0>15.4f}'.format(
                        epoch_idx + 1,
                        options['n_epochs'],
                        batch_abs_idx,
                        last_accs[0],
                        np.mean(last_accs)
                    ))
                    log.info('Batch Mean Loss: {:0>15.4f}'.format(np.mean(last_losses)))

                # Save model
                if np.mod(batch_abs_idx, options['freq_saving']) == 0:
                    saver.save(sess, os.path.join(options['model_dir'], 'model_at_%d.ckpt' % batch_abs_idx))
                    log.info('Model saved')

                # Validate model
                if np.mod(batch_abs_idx, options['freq_validation']) == 0:
                    valid_costs = []
                    seen_batches = 0
                    # NOTE(review): 'labels' here shadows the training-batch
                    # variable; harmless only because it is reassigned on the
                    # next training iteration.
                    for val_batch, labels in val_provider:
                        val_result = sess.run(
                            [cost_function, classifier],
                            feed_dict={
                                model_input_batch: val_batch,
                                model_label_batch: labels
                            }
                        )
                        # NOTE(review): despite the names, this is accuracy,
                        # not cost — the val_loss.csv column actually logs
                        # validation accuracy.
                        val_cost = np.mean(np.argmax(labels, axis=1) == np.argmax(val_result[1], axis=1))
                        valid_costs.append(val_cost)
                        seen_batches += 1
                        # Cap validation at a fixed number of batches.
                        if seen_batches == options['valid_batches']:
                            break

                    # Print results
                    log.info('Validation acc.: {:0>15.4f}'.format(
                        float(np.mean(valid_costs))
                    ))
                    val_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(valid_costs)))
                    val_log.flush()

            log.info('End of epoch {}'.format(epoch_idx + 1))

        # Final test-set accuracy --------------------------------------------------
        test_results = []
        for inputs, labels in test_provider:
            if isinstance(inputs, tuple):
                inputs = inputs[0]
            batch_results = sess.run(
                [cost_function, classifier],
                feed_dict={
                    model_input_batch: inputs,
                    model_label_batch: labels
                }
            )
            test_results.append(np.mean(np.argmax(labels, axis=1) == np.argmax(batch_results[1], axis=1)))

        # Print results
        log.info('Test Accuracy: {:0>15.4f}'.format(
            np.mean(test_results)
        ))
def train(options):
    """Train a softmax classifier on samples from a VAE encoder's posterior.

    Builds a vanilla VAE graph, attaches a single fully-connected softmax
    classifier to a reparameterized sample of the encoder's latent
    distribution, restores weights from a hard-coded checkpoint, then trains
    with Adam + per-tensor gradient clipping while streaming CSV statistics
    for an external dashboard.

    Args:
        options (dict): experiment configuration (paths, layer specs and
            training hyper-parameters); keys are read inline below.

    Returns:
        (1., 1., 1.) tuple only on NaN/Inf loss abort; otherwise falls off
        the end after the last epoch (returns None).
    """
    # Get logger
    log = utils.get_logger(os.path.join(options['model_dir'], 'log.txt'))

    # Dump the full option set so a run can be reproduced from its artifacts.
    options_file = open(os.path.join(options['dashboard_dir'], 'options'), 'w')
    options_file.write(options['description'] + '\n')
    options_file.write(
        'DKL Weight: {}\nLog Sigma^2 clipped to: [{}, {}]\n\n'.format(
            options['DKL_weight'], -options['sigma_clip'],
            options['sigma_clip']))
    for optn in options:
        options_file.write(optn)
        options_file.write(':\t')
        options_file.write(str(options[optn]))
        options_file.write('\n')
    options_file.close()

    # Dashboard Catalog: manifest telling the dashboard which files to plot.
    catalog = open(os.path.join(options['dashboard_dir'], 'catalog'), 'w')
    catalog.write("""filename,type,name
options,plain,Options
train_loss.csv,csv,Train Loss
ll.csv,csv,Neg. Log-Likelihood
dec_log_sig_sq.csv,csv,Decoder Log Simga^2
dec_std_log_sig_sq.csv,csv,STD of Decoder Log Simga^2
dec_mean.csv,csv,Decoder Mean
dkl.csv,csv,DKL
enc_log_sig_sq.csv,csv,Encoder Log Sigma^2
enc_std_log_sig_sq.csv,csv,STD of Encoder Log Sigma^2
enc_mean.csv,csv,Encoder Mean
val_loss.csv,csv,Validation Loss
""")
    catalog.flush()

    # One CSV stream per tracked statistic.
    train_log = open(os.path.join(options['dashboard_dir'], 'train_loss.csv'),
                     'w')
    val_log = open(os.path.join(options['dashboard_dir'], 'val_loss.csv'), 'w')
    dkl_log = open(os.path.join(options['dashboard_dir'], 'dkl.csv'), 'w')
    ll_log = open(os.path.join(options['dashboard_dir'], 'll.csv'), 'w')
    dec_sig_log = open(
        os.path.join(options['dashboard_dir'], 'dec_log_sig_sq.csv'), 'w')
    enc_sig_log = open(
        os.path.join(options['dashboard_dir'], 'enc_log_sig_sq.csv'), 'w')
    dec_std_sig_log = open(
        os.path.join(options['dashboard_dir'], 'dec_std_log_sig_sq.csv'), 'w')
    enc_std_sig_log = open(
        os.path.join(options['dashboard_dir'], 'enc_std_log_sig_sq.csv'), 'w')
    dec_mean_log = open(os.path.join(options['dashboard_dir'], 'dec_mean.csv'),
                        'w')
    enc_mean_log = open(os.path.join(options['dashboard_dir'], 'enc_mean.csv'),
                        'w')
    # val_sig_log = open(os.path.join(options['dashboard_dir'], 'val_log_sig_sq.csv'), 'w')

    # CSV headers (step, timestamp, value).
    train_log.write('step,time,Train Loss\n')
    val_log.write('step,time,Validation Loss\n')
    dkl_log.write('step,time,DKL\n')
    ll_log.write('step,time,-LL\n')
    dec_sig_log.write('step,time,Decoder Log Sigma^2\n')
    enc_sig_log.write('step,time,Encoder Log Sigma^2\n')
    dec_std_sig_log.write('step,time,STD of Decoder Log Sigma^2\n')
    enc_std_sig_log.write('step,time,STD of Encoder Log Sigma^2\n')
    dec_mean_log.write('step,time,Decoder Mean\n')
    enc_mean_log.write('step,time,Encoder Mean\n')

    # Print options
    utils.print_options(options, log)

    # Load dataset ----------------------------------------------------------------------
    # Train provider
    if options['data_dir'] != 'MNIST':
        # Directory-of-patches dataset. The "- 2" presumably discounts two
        # non-image entries in the directory listing -- TODO confirm.
        num_data_points = len(
            os.listdir(os.path.join(options['data_dir'], 'train', 'patches')))
        num_data_points -= 2
        train_provider = DataProvider(
            num_data_points, options['batch_size'],
            toolbox.ImageLoader(
                data_dir=os.path.join(options['data_dir'], 'train', 'patches'),
                flat=True,
                extension=options['file_extension']))
        # Valid provider
        num_data_points = len(
            os.listdir(os.path.join(options['data_dir'], 'valid', 'patches')))
        num_data_points -= 2
        val_provider = DataProvider(
            num_data_points, options['batch_size'],
            toolbox.ImageLoader(
                data_dir=os.path.join(options['data_dir'], 'valid', 'patches'),
                flat=True,
                extension=options['file_extension']))
    else:
        # MNIST split sizes are hard-coded: 55k train / 5k validation.
        train_provider = DataProvider(
            55000, options['batch_size'],
            toolbox.MNISTLoader(mode='train', flat=True))
        val_provider = DataProvider(
            5000, options['batch_size'],
            toolbox.MNISTLoader(mode='validation', flat=True))

    log.info('Data providers initialized.')

    # Initialize model ------------------------------------------------------------------
    with tf.device('/gpu:0'):
        model = cupboard(options['model'])(
            options['p_layers'], options['q_layers'],
            np.prod(options['img_shape']), options['latent_dims'],
            options['DKL_weight'], options['sigma_clip'], 'vanilla_vae')
        log.info('Model initialized')

        # Define inputs
        model_input_batch = tf.placeholder(
            tf.float32,
            shape=[
                options['batch_size'],
                np.prod(np.array(options['img_shape']))
            ],
            name='enc_inputs')
        model_label_batch = tf.placeholder(
            tf.float32,
            shape=[options['batch_size'], options['num_classes']],
            name='labels')
        sampler_input_batch = tf.placeholder(
            tf.float32,
            shape=[options['batch_size'], options['latent_dims']],
            name='dec_inputs')
        log.info('Inputs defined')

        # Define forward pass (returns the VAE loss tensor).
        cost_function = model(model_input_batch)
        log.info('Forward pass graph built')

        # Define sampler
        sampler = model.build_sampler(sampler_input_batch)
        log.info('Sampler graph built')

        # Define optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate=options['lr'])
        # optimizer = tf.train.GradientDescentOptimizer(learning_rate=options['lr'])

        # Reparameterization trick: latent sample = eps * std + mean, fed to
        # a single FC layer + softmax as the classifier head.
        enc_std = tf.exp(tf.mul(0.5, model.enc_log_std_sq))
        classifier = FC(
            model.latent_dims, options['num_classes'], activation=None,
            scale=0.01, name='classifier_fc')(tf.add(
                tf.mul(tf.random_normal([model.n_samples, model.latent_dims]),
                       enc_std), model.enc_mean))
        classifier = tf.nn.softmax(classifier)

        # Batch-averaged cross-entropy. NOTE(review): this rebinds
        # cost_function, discarding the VAE loss computed above -- only the
        # classification loss is optimized.
        cost_function = -tf.mul(model_label_batch, tf.log(classifier))
        cost_function = tf.reduce_sum(cost_function)
        cost_function *= 1 / float(options['batch_size'])

        # NOTE(review): train_step is built but never run; the loop below
        # uses the gradient-clipped `backpass` op instead.
        train_step = optimizer.minimize(cost_function)

        # Get gradients
        grads = optimizer.compute_gradients(cost_function)
        grads = [gv for gv in grads if gv[0] != None]
        grad_tensors = [gv[0] for gv in grads]

        # Clip gradients (per-tensor norm clipping at 5.0).
        clip_grads = [(tf.clip_by_norm(gv[0], 5.0, name='grad_clipping'),
                       gv[1]) for gv in grads]

        # Update op
        backpass = optimizer.apply_gradients(clip_grads)

        log.info('Optimizer graph built')

        # # Get gradients
        # grad = optimizer.compute_gradients(cost_function)
        # # Clip gradients
        # clipped_grad = tf.clip_by_norm(grad, 5.0, name='grad_clipping')
        # # Update op
        # backpass = optimizer.apply_gradients(clipped_grad)

        # Define init operation
        init_op = tf.initialize_all_variables()
        log.info('Variable initialization graph built')

        # Define op to save and restore variables
        saver = tf.train.Saver()
        log.info('Save operation built')
        # --------------------------------------------------------------------------

    # Train loop ---------------------------------------------------------------
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        log.info('Session started')

        # Initialize shared variables or restore.
        # NOTE(review): init runs first, then the restore overwrites the
        # checkpointed variables; the checkpoint step (21000) is hard-coded.
        sess.run(init_op)
        saver.restore(
            sess, os.path.join(options['model_dir'], 'model_at_21000.ckpt'))
        log.info('Shared variables restored')

        # Define last losses to compute a running average
        last_losses = np.zeros((10))

        batch_abs_idx = 0
        for epoch_idx in xrange(options['n_epochs']):
            batch_rel_idx = 0
            log.info('Epoch {}'.format(epoch_idx + 1))

            for inputs, labels in train_provider:
                batch_abs_idx += 1
                batch_rel_idx += 1

                # Fetch indices: 0 loss, 1 backpass (None), 2 DKL,
                # 3 rec_loss, 4 dec_log_std_sq, 5 enc_log_std_sq,
                # 6 enc_mean, 7 dec_mean, 8 classifier probs, 9+ gradients.
                result = sess.run(
                    [
                        cost_function, backpass, model.DKL, model.rec_loss,
                        model.dec_log_std_sq, model.enc_log_std_sq,
                        model.enc_mean, model.dec_mean, classifier
                    ] + [gv[0] for gv in grads],
                    feed_dict={
                        model_input_batch: inputs,
                        model_label_batch: labels
                    })

                cost = result[0]

                # Every 10 steps, stream statistics to the dashboard CSVs.
                # The 'time' column is a hard-coded placeholder date.
                if batch_abs_idx % 10 == 0:
                    # train_loss.csv actually logs batch *accuracy*
                    # (argmax match rate against one-hot labels).
                    train_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22',
                        np.mean(
                            np.argmax(labels, axis=1) == np.argmax(result[8],
                                                                   axis=1))))
                    dkl_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22', -np.mean(result[2])))
                    ll_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22', -np.mean(result[3])))
                    train_log.flush()
                    dkl_log.flush()
                    ll_log.flush()
                    dec_sig_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22', np.mean(result[4])))
                    enc_sig_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22', np.mean(result[5])))
                    # val_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[6])))
                    dec_sig_log.flush()
                    enc_sig_log.flush()
                    dec_std_sig_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22', np.std(result[4])))
                    enc_std_sig_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22', np.std(result[5])))
                    dec_mean_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22', np.mean(result[7])))
                    enc_mean_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22', np.mean(result[6])))
                    dec_std_sig_log.flush()
                    enc_std_sig_log.flush()
                    dec_mean_log.flush()
                    enc_mean_log.flush()
                    # val_sig_log.flush()

                # Check cost: on NaN/Inf, dump every fetched tensor to stdout
                # for post-mortem debugging and abort the whole run.
                if np.isnan(cost) or np.isinf(cost):
                    log.info('NaN detected')
                    for i in range(len(result)):
                        print("\n\nresult[%d]:" % i)
                        try:
                            # Some fetches (e.g. the backpass op) are None /
                            # non-arrays; isnan would raise on them.
                            print(np.any(np.isnan(result[i])))
                        except:
                            pass
                        print(result[i])
                    print(result[3].shape)
                    print(model._encoder.layers[0].weights['w'].eval())
                    print('\n\nAny:')
                    print(np.any(np.isnan(result[8])))
                    print(np.any(np.isnan(result[9])))
                    print(np.any(np.isnan(result[10])))
                    print(inputs)
                    return 1., 1., 1.

                # Update last losses (ring buffer via roll; newest at [0]).
                last_losses = np.roll(last_losses, 1)
                last_losses[0] = cost

                # Display training information
                # NOTE(review): gated on epoch_idx, not batch_abs_idx, so this
                # prints every batch of qualifying epochs -- confirm intended.
                if np.mod(epoch_idx, options['freq_logging']) == 0:
                    log.info(
                        'Epoch {:02}/{:02} Batch {:03} Current Loss: {:0>15.4f} Mean last losses: {:0>15.4f}'
                        .format(epoch_idx + 1, options['n_epochs'],
                                batch_abs_idx, float(cost),
                                np.mean(last_losses)))
                    log.info('Batch Mean LL: {:0>15.4f}'.format(
                        np.mean(result[3], axis=0)))
                    log.info('Batch Mean -DKL: {:0>15.4f}'.format(
                        np.mean(result[2], axis=0)))

                # Save model
                if np.mod(batch_abs_idx, options['freq_saving']) == 0:
                    saver.save(
                        sess,
                        os.path.join(options['model_dir'],
                                     'model_at_%d.ckpt' % batch_abs_idx))
                    log.info('Model saved')

                # Validate model
                if np.mod(batch_abs_idx, options['freq_validation']) == 0:
                    valid_costs = []
                    seen_batches = 0
                    for val_batch, val_labels in val_provider:
                        val_result = sess.run(
                            [cost_function, classifier],
                            feed_dict={
                                model_input_batch: val_batch,
                                model_label_batch: val_labels
                            })
                        val_cost = val_result[0]
                        valid_costs.append(val_cost)
                        seen_batches += 1
                        if seen_batches == options['valid_batches']:
                            break

                    # Print results
                    log.info('Validation loss: {:0>15.4f}'.format(
                        float(np.mean(valid_costs))))

                    # Decode draws from the prior; MVN is presumably a
                    # multivariate-normal sampler (numpy) -- TODO confirm.
                    val_samples = sess.run(
                        sampler,
                        feed_dict={
                            sampler_input_batch:
                            MVN(np.zeros(options['latent_dims']),
                                np.diag(np.ones(options['latent_dims'])),
                                size=options['batch_size'])
                        })

                    # val_loss.csv logs *accuracy* of the last validation
                    # batch only, not the averaged loss printed above.
                    val_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22',
                        np.mean(
                            np.argmax(val_labels, axis=1) == np.argmax(
                                val_result[1], axis=1))))
                    val_log.flush()

                    # Push (reconstructions, inputs, prior samples) images to
                    # the dashboard; result[7] is dec_mean.
                    save_ae_samples(
                        catalog,
                        np.reshape(result[7],
                                   [options['batch_size']] + options['img_shape']),
                        np.reshape(inputs,
                                   [options['batch_size']] + options['img_shape']),
                        np.reshape(val_samples,
                                   [options['batch_size']] + options['img_shape']),
                        batch_abs_idx,
                        options['dashboard_dir'],
                        num_to_save=5,
                        save_gray=True)

                    # Also write sample grids to disk for sanity checks.
                    save_samples(
                        val_samples,
                        int(batch_abs_idx / options['freq_validation']),
                        os.path.join(options['model_dir'], 'valid_samples'),
                        True, options['img_shape'], 5)
                    save_samples(
                        inputs,
                        int(batch_abs_idx / options['freq_validation']),
                        os.path.join(options['model_dir'], 'input_sanity'),
                        True, options['img_shape'], num_to_save=5)
                    save_samples(
                        result[7],
                        int(batch_abs_idx / options['freq_validation']),
                        os.path.join(options['model_dir'], 'rec_sanity'),
                        True, options['img_shape'], num_to_save=5)

            log.info('End of epoch {}'.format(epoch_idx + 1))
def train(options):
    """Train a denoising autoencoder (convolutional or fully-connected).

    Clean inputs are corrupted with i.i.d. Gaussian noise; the model is
    trained to reconstruct the clean batch. Periodically checkpoints the
    model, pickles the encoder weights, validates, and pushes sample
    images to an external dashboard.

    Args:
        options (dict): experiment configuration (paths, model layout,
            training hyper-parameters); keys are read inline below.

    Returns:
        (1., 1., 1.) tuple only on NaN/Inf loss abort; otherwise falls off
        the end after the last epoch (returns None).
    """
    # Get logger
    log = utils.get_logger(os.path.join(options['model_dir'], 'log.txt'))

    # Dump the full option set so a run can be reproduced from its artifacts.
    options_file = open(os.path.join(options['dashboard_dir'], 'options'), 'w')
    for optn in options:
        options_file.write(optn)
        options_file.write(':\t')
        options_file.write(str(options[optn]))
        options_file.write('\n')
    options_file.close()

    # Dashboard Catalog: manifest telling the dashboard which files to plot.
    catalog = open(os.path.join(options['dashboard_dir'], 'catalog'), 'w')
    catalog.write(
        """filename,type,name
options,plain,Options
train_loss.csv,csv,Train Loss
val_loss.csv,csv,Validation Loss
"""
    )
    catalog.flush()

    train_log = open(os.path.join(options['dashboard_dir'], 'train_loss.csv'),
                     'w')
    val_log = open(os.path.join(options['dashboard_dir'], 'val_loss.csv'), 'w')

    # CSV headers (step, timestamp, value).
    train_log.write('step,time,Train Loss\n')
    val_log.write('step,time,Validation Loss\n')

    # Print options
    utils.print_options(options, log)

    # Load dataset ----------------------------------------------------------------------
    # Train provider
    train_provider, val_provider, test_provider = get_providers(options, log)

    # Initialize model ------------------------------------------------------------------
    # input_shape, input_channels, enc_params, dec_params, name=''
    with tf.device('/gpu:0'):
        if options['model'] == 'cnn_ae':
            # Convolutional AE keeps spatial layout: NHWC placeholders.
            model = cupboard(options['model'])(
                options['img_shape'],
                options['input_channels'],
                options['enc_params'],
                options['dec_params'],
                'cnn_ae'
            )

            # Define inputs
            model_clean_input_batch = tf.placeholder(
                tf.float32,
                shape = [options['batch_size']] + options['img_shape'] + [options['input_channels']],
                name = 'clean'
            )
            model_noisy_input_batch = tf.placeholder(
                tf.float32,
                shape = [options['batch_size']] + options['img_shape'] + [options['input_channels']],
                name = 'noisy'
            )
            log.info('Inputs defined')
        else:
            # Fully-connected AE operates on flattened pixels.
            model = cupboard(options['model'])(
                np.prod(options['img_shape']) * options['input_channels'],
                options['enc_params'],
                options['dec_params'],
                'ae'
            )

            # Define inputs
            model_clean_input_batch = tf.placeholder(
                tf.float32,
                shape = [options['batch_size']] + [np.prod(options['img_shape']) * options['input_channels']],
                name = 'clean'
            )
            model_noisy_input_batch = tf.placeholder(
                tf.float32,
                shape = [options['batch_size']] + [np.prod(options['img_shape']) * options['input_channels']],
                name = 'noisy'
            )
            log.info('Inputs defined')

        log.info('Model initialized')

        # Define forward pass: reconstruct clean batch from the noisy one.
        print(model_clean_input_batch.get_shape())
        print(model_noisy_input_batch.get_shape())
        cost_function = model(model_clean_input_batch, model_noisy_input_batch)
        log.info('Forward pass graph built')
        log.info('Sampler graph built')

        # Define optimizer
        optimizer = tf.train.AdamOptimizer(
            learning_rate=options['lr']
        )
        # optimizer = tf.train.GradientDescentOptimizer(learning_rate=options['lr'])
        # train_step = optimizer.minimize(cost_function)
        log.info('Optimizer graph built')

        # Get gradients
        grads = optimizer.compute_gradients(cost_function)
        grads = [gv for gv in grads if gv[0] != None]
        grad_tensors = [gv[0] for gv in grads]

        # Clip gradients (per-tensor norm clipping at 5.0).
        clip_grads = [(tf.clip_by_norm(gv[0], 5.0, name='grad_clipping'), gv[1]) for gv in grads]

        # Update op
        backpass = optimizer.apply_gradients(clip_grads)

        # Define init operation
        init_op = tf.initialize_all_variables()
        log.info('Variable initialization graph built')

        # Define op to save and restore variables
        saver = tf.train.Saver()
        log.info('Save operation built')
        # --------------------------------------------------------------------------

    # Train loop ---------------------------------------------------------------
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        log.info('Session started')

        # Initialize shared variables or restore
        if options['reload']:
            saver.restore(sess, os.path.join(options['model_dir'], 'model.ckpt'))
            log.info('Shared variables restored')
        else:
            sess.run(init_op)
            log.info('Shared variables initialized')

        # Define last losses to compute a running average
        last_losses = np.zeros((10))

        batch_abs_idx = 0
        for epoch_idx in xrange(options['n_epochs']):
            batch_rel_idx = 0
            log.info('Epoch {}'.format(epoch_idx + 1))

            for inputs, _ in train_provider:
                batch_abs_idx += 1
                batch_rel_idx += 1

                # Fetch indices: 0 loss, 1 backpass (None), 2+ gradients.
                result = sess.run(
                    [cost_function, backpass] + [gv[0] for gv in grads],
                    feed_dict = {
                        model_clean_input_batch: inputs,
                        # Corrupt the input with i.i.d. Gaussian noise;
                        # `normal` is presumably numpy.random.normal.
                        model_noisy_input_batch: np.float32(inputs) + \
                            normal(
                                loc=0.0,
                                scale=np.float32(options['noise_std']),
                                size=inputs.shape
                            )
                    }
                )

                cost = result[0]

                # Every 10 steps, stream the running-average loss to the
                # dashboard; the 'time' column is a placeholder date.
                if batch_abs_idx % 10 == 0:
                    train_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(last_losses)))
                    train_log.flush()

                # Check cost: on NaN/Inf, dump every fetched tensor to stdout
                # for post-mortem debugging and abort the whole run.
                if np.isnan(cost) or np.isinf(cost):
                    log.info('NaN detected')
                    for i in range(len(result)):
                        print("\n\nresult[%d]:" % i)
                        try:
                            # Some fetches (e.g. the backpass op) are None /
                            # non-arrays; isnan would raise on them.
                            print(np.any(np.isnan(result[i])))
                        except:
                            pass
                        print(result[i])
                    print(result[3].shape)
                    print(model._encoder.layers[0].weights['w'].eval())
                    print('\n\nAny:')
                    print(np.any(np.isnan(result[8])))
                    print(np.any(np.isnan(result[9])))
                    print(np.any(np.isnan(result[10])))
                    print(inputs)
                    return 1., 1., 1.

                # Update last losses (ring buffer via roll; newest at [0]).
                last_losses = np.roll(last_losses, 1)
                last_losses[0] = cost

                # Display training information
                # NOTE(review): gated on epoch_idx, not batch_abs_idx, so this
                # prints every batch of qualifying epochs -- confirm intended.
                if np.mod(epoch_idx, options['freq_logging']) == 0:
                    log.info('Epoch {:02}/{:02} Batch {:03} Current Loss: {:0>15.4f} Mean last losses: {:0>15.4f}'.format(
                        epoch_idx + 1,
                        options['n_epochs'],
                        batch_abs_idx,
                        float(cost),
                        np.mean(last_losses)
                    ))

                # Save model
                if np.mod(batch_abs_idx, options['freq_saving']) == 0:
                    saver.save(sess, os.path.join(options['model_dir'], 'model_at_%d.ckpt' % batch_abs_idx))
                    log.info('Model saved')

                    # Save Encoder Params: pickle FC-encoder weights so other
                    # scripts can reuse them as fixed features. Not
                    # implemented for the convolutional model.
                    save_dict = {
                        'enc_W': [],
                        'enc_b': [],
                        'enc_act_fn': [],
                    }
                    if options['model'] == 'cnn_ae':
                        pass
                    else:
                        for i in range(len(model._encoder.layers)):
                            save_dict['enc_W'].append(model._encoder.layers[i].weights['w'].eval())
                            save_dict['enc_b'].append(model._encoder.layers[i].weights['b'].eval())
                            save_dict['enc_act_fn'].append(options['enc_params']['act_fn'][i])
                    pickle.dump(save_dict, open(os.path.join(options['model_dir'], 'enc_dict_%d' % batch_abs_idx), 'wb'))

                # Validate model
                if np.mod(batch_abs_idx, options['freq_validation']) == 0:
                    # NOTE(review): the result of this expression is
                    # discarded -- looks like leftover debugging.
                    model._decoder.layers[0].weights['w'].eval()[:5, :5]

                    valid_costs = []
                    seen_batches = 0
                    for val_batch, _ in val_provider:
                        noisy_val_batch = val_batch + \
                            normal(
                                loc=0.0,
                                scale=np.float32(options['noise_std']),
                                size=val_batch.shape
                            )
                        val_results = sess.run(
                            (cost_function, model.decoder),
                            feed_dict = {
                                model_clean_input_batch: val_batch,
                                model_noisy_input_batch: noisy_val_batch
                            }
                        )
                        valid_costs.append(val_results[0])
                        seen_batches += 1
                        if seen_batches == options['valid_batches']:
                            break

                    # Print results
                    log.info('Validation loss: {:0>15.4f}'.format(
                        float(np.mean(valid_costs))
                    ))
                    val_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(valid_costs)))
                    val_log.flush()

                    # NOTE(review): this checks 'conv_ae' while model
                    # construction above checks 'cnn_ae' -- the first branch
                    # may never be taken; verify the intended key.
                    if options['model'] == 'conv_ae':
                        val_recon = np.reshape(
                            val_results[-1],
                            val_batch.shape
                        )
                    else:
                        # Un-flatten for image display.
                        val_batch = np.reshape(
                            val_batch,
                            [val_batch.shape[0]] + options['img_shape'] + [options['input_channels']]
                        )
                        noisy_val_batch = np.reshape(
                            noisy_val_batch,
                            [val_batch.shape[0]] + options['img_shape'] + [options['input_channels']]
                        )
                        val_recon = np.reshape(
                            val_results[-1],
                            [val_batch.shape[0]] + options['img_shape'] + [options['input_channels']]
                        )

                    # Push (clean, noisy, reconstruction) image triplets to
                    # the dashboard.
                    save_ae_samples(
                        catalog,
                        val_batch,
                        noisy_val_batch,
                        val_recon,
                        batch_abs_idx,
                        options['dashboard_dir'],
                        num_to_save=5,
                        save_gray=True
                    )

            log.info('End of epoch {}'.format(epoch_idx + 1))
parser.add_argument("--save_interval", type=int, default=1, help='checkpoint save interval')
parser.add_argument("--layer", default="r31", help='which features to transfer, either r31 or r41')
parser.add_argument("--checkpoints_dir", default="", help='the dir saves checkpoints')

################# PREPARATIONS #################
opt = parser.parse_args()
# Layer lists arrive as single comma-separated strings; split into lists.
opt.content_layers = opt.content_layers.split(',')
opt.style_layers = opt.style_layers.split(',')

opt.cuda = torch.cuda.is_available()
if (opt.cuda):
    torch.cuda.set_device(opt.gpu_id)

# Let cuDNN benchmark conv algorithms (fine here: input sizes are fixed).
cudnn.benchmark = True
print_options(opt)
# NOTE(review): device is hard-coded to 'cuda' even though opt.cuda may be
# False -- confirm downstream usage guards against CPU-only machines.
device = 'cuda'

################# DATA #################
# Content images come from the Monkaa (scene-flow) dataset; style images
# from a generic image-folder dataset. Both loaders drop the last partial
# batch so every batch has exactly opt.batchSize items.
content_dataset = MonkaaDataset(opt.contentPath, opt.loadSize, opt.fineSize)
content_loader = torch.utils.data.DataLoader(dataset=content_dataset,
                                             batch_size=opt.batchSize,
                                             shuffle=True,
                                             num_workers=1,
                                             drop_last=True)
style_dataset = Dataset(opt.stylePath, opt.loadSize, opt.fineSize)
style_loader_ = torch.utils.data.DataLoader(dataset=style_dataset,
                                            batch_size=opt.batchSize,
                                            shuffle=True,
                                            num_workers=1,
                                            drop_last=True)
def train(options):
    """Train a softmax classifier on features from a pickled VAE encoder.

    Rebuilds a vanilla VAE graph, loads encoder weights from a pickle
    (produced elsewhere as 'enc_dict_*'), attaches an FC softmax classifier
    to a reparameterized latent sample, trains it with Adam + gradient
    clipping, and finally reports test-set accuracy.

    Args:
        options (dict): experiment configuration (paths, layer specs and
            training hyper-parameters); keys are read inline below.

    Returns:
        (1., 1., 1.) tuple only on NaN/Inf loss abort; otherwise falls off
        the end after the test evaluation (returns None).
    """
    # Get logger
    log = utils.get_logger(os.path.join(options['model_dir'], 'log.txt'))

    # Dump the full option set so a run can be reproduced from its artifacts.
    options_file = open(os.path.join(options['dashboard_dir'], 'options'), 'w')
    options_file.write(options['description'] + '\n')
    options_file.write(
        'DKL Weight: {}\nLog Sigma^2 clipped to: [{}, {}]\n\n'.format(
            options['DKL_weight'], -options['sigma_clip'],
            options['sigma_clip']))
    for optn in options:
        options_file.write(optn)
        options_file.write(':\t')
        options_file.write(str(options[optn]))
        options_file.write('\n')
    options_file.close()

    # Dashboard Catalog: manifest telling the dashboard which files to plot.
    catalog = open(os.path.join(options['dashboard_dir'], 'catalog'), 'w')
    catalog.write("""filename,type,name
options,plain,Options
train_loss.csv,csv,Train Loss
val_loss.csv,csv,Validation Loss
""")
    catalog.flush()

    train_log = open(os.path.join(options['dashboard_dir'], 'train_loss.csv'),
                     'w')
    val_log = open(os.path.join(options['dashboard_dir'], 'val_loss.csv'), 'w')

    # CSV headers (step, timestamp, value).
    train_log.write('step,time,Train Loss\n')
    val_log.write('step,time,Validation Loss\n')

    # Print options
    utils.print_options(options, log)

    # Load dataset ----------------------------------------------------------------------
    # Train provider
    train_provider, val_provider, test_provider = get_providers(options, log,
                                                                flat=True)

    # Initialize model ------------------------------------------------------------------
    with tf.device('/gpu:0'):
        model = cupboard(options['model'])(
            options['p_layers'], options['q_layers'],
            np.prod(options['img_shape']), options['latent_dims'],
            options['DKL_weight'], options['sigma_clip'], 'vanilla_vae')
        log.info('Model initialized')

        # Define inputs
        model_input_batch = tf.placeholder(
            tf.float32,
            shape=[
                options['batch_size'],
                np.prod(np.array(options['img_shape']))
            ],
            name='enc_inputs')
        model_label_batch = tf.placeholder(
            tf.float32,
            shape=[options['batch_size'], options['num_classes']],
            name='labels')
        log.info('Inputs defined')

        # Load VAE: build the forward graph (return value discarded), then
        # swap the encoder's weight references for constants from the pickle.
        # NOTE(review): the graph was already built by model(...) above;
        # reassigning .weights to tf.constant only affects graphs built
        # afterwards -- confirm this actually freezes the encoder here.
        model(model_input_batch)
        feat_params = pickle.load(open(options['feat_params_path'], 'rb'))
        for i in range(len(model._encoder.layers)):
            model._encoder.layers[i].weights['w'] = tf.constant(
                feat_params[i]['W'])
            model._encoder.layers[i].weights['b'] = tf.constant(
                feat_params[i]['b'])
        model._enc_mean.weights['w'] = tf.constant(feat_params[-2]['W'])
        model._enc_mean.weights['b'] = tf.constant(feat_params[-2]['b'])
        model._enc_log_std_sq.weights['w'] = tf.constant(feat_params[-1]['W'])
        model._enc_log_std_sq.weights['b'] = tf.constant(feat_params[-1]['b'])

        # Reparameterization trick: latent sample = eps * std + mean, fed to
        # a single FC layer + softmax as the classifier head.
        enc_std = tf.exp(tf.mul(0.5, model.enc_log_std_sq))
        classifier = FC(
            model.latent_dims, options['num_classes'], activation=None,
            scale=0.01, name='classifier_fc')(tf.add(
                tf.mul(tf.random_normal([model.n_samples, model.latent_dims]),
                       enc_std), model.enc_mean))
        classifier = tf.nn.softmax(classifier)

        # Batch-averaged cross-entropy classification loss.
        cost_function = -tf.mul(model_label_batch, tf.log(classifier))
        cost_function = tf.reduce_sum(cost_function)
        cost_function *= 1 / float(options['batch_size'])

        log.info('Forward pass graph built')

        # Define optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate=options['lr'])
        # optimizer = tf.train.GradientDescentOptimizer(learning_rate=options['lr'])
        # train_step = optimizer.minimize(cost_function)

        # Get gradients
        grads = optimizer.compute_gradients(cost_function)
        grads = [gv for gv in grads if gv[0] != None]
        grad_tensors = [gv[0] for gv in grads]

        # Clip gradients (per-tensor norm clipping at 5.0).
        clip_grads = [(tf.clip_by_norm(gv[0], 5.0, name='grad_clipping'),
                       gv[1]) for gv in grads]

        # Update op
        backpass = optimizer.apply_gradients(clip_grads)

        log.info('Optimizer graph built')

        # # Get gradients
        # grad = optimizer.compute_gradients(cost_function)
        # # Clip gradients
        # clipped_grad = tf.clip_by_norm(grad, 5.0, name='grad_clipping')
        # # Update op
        # backpass = optimizer.apply_gradients(clipped_grad)

        # Define init operation
        init_op = tf.initialize_all_variables()
        log.info('Variable initialization graph built')

        # Define op to save and restore variables
        saver = tf.train.Saver()
        log.info('Save operation built')
        # --------------------------------------------------------------------------

    # Train loop ---------------------------------------------------------------
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        log.info('Session started')

        # Initialize shared variables or restore
        if options['reload']:
            saver.restore(sess, os.path.join(options['model_dir'],
                                             'model.ckpt'))
            log.info('Shared variables restored')
        else:
            sess.run(init_op)
            log.info('Shared variables initialized')

        # Running averages over the last 10 batches (newest at index 0).
        last_losses = np.zeros((10))
        last_accs = np.zeros((10))

        batch_abs_idx = 0
        for epoch_idx in xrange(options['n_epochs']):
            batch_rel_idx = 0
            log.info('Epoch {}'.format(epoch_idx + 1))

            for inputs, labels in train_provider:
                batch_abs_idx += 1
                batch_rel_idx += 1

                # Fetch indices: 0 loss, 1 backpass (None),
                # 2 classifier probs, 3+ gradients.
                result = sess.run(
                    [cost_function, backpass, classifier] +
                    [gv[0] for gv in grads],
                    feed_dict={
                        model_input_batch: inputs,
                        model_label_batch: labels
                    })

                cost = result[0]

                # Every 10 steps, stream the running-average loss to the
                # dashboard; the 'time' column is a placeholder date.
                if batch_abs_idx % 10 == 0:
                    train_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22', np.mean(last_losses)))
                    train_log.flush()

                # Check cost: on NaN/Inf, dump every fetched tensor to stdout
                # for post-mortem debugging and abort the whole run.
                if np.isnan(cost) or np.isinf(cost):
                    log.info('NaN detected')
                    for i in range(len(result)):
                        print("\n\nresult[%d]:" % i)
                        try:
                            # Some fetches (e.g. the backpass op) are None /
                            # non-arrays; isnan would raise on them.
                            print(np.any(np.isnan(result[i])))
                        except:
                            pass
                        print(result[i])
                    print(result[3].shape)
                    print(model._encoder.layers[0].weights['w'].eval())
                    print('\n\nAny:')
                    print(np.any(np.isnan(result[8])))
                    print(np.any(np.isnan(result[9])))
                    print(np.any(np.isnan(result[10])))
                    print(inputs)
                    return 1., 1., 1.

                # Update last losses / accuracies (ring buffers via roll).
                last_losses = np.roll(last_losses, 1)
                last_losses[0] = cost
                last_accs = np.roll(last_accs, 1)
                last_accs[0] = np.mean(
                    np.argmax(labels, axis=1) == np.argmax(result[2], axis=1))

                # Display training information
                # NOTE(review): gated on epoch_idx, not batch_abs_idx, so this
                # prints every batch of qualifying epochs -- confirm intended.
                if np.mod(epoch_idx, options['freq_logging']) == 0:
                    log.info(
                        'Epoch {:02}/{:02} Batch {:03} Current Acc.: {:0>15.4f} Mean last accs: {:0>15.4f}'
                        .format(epoch_idx + 1, options['n_epochs'],
                                batch_abs_idx, last_accs[0],
                                np.mean(last_accs)))
                    log.info('Batch Mean Loss: {:0>15.4f}'.format(
                        np.mean(last_losses)))

                # Save model
                if np.mod(batch_abs_idx, options['freq_saving']) == 0:
                    saver.save(
                        sess,
                        os.path.join(options['model_dir'],
                                     'model_at_%d.ckpt' % batch_abs_idx))
                    log.info('Model saved')

                # Validate model
                if np.mod(batch_abs_idx, options['freq_validation']) == 0:
                    valid_costs = []
                    seen_batches = 0
                    # NOTE(review): this loop reuses (shadows) the training
                    # `labels` variable, and despite the names val_cost /
                    # valid_costs it accumulates *accuracy*, not loss.
                    for val_batch, labels in val_provider:
                        val_result = sess.run(
                            [cost_function, classifier],
                            feed_dict={
                                model_input_batch: val_batch,
                                model_label_batch: labels
                            })
                        val_cost = np.mean(
                            np.argmax(labels, axis=1) == np.argmax(
                                val_result[1], axis=1))
                        valid_costs.append(val_cost)
                        seen_batches += 1
                        if seen_batches == options['valid_batches']:
                            break

                    # Print results
                    log.info('Validation acc.: {:0>15.4f}'.format(
                        float(np.mean(valid_costs))))
                    val_log.write('{},{},{}\n'.format(batch_abs_idx,
                                                      '2016-04-22',
                                                      np.mean(valid_costs)))
                    val_log.flush()

            log.info('End of epoch {}'.format(epoch_idx + 1))

        # --------------------------------------------------------------------------
        # Final evaluation: mean per-batch accuracy over the test provider.
        test_results = []
        for inputs, labels in test_provider:
            # Some providers apparently yield (data, ...) tuples -- unwrap.
            if isinstance(inputs, tuple):
                inputs = inputs[0]
            batch_results = sess.run([cost_function, classifier],
                                     feed_dict={
                                         model_input_batch: inputs,
                                         model_label_batch: labels
                                     })
            test_results.append(
                np.mean(
                    np.argmax(labels, axis=1) == np.argmax(batch_results[1],
                                                           axis=1)))

        # Print results
        log.info('Test Accuracy: {:0>15.4f}'.format(np.mean(test_results)))
default=40) parser.add_argument( '--correct_w', help= "for white module, correct error modules' gray value to correct_w (correct_w > discriminate_w)", type=int, default=220) parser.add_argument( '--use_activation_mechanism', help= "whether to use the activation mechanism (1 means use and other numbers mean not)", type=int, default=1) args = parser.parse_args() utils.print_options(opt=args) artcoder(STYLE_IMG_PATH=args.style_img_path, CONTENT_IMG_PATH=args.content_img_path, CODE_PATH=args.code_img_path, OUTPUT_DIR=args.output_dir, LEARNING_RATE=args.learning_rate, CONTENT_WEIGHT=args.content_weight, STYLE_WEIGHT=args.style_weight, CODE_WEIGHT=args.code_weight, MODULE_SIZE=args.module_size, MODULE_NUM=args.module_number, EPOCHS=args.epoch, Dis_b=args.discriminate_b, Dis_w=args.discriminate_w, Correct_b=args.correct_b,
def train(options):
    """Train a vanilla VAE and log progress to the dashboard directory.

    Builds the VAE graph on GPU 0, runs the epoch/batch training loop with
    periodic CSV logging, checkpointing and validation, then evaluates on
    the test set.

    Args:
        options (dict): experiment configuration — paths (``model_dir``,
            ``dashboard_dir``, ``data_dir``), model sizes (``p_layers``,
            ``q_layers``, ``img_shape``, ``latent_dims``), training
            hyper-parameters (``lr``, ``batch_size``, ``n_epochs``,
            ``DKL_weight``, ``sigma_clip``) and logging frequencies
            (``freq_logging``, ``freq_saving``, ``freq_validation``).

    Returns:
        ``(1., 1., 1.)`` if training aborts on a NaN/Inf loss, otherwise
        ``None``.
    """
    # Get logger
    log = utils.get_logger(os.path.join(options['model_dir'], 'log.txt'))

    # Dump the full option set so the run is reproducible from the dashboard.
    options_file = open(os.path.join(options['dashboard_dir'], 'options'), 'w')
    options_file.write(options['description'] + '\n')
    options_file.write(
        'DKL Weight: {}\nLog Sigma^2 clipped to: [{}, {}]\n\n'.format(
            options['DKL_weight'], -options['sigma_clip'],
            options['sigma_clip']))
    for optn in options:
        options_file.write(optn)
        options_file.write(':\t')
        options_file.write(str(options[optn]))
        options_file.write('\n')
    options_file.close()

    with open(os.path.join(options['dashboard_dir'], 'description'),
              'w') as desc_file:
        desc_file.write(options['description'])

    # Dashboard Catalog: maps each file in dashboard_dir to its display name.
    catalog = open(os.path.join(options['dashboard_dir'], 'catalog'), 'w')
    catalog.write("""filename,type,name
description,plain,Description
options,plain,Options
train_loss.csv,csv,Train Loss
ll.csv,csv,Neg. Log-Likelihood
dec_log_sig_sq.csv,csv,Decoder Log Simga^2
dec_std_log_sig_sq.csv,csv,STD of Decoder Log Simga^2
dec_mean.csv,csv,Decoder Mean
dkl.csv,csv,DKL
enc_log_sig_sq.csv,csv,Encoder Log Sigma^2
enc_std_log_sig_sq.csv,csv,STD of Encoder Log Sigma^2
enc_mean.csv,csv,Encoder Mean
val_loss.csv,csv,Validation Loss
""")
    catalog.flush()

    # One CSV log per tracked statistic.
    train_log = open(os.path.join(options['dashboard_dir'], 'train_loss.csv'),
                     'w')
    val_log = open(os.path.join(options['dashboard_dir'], 'val_loss.csv'), 'w')
    dkl_log = open(os.path.join(options['dashboard_dir'], 'dkl.csv'), 'w')
    ll_log = open(os.path.join(options['dashboard_dir'], 'll.csv'), 'w')
    dec_sig_log = open(
        os.path.join(options['dashboard_dir'], 'dec_log_sig_sq.csv'), 'w')
    enc_sig_log = open(
        os.path.join(options['dashboard_dir'], 'enc_log_sig_sq.csv'), 'w')
    dec_std_sig_log = open(
        os.path.join(options['dashboard_dir'], 'dec_std_log_sig_sq.csv'), 'w')
    enc_std_sig_log = open(
        os.path.join(options['dashboard_dir'], 'enc_std_log_sig_sq.csv'), 'w')
    dec_mean_log = open(os.path.join(options['dashboard_dir'], 'dec_mean.csv'),
                        'w')
    enc_mean_log = open(os.path.join(options['dashboard_dir'], 'enc_mean.csv'),
                        'w')

    # CSV headers.
    train_log.write('step,time,Train Loss\n')
    val_log.write('step,time,Validation Loss\n')
    dkl_log.write('step,time,DKL\n')
    ll_log.write('step,time,-LL\n')
    dec_sig_log.write('step,time,Decoder Log Sigma^2\n')
    enc_sig_log.write('step,time,Encoder Log Sigma^2\n')
    dec_std_sig_log.write('step,time,STD of Decoder Log Sigma^2\n')
    enc_std_sig_log.write('step,time,STD of Encoder Log Sigma^2\n')
    dec_mean_log.write('step,time,Decoder Mean\n')
    enc_mean_log.write('step,time,Encoder Mean\n')

    # Print options
    utils.print_options(options, log)

    # Load dataset ----------------------------------------------------------
    train_provider, val_provider, test_provider = get_providers(options,
                                                                log,
                                                                flat=True)

    # Initialize model ------------------------------------------------------
    with tf.device('/gpu:0'):
        # Define inputs: flattened images for the encoder, latent vectors
        # for the decoder/sampler.
        model_input_batch = tf.placeholder(
            tf.float32,
            shape=[
                options['batch_size'],
                np.prod(np.array(options['img_shape']))
            ],
            name='enc_inputs')
        sampler_input_batch = tf.placeholder(
            tf.float32,
            shape=[options['batch_size'], options['latent_dims']],
            name='dec_inputs')
        log.info('Inputs defined')

        # VAE model
        vae_model = cupboard('vanilla_vae')(
            options['p_layers'], options['q_layers'],
            np.prod(options['img_shape']), options['latent_dims'],
            options['DKL_weight'], options['sigma_clip'], 'vanilla_vae')
        log.info('Model initialized')

        # Define optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate=options['lr'])

        # Define forward pass (returns the training cost tensor).
        cost_function = vae_model(model_input_batch)
        log.info('Forward pass graph built')

        # Define sampler
        sampler = vae_model.build_sampler(sampler_input_batch)
        log.info('Sampler graph built')
        log.info('Optimizer graph built')

        # Get gradients; drop variables that receive no gradient.
        # Fixed: compare against None with `is not`, not `!=`.
        grads = optimizer.compute_gradients(cost_function)
        grads = [gv for gv in grads if gv[0] is not None]

        # Clip gradients by norm before applying.
        clip_grads = [(tf.clip_by_norm(gv[0], 5.0, name='grad_clipping'),
                       gv[1]) for gv in grads]

        # Update op
        backpass = optimizer.apply_gradients(clip_grads)

        # Define init operation
        init_op = tf.initialize_all_variables()
        log.info('Variable initialization graph built')

        # Define op to save and restore variables
        saver = tf.train.Saver()
        log.info('Save operation built')

    # Train loop -------------------------------------------------------------
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        log.info('Session started')

        # Initialize shared variables or restore
        if options['reload']:
            saver.restore(sess, options['reload_file'])
            log.info('Shared variables restored')

            # Fixed: this branch referenced `feat_vae`, which is never
            # defined in this (vanilla VAE) trainer and raised NameError;
            # the model built above is `vae_model`.
            test_LL_and_DKL(sess, test_provider, vae_model.DKL,
                            vae_model.rec_loss, options, model_input_batch)
            return

            # Unreachable visualization path, kept for manual toggling by
            # moving the `return` above it.
            mean_img = np.load(
                os.path.join(options['data_dir'],
                             'mean' + options['extension']))
            std_img = np.load(
                os.path.join(options['data_dir'],
                             'std' + options['extension']))
            visualize(sess, vae_model.dec_mean, vae_model.dec_log_std_sq,
                      sampler, sampler_input_batch, model_input_batch,
                      vae_model.enc_mean, vae_model.enc_log_std_sq,
                      train_provider, val_provider, options, catalog,
                      mean_img, std_img)
            return
        else:
            sess.run(init_op)
            log.info('Shared variables initialized')

        # Running average buffer over the last 10 batch losses.
        last_losses = np.zeros((10))

        batch_abs_idx = 0
        # NOTE(review): read but never used in this trainer — leftover from
        # the discriminator/generator alternation schedule of the GAN
        # variant; kept so a missing 'D_to_G' key still fails loudly here.
        D_to_G = options['D_to_G']
        total_D2G = sum(D_to_G)

        for epoch_idx in xrange(options['n_epochs']):
            batch_rel_idx = 0
            log.info('Epoch {}'.format(epoch_idx + 1))

            for inputs, _ in train_provider:

                batch_abs_idx += 1
                batch_rel_idx += 1

                # One optimization step; also fetch the statistics and all
                # gradient tensors (indices 8+) for NaN diagnostics.
                result = sess.run([
                    cost_function,
                    backpass,
                    vae_model.DKL,
                    vae_model.rec_loss,
                    vae_model.dec_log_std_sq,
                    vae_model.enc_log_std_sq,
                    vae_model.enc_mean,
                    vae_model.dec_mean,
                ] + [gv[0] for gv in grads],
                                  feed_dict={model_input_batch: inputs})

                cost = result[0]

                # Flush running statistics to the dashboard CSVs every 10
                # steps. The date string is a fixed placeholder timestamp.
                if batch_abs_idx % 10 == 0:
                    train_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22', np.mean(last_losses)))
                    dkl_log.write('{},{},{}\n'.format(batch_abs_idx,
                                                      '2016-04-22',
                                                      -np.mean(result[2])))
                    ll_log.write('{},{},{}\n'.format(batch_abs_idx,
                                                     '2016-04-22',
                                                     -np.mean(result[3])))
                    train_log.flush()
                    dkl_log.flush()
                    ll_log.flush()

                    dec_sig_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22', np.mean(result[4])))
                    enc_sig_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22', np.mean(result[5])))
                    dec_sig_log.flush()
                    enc_sig_log.flush()

                    dec_std_sig_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22', np.std(result[4])))
                    enc_std_sig_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22', np.std(result[5])))
                    dec_mean_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22', np.mean(result[7])))
                    enc_mean_log.write('{},{},{}\n'.format(
                        batch_abs_idx, '2016-04-22', np.mean(result[6])))
                    dec_std_sig_log.flush()
                    enc_std_sig_log.flush()
                    dec_mean_log.flush()
                    enc_mean_log.flush()

                # Check cost: dump diagnostics and abort on NaN/Inf loss.
                if np.isnan(cost) or np.isinf(cost):
                    log.info('NaN detected')
                    for i in range(len(result)):
                        print("\n\nresult[%d]:" % i)
                        try:
                            # Non-array fetches (e.g. the backpass op) make
                            # isnan raise; ignore those entries.
                            print(np.any(np.isnan(result[i])))
                        except Exception:
                            pass
                        print(result[i])
                    print(result[3].shape)
                    # Fixed: removed `print(model._encoder...)` — `model`
                    # is undefined in this function and the line raised a
                    # NameError exactly when the NaN diagnostics ran.
                    print('\n\nAny:')
                    print(np.any(np.isnan(result[8])))
                    print(np.any(np.isnan(result[9])))
                    print(np.any(np.isnan(result[10])))
                    print(inputs)
                    return 1., 1., 1.

                # Update last losses
                last_losses = np.roll(last_losses, 1)
                last_losses[0] = cost

                # Display training information
                if np.mod(epoch_idx, options['freq_logging']) == 0:
                    log.info(
                        'Epoch {:02}/{:02} Batch {:03} Current Loss: {:0>15.4f} Mean last losses: {:0>15.4f}'
                        .format(epoch_idx + 1, options['n_epochs'],
                                batch_abs_idx, float(cost),
                                np.mean(last_losses)))
                    log.info('Batch Mean LL: {:0>15.4f}'.format(
                        np.mean(result[3], axis=0)))
                    log.info('Batch Mean -DKL: {:0>15.4f}'.format(
                        np.mean(result[2], axis=0)))

                # Save model
                if np.mod(batch_abs_idx, options['freq_saving']) == 0:
                    saver.save(
                        sess,
                        os.path.join(options['model_dir'],
                                     'model_at_%d.ckpt' % batch_abs_idx))
                    log.info('Model saved')

                # Validate model on up to options['valid_batches'] batches.
                if np.mod(batch_abs_idx, options['freq_validation']) == 0:

                    valid_costs = []
                    seen_batches = 0
                    for val_batch, _ in val_provider:

                        val_cost = sess.run(
                            vae_model.cost,
                            feed_dict={
                                model_input_batch: val_batch,
                                sampler_input_batch:
                                MVN(np.zeros(options['latent_dims']),
                                    np.diag(np.ones(options['latent_dims'])),
                                    size=options['batch_size'])
                            })
                        valid_costs.append(val_cost)
                        seen_batches += 1

                        if seen_batches == options['valid_batches']:
                            break

                    # Print results
                    log.info('Validation loss: {:0>15.4f}'.format(
                        float(np.mean(valid_costs))))

                    # Draw prior samples z ~ N(0, I) and decode them.
                    val_samples = sess.run(
                        sampler,
                        feed_dict={
                            sampler_input_batch:
                            MVN(np.zeros(options['latent_dims']),
                                np.diag(np.ones(options['latent_dims'])),
                                size=options['batch_size'])
                        })

                    val_log.write('{},{},{}\n'.format(batch_abs_idx,
                                                      '2016-04-22',
                                                      np.mean(valid_costs)))
                    val_log.flush()

                    # Dashboard panels: reconstructions (result[7] is the
                    # decoder mean), inputs and prior samples.
                    save_ae_samples(catalog,
                                    np.reshape(result[7],
                                               [options['batch_size']] +
                                               options['img_shape']),
                                    np.reshape(inputs,
                                               [options['batch_size']] +
                                               options['img_shape']),
                                    np.reshape(val_samples,
                                               [options['batch_size']] +
                                               options['img_shape']),
                                    batch_abs_idx,
                                    options['dashboard_dir'],
                                    num_to_save=5,
                                    save_gray=True)

            log.info('End of epoch {}'.format(epoch_idx + 1))

        # Test Model ---------------------------------------------------------
        test_results = []
        for inputs in test_provider:
            if isinstance(inputs, tuple):
                inputs = inputs[0]
            # Fixed: this block referenced `feat_vae.vae.*` too; this
            # trainer only builds `vae_model`.
            batch_results = sess.run([
                vae_model.DKL, vae_model.rec_loss, vae_model.dec_log_std_sq,
                vae_model.enc_log_std_sq, vae_model.dec_mean,
                vae_model.enc_mean
            ],
                                     feed_dict={model_input_batch: inputs})
            # Reduce each statistic to a scalar/vector per batch.
            test_results.append(
                map(
                    lambda p: np.mean(p, axis=1)
                    if len(p.shape) > 1 else np.mean(p), batch_results))
        # Transpose: one list per statistic across batches.
        test_results = map(list, zip(*test_results))

        # Print results
        log.info('Test Mean Rec. Loss: {:0>15.4f}'.format(
            float(np.mean(test_results[1]))))
        log.info('Test DKL: {:0>15.4f}'.format(float(np.mean(
            test_results[0]))))
        log.info('Test Dec. Mean Log Std Sq: {:0>15.4f}'.format(
            float(np.mean(test_results[2]))))
        log.info('Test Enc. Mean Log Std Sq: {:0>15.4f}'.format(
            float(np.mean(test_results[3]))))
        log.info('Test Dec. Mean Mean: {:0>15.4f}'.format(
            float(np.mean(test_results[4]))))
        log.info('Test Enc. Mean Mean: {:0>15.4f}'.format(
            float(np.mean(test_results[5]))))
def train(options):
    """Train a feature-matching VAE (`feat_vae`) on the configured dataset.

    Wraps a vanilla VAE with a frozen, pickled CNN classifier whose selected
    layers act as the feature extractor for the reconstruction loss. Builds
    the graph on GPU 0, runs the epoch/batch training loop with periodic CSV
    logging, checkpointing and validation, then evaluates on the test set.

    Args:
        options (dict): experiment configuration — paths (`model_dir`,
            `dashboard_dir`, `feat_params_path`), model sizes (`p_layers`,
            `q_layers`, `img_shape`, `latent_dims`, `num_feat_layers`),
            training hyper-parameters (`lr`, `batch_size`, `n_epochs`,
            `DKL_weight`, `vae_rec_loss_weight`, `sigma_clip`) and logging
            frequencies (`freq_logging`, `freq_saving`, `freq_validation`).

    Returns:
        ``(1., 1., 1.)`` if training aborts on a NaN/Inf loss, otherwise
        ``None``.
    """
    # Get logger
    log = utils.get_logger(os.path.join(options['model_dir'], 'log.txt'))

    # Dump the full option set so the run is reproducible from the dashboard.
    options_file = open(os.path.join(options['dashboard_dir'], 'options'), 'w')
    options_file.write(options['description'] + '\n')
    options_file.write(
        'DKL Weight: {}\nLog Sigma^2 clipped to: [{}, {}]\n\n'.format(
            options['DKL_weight'],
            -options['sigma_clip'],
            options['sigma_clip']
        )
    )
    for optn in options:
        options_file.write(optn)
        options_file.write(':\t')
        options_file.write(str(options[optn]))
        options_file.write('\n')
    options_file.close()

    # Dashboard Catalog: maps each file in dashboard_dir to its display name.
    catalog = open(os.path.join(options['dashboard_dir'], 'catalog'), 'w')
    catalog.write(
        """filename,type,name
options,plain,Options
train_loss.csv,csv,Train Loss
ll.csv,csv,Neg. Log-Likelihood
dec_log_sig_sq.csv,csv,Decoder Log Simga^2
dec_std_log_sig_sq.csv,csv,STD of Decoder Log Simga^2
dec_mean.csv,csv,Decoder Mean
dkl.csv,csv,DKL
enc_log_sig_sq.csv,csv,Encoder Log Sigma^2
enc_std_log_sig_sq.csv,csv,STD of Encoder Log Sigma^2
enc_mean.csv,csv,Encoder Mean
val_loss.csv,csv,Validation Loss
"""
    )
    catalog.flush()

    # One CSV log per tracked statistic.
    train_log = open(os.path.join(options['dashboard_dir'], 'train_loss.csv'), 'w')
    val_log = open(os.path.join(options['dashboard_dir'], 'val_loss.csv'), 'w')
    dkl_log = open(os.path.join(options['dashboard_dir'], 'dkl.csv'), 'w')
    ll_log = open(os.path.join(options['dashboard_dir'], 'll.csv'), 'w')
    dec_sig_log = open(os.path.join(options['dashboard_dir'], 'dec_log_sig_sq.csv'), 'w')
    enc_sig_log = open(os.path.join(options['dashboard_dir'], 'enc_log_sig_sq.csv'), 'w')
    dec_std_sig_log = open(os.path.join(options['dashboard_dir'], 'dec_std_log_sig_sq.csv'), 'w')
    enc_std_sig_log = open(os.path.join(options['dashboard_dir'], 'enc_std_log_sig_sq.csv'), 'w')
    dec_mean_log = open(os.path.join(options['dashboard_dir'], 'dec_mean.csv'), 'w')
    enc_mean_log = open(os.path.join(options['dashboard_dir'], 'enc_mean.csv'), 'w')
    # val_sig_log = open(os.path.join(options['dashboard_dir'], 'val_log_sig_sq.csv'), 'w')

    # CSV headers.
    train_log.write('step,time,Train Loss\n')
    val_log.write('step,time,Validation Loss\n')
    dkl_log.write('step,time,DKL\n')
    ll_log.write('step,time,-LL\n')
    dec_sig_log.write('step,time,Decoder Log Sigma^2\n')
    enc_sig_log.write('step,time,Encoder Log Sigma^2\n')
    dec_std_sig_log.write('step,time,STD of Decoder Log Sigma^2\n')
    enc_std_sig_log.write('step,time,STD of Encoder Log Sigma^2\n')
    dec_mean_log.write('step,time,Decoder Mean\n')
    enc_mean_log.write('step,time,Encoder Mean\n')

    # Print options
    utils.print_options(options, log)

    # Load dataset ----------------------------------------------------------------------
    # Train provider
    train_provider, val_provider, test_provider = get_providers(options, log, flat=True)

    # Initialize model ------------------------------------------------------------------
    with tf.device('/gpu:0'):
        # Define inputs ----------------------------------------------------------
        # Flattened images for the encoder, latent vectors for the sampler.
        model_input_batch = tf.placeholder(
            tf.float32,
            shape = [options['batch_size'], np.prod(np.array(options['img_shape']))],
            name = 'enc_inputs'
        )
        sampler_input_batch = tf.placeholder(
            tf.float32,
            shape = [options['batch_size'], options['latent_dims']],
            name = 'dec_inputs'
        )
        log.info('Inputs defined')

        # Feature Extractor -----------------------------------------------------
        # Rebuild the pickled CNN classifier layer by layer with frozen
        # (tf.constant) weights; `feat_layers` records the indices of the
        # layers whose activations feed the feature-matching loss.
        feat_layers = []
        feat_params = pickle.load(open(options['feat_params_path'], 'rb'))
        _classifier = Sequential('CNN_Classifier')
        conv_count, pool_count, fc_count = 0, 0, 0
        for lay in feat_params:
            print(lay['layer_type'])
        for i in xrange(options['num_feat_layers']):
            if feat_params[i]['layer_type'] == 'conv':
                _classifier += ConvLayer(
                    feat_params[i]['n_filters_in'],
                    feat_params[i]['n_filters_out'],
                    feat_params[i]['input_dim'],
                    feat_params[i]['filter_dim'],
                    feat_params[i]['strides'],
                    name='classifier_conv_%d' % conv_count
                )
                # Freeze the pre-trained weights as graph constants.
                _classifier.layers[-1].weights['W'] = tf.constant(feat_params[i]['W'])
                _classifier.layers[-1].weights['b'] = tf.constant(feat_params[i]['b'])
                _classifier += feat_params[i]['act_fn']
                conv_count += 1
                # NOTE(review): only 'pool' and 'fc' branches append to
                # feat_layers — confirm conv activations are deliberately
                # excluded from the feature-matching loss.
            elif feat_params[i]['layer_type'] == 'pool':
                _classifier += PoolLayer(
                    feat_params[i]['input_dim'],
                    feat_params[i]['filter_dim'],
                    feat_params[i]['strides'],
                    name='classifier_pool_%d' % i
                )
                pool_count += 1
                feat_layers.append(i)
            elif feat_params[i]['layer_type'] == 'fc':
                _classifier += ConstFC(
                    feat_params[i]['W'],
                    feat_params[i]['b'],
                    activation=feat_params[i]['act_fn'],
                    name='classifier_fc_%d' % fc_count
                )
                fc_count += 1
                feat_layers.append(i)

        # if options['feat_type'] == 'fc':
        #     feat_model = Sequential('feat_extractor')
        #     feat_params = pickle.load(open(options['feat_params_path'], 'rb'))
        #     for i in range(options['num_feat_layers']):
        #         feat_model += ConstFC(
        #             feat_params['enc_W'][i],
        #             feat_params['enc_b'][i],
        #             activation=feat_params['enc_act_fn'][i],
        #             name='feat_layer_%d'%i
        #         )
        # else:
        #     pass

        # VAE -------------------------------------------------------------------
        # VAE model
        vae_model = cupboard('vanilla_vae')(
            options['p_layers'],
            options['q_layers'],
            np.prod(options['img_shape']),
            options['latent_dims'],
            options['DKL_weight'],
            options['sigma_clip'],
            'vanilla_vae'
        )
        # -----------------------------------------------------------------------
        # Wrap the VAE and the frozen classifier into the feature-matching VAE.
        feat_vae = cupboard('feat_vae')(
            vae_model,
            _classifier,
            feat_layers,
            options['DKL_weight'],
            options['vae_rec_loss_weight'],
            img_shape=options['img_shape'],
            input_channels=options['input_channels'],
            flat=False,
            name='feat_vae_model'
        )
        log.info('Model initialized')

        # Define forward pass (returns the training cost tensor).
        cost_function = feat_vae(model_input_batch)
        log.info('Forward pass graph built')

        # Define sampler
        sampler = feat_vae.build_sampler(sampler_input_batch)
        log.info('Sampler graph built')

        # Define optimizer
        optimizer = tf.train.AdamOptimizer(
            learning_rate=options['lr']
        )
        # optimizer = tf.train.GradientDescentOptimizer(learning_rate=options['lr'])
        # train_step = optimizer.minimize(cost_function)
        log.info('Optimizer graph built')

        # Get gradients; drop variables that receive no gradient.
        # NOTE(review): `!= None` works but `is not None` is the idiom.
        grads = optimizer.compute_gradients(cost_function)
        grads = [gv for gv in grads if gv[0] != None]
        grad_tensors = [gv[0] for gv in grads]

        # Clip gradients
        clip_grads = [(tf.clip_by_norm(gv[0], 5.0, name='grad_clipping'), gv[1]) for gv in grads]

        # Update op
        backpass = optimizer.apply_gradients(clip_grads)

        # Define init operation
        init_op = tf.initialize_all_variables()
        log.info('Variable initialization graph built')

        # Define op to save and restore variables
        saver = tf.train.Saver()
        log.info('Save operation built')
        # --------------------------------------------------------------------------

    # Train loop ---------------------------------------------------------------
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        log.info('Session started')

        # Initialize shared variables or restore
        if options['reload']:
            # Restore-only path: evaluate / visualize a saved model, then exit.
            saver.restore(sess, options['reload_file'])
            log.info('Shared variables restored')

            test_LL_and_DKL(sess, test_provider, feat_vae.vae.DKL, feat_vae.vae.rec_loss, options, model_input_batch)
            return

            # Unreachable visualization path, kept for manual toggling by
            # moving the `return` above it.
            mean_img = np.load(os.path.join(options['data_dir'], 'mean' + options['extension']))
            std_img = np.load(os.path.join(options['data_dir'], 'std' + options['extension']))
            visualize(sess, feat_vae.vae.dec_mean, feat_vae.vae.dec_log_std_sq,
                      sampler, sampler_input_batch, model_input_batch,
                      feat_vae.vae.enc_mean, feat_vae.vae.enc_log_std_sq,
                      train_provider, val_provider, options, catalog, mean_img, std_img)
            return
        else:
            sess.run(init_op)
            log.info('Shared variables initialized')

        # Define last losses to compute a running average (last 10 batches).
        last_losses = np.zeros((10))

        batch_abs_idx = 0
        for epoch_idx in xrange(options['n_epochs']):
            batch_rel_idx = 0
            log.info('Epoch {}'.format(epoch_idx + 1))

            for inputs,_ in train_provider:

                batch_abs_idx += 1
                batch_rel_idx += 1

                # One optimization step; also fetch the statistics and all
                # gradient tensors (indices 8+) for NaN diagnostics.
                result = sess.run(
                    # (cost_function, train_step, model.enc_std, model.enc_mean, model.encoder, model.dec_std, model.dec_mean, model.decoder, model.rec_loss, model.DKL),
                    #               0           1          2           3               4            5            6              7             8            9           10
                    [cost_function, backpass, feat_vae.vae.DKL, feat_vae.vae.rec_loss,
                     feat_vae.vae.dec_log_std_sq, feat_vae.vae.enc_log_std_sq,
                     feat_vae.vae.enc_mean, feat_vae.vae.dec_mean] + [gv[0] for gv in grads],
                    feed_dict = {
                        model_input_batch: inputs
                    }
                )

                cost = result[0]

                # Flush running statistics to the dashboard CSVs every 10
                # steps. The date string is a fixed placeholder timestamp.
                if batch_abs_idx % 10 == 0:
                    train_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(last_losses)))
                    dkl_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', -np.mean(result[2])))
                    ll_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', -np.mean(result[3])))
                    train_log.flush()
                    dkl_log.flush()
                    ll_log.flush()

                    dec_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[4])))
                    enc_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[5])))
                    # val_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[6])))
                    dec_sig_log.flush()
                    enc_sig_log.flush()

                    dec_std_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.std(result[4])))
                    enc_std_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.std(result[5])))
                    dec_mean_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[7])))
                    enc_mean_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[6])))
                    dec_std_sig_log.flush()
                    enc_std_sig_log.flush()
                    dec_mean_log.flush()
                    enc_mean_log.flush()
                    # val_sig_log.flush()

                # Check cost: dump diagnostics and abort on NaN/Inf loss.
                if np.isnan(cost) or np.isinf(cost):
                    log.info('NaN detected')
                    for i in range(len(result)):
                        print("\n\nresult[%d]:" % i)
                        try:
                            # Non-array fetches (e.g. the backpass op) make
                            # isnan raise; those entries are skipped.
                            print(np.any(np.isnan(result[i])))
                        except:
                            pass
                        print(result[i])
                    print(result[3].shape)
                    # NOTE(review): `model` is not defined in this function;
                    # this debug print raises NameError if the NaN branch
                    # ever runs — confirm and repair before relying on it.
                    print(model._encoder.layers[0].weights['w'].eval())
                    print('\n\nAny:')
                    print(np.any(np.isnan(result[8])))
                    print(np.any(np.isnan(result[9])))
                    print(np.any(np.isnan(result[10])))
                    print(inputs)
                    return 1., 1., 1.

                # Update last losses
                last_losses = np.roll(last_losses, 1)
                last_losses[0] = cost

                # Display training information
                if np.mod(epoch_idx, options['freq_logging']) == 0:
                    log.info('Epoch {:02}/{:02} Batch {:03} Current Loss: {:0>15.4f} Mean last losses: {:0>15.4f}'.format(
                        epoch_idx + 1,
                        options['n_epochs'],
                        batch_abs_idx,
                        float(cost),
                        np.mean(last_losses)
                    ))
                    log.info('Batch Mean LL: {:0>15.4f}'.format(np.mean(result[3], axis=0)))
                    log.info('Batch Mean -DKL: {:0>15.4f}'.format(np.mean(result[2], axis=0)))

                # Save model
                if np.mod(batch_abs_idx, options['freq_saving']) == 0:
                    saver.save(sess, os.path.join(options['model_dir'], 'model_at_%d.ckpt' % batch_abs_idx))
                    log.info('Model saved')

                # Validate model on up to options['valid_batches'] batches.
                if np.mod(batch_abs_idx, options['freq_validation']) == 0:

                    valid_costs = []
                    seen_batches = 0
                    for val_batch,_ in val_provider:

                        val_cost = sess.run(
                            cost_function,
                            feed_dict = {
                                model_input_batch: val_batch
                            }
                        )
                        valid_costs.append(val_cost)
                        seen_batches += 1

                        if seen_batches == options['valid_batches']:
                            break

                    # Print results
                    log.info('Validation loss: {:0>15.4f}'.format(
                        float(np.mean(valid_costs))
                    ))

                    # Draw prior samples z ~ N(0, I) and decode them.
                    val_samples = sess.run(
                        sampler,
                        feed_dict = {
                            sampler_input_batch: MVN(
                                np.zeros(options['latent_dims']),
                                np.diag(np.ones(options['latent_dims'])),
                                size = options['batch_size']
                            )
                        }
                    )

                    val_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(valid_costs)))
                    val_log.flush()

                    # Dashboard panels: reconstructions (result[7] is the
                    # decoder mean), inputs and prior samples.
                    save_ae_samples(
                        catalog,
                        np.reshape(result[7], [options['batch_size']]+options['img_shape']),
                        np.reshape(inputs, [options['batch_size']]+options['img_shape']),
                        np.reshape(val_samples, [options['batch_size']]+options['img_shape']),
                        batch_abs_idx,
                        options['dashboard_dir'],
                        num_to_save=5,
                        save_gray=True
                    )

                    # save_dash_samples(
                    #     catalog,
                    #     val_samples,
                    #     batch_abs_idx,
                    #     options['dashboard_dir'],
                    #     flat_samples=True,
                    #     img_shape=options['img_shape'],
                    #     num_to_save=5
                    # )

                    # Sanity-check images on disk: prior samples, raw inputs
                    # and their reconstructions.
                    save_samples(
                        val_samples,
                        int(batch_abs_idx/options['freq_validation']),
                        os.path.join(options['model_dir'], 'valid_samples'),
                        True,
                        options['img_shape'],
                        5
                    )

                    save_samples(
                        inputs,
                        int(batch_abs_idx/options['freq_validation']),
                        os.path.join(options['model_dir'], 'input_sanity'),
                        True,
                        options['img_shape'],
                        num_to_save=5
                    )

                    save_samples(
                        result[7],
                        int(batch_abs_idx/options['freq_validation']),
                        os.path.join(options['model_dir'], 'rec_sanity'),
                        True,
                        options['img_shape'],
                        num_to_save=5
                    )

            log.info('End of epoch {}'.format(epoch_idx + 1))

        # Test Model --------------------------------------------------------------------------
        test_results = []
        for inputs in test_provider:
            if isinstance(inputs, tuple):
                inputs = inputs[0]
            batch_results = sess.run(
                [
                    feat_vae.vae.DKL, feat_vae.vae.rec_loss, feat_vae.vae.dec_log_std_sq,
                    feat_vae.vae.enc_log_std_sq, feat_vae.vae.dec_mean, feat_vae.vae.enc_mean
                ],
                feed_dict = {
                    model_input_batch: inputs
                }
            )
            # Reduce each statistic to a scalar/vector per batch.
            test_results.append(map(lambda p: np.mean(p, axis=1) if len(p.shape) > 1 else np.mean(p), batch_results))
        # Transpose: one list per statistic across batches (Py2 map -> list).
        test_results = map(list, zip(*test_results))

        # Print results
        log.info('Test Mean Rec. Loss: {:0>15.4f}'.format(
            float(np.mean(test_results[1]))
        ))
        log.info('Test DKL: {:0>15.4f}'.format(
            float(np.mean(test_results[0]))
        ))
        log.info('Test Dec. Mean Log Std Sq: {:0>15.4f}'.format(
            float(np.mean(test_results[2]))
        ))
        log.info('Test Enc. Mean Log Std Sq: {:0>15.4f}'.format(
            float(np.mean(test_results[3]))
        ))
        log.info('Test Dec. Mean Mean: {:0>15.4f}'.format(
            float(np.mean(test_results[4]))
        ))
        log.info('Test Enc. Mean Mean: {:0>15.4f}'.format(
            float(np.mean(test_results[5]))
        ))
def train(options):
    """Build and train the VAE-GAN model configured by `options`.

    Visible behavior (this chunk ends inside the function, at the end of the
    epoch loop):
      * writes the options dict and a metric catalog to `options['dashboard_dir']`,
      * opens one CSV log per tracked metric,
      * builds a vanilla VAE, a fixed-conv discriminator, and a combined
        VAE-GAN graph on /gpu:0, optimized with Adam,
      * runs a three-phase schedule per absolute batch index: first
        `initial_G_iters` vanilla updates, then discriminator updates until
        `initial_G_iters + initial_D_iters`, then interleaved D/G updates in
        the ratio given by `options['D_to_G']`,
      * logs metrics every 10 batches, checkpoints + pickles layer params
        every `freq_saving` batches, validates every `freq_validation` batches.

    Args:
        options (dict): configuration; keys read here include 'model_dir',
            'dashboard_dir', 'description', 'DKL_weight', 'sigma_clip',
            'batch_size', 'img_shape', 'latent_dims', 'p_layers', 'q_layers',
            'disc_params_path', 'num_feat_layers', 'disc_weight',
            'input_channels', 'lr', 'reload_all', 'reload_file', 'reload_vae',
            'vae_params_path', 'n_epochs', 'D_to_G', 'initial_G_iters',
            'initial_D_iters', 'freq_logging', 'freq_saving',
            'freq_validation', 'valid_batches'.

    Returns:
        (1., 1., 1.) on NaN/Inf loss (early abort); otherwise the function
        continues past this chunk.
    """
    # Get logger
    log = utils.get_logger(os.path.join(options['model_dir'], 'log.txt'))

    # Dump the full options dict as plain text for the dashboard.
    options_file = open(os.path.join(options['dashboard_dir'], 'options'), 'w')
    options_file.write(options['description'] + '\n')
    options_file.write(
        'DKL Weight: {}\nLog Sigma^2 clipped to: [{}, {}]\n\n'.format(
            options['DKL_weight'],
            -options['sigma_clip'],
            options['sigma_clip']
        )
    )
    for optn in options:
        options_file.write(optn)
        options_file.write(':\t')
        options_file.write(str(options[optn]))
        options_file.write('\n')
    options_file.close()

    # Dashboard Catalog: one line per dashboard artifact (filename,type,name).
    # NOTE(review): the "Simga^2" typos below are runtime output and are
    # preserved verbatim.
    catalog = open(os.path.join(options['dashboard_dir'], 'catalog'), 'w')
    catalog.write(
"""filename,type,name
options,plain,Options
train_loss.csv,csv,Discriminator Cross-Entropy
ll.csv,csv,Neg. Log-Likelihood
dec_log_sig_sq.csv,csv,Decoder Log Simga^2
dec_std_log_sig_sq.csv,csv,STD of Decoder Log Simga^2
dec_mean.csv,csv,Decoder Mean
dkl.csv,csv,DKL
enc_log_sig_sq.csv,csv,Encoder Log Sigma^2
enc_std_log_sig_sq.csv,csv,STD of Encoder Log Sigma^2
enc_mean.csv,csv,Encoder Mean
val_loss.csv,csv,Validation Loss
"""
    )
    catalog.flush()

    # One CSV per metric. Columns: step, time, then one column per training
    # phase (Vanilla / Gen. / Disc.); log_format_string below selects which
    # phase column receives the value.
    train_log = open(os.path.join(options['dashboard_dir'], 'train_loss.csv'), 'w')
    val_log = open(os.path.join(options['dashboard_dir'], 'val_loss.csv'), 'w')
    dkl_log = open(os.path.join(options['dashboard_dir'], 'dkl.csv'), 'w')
    ll_log = open(os.path.join(options['dashboard_dir'], 'll.csv'), 'w')
    dec_sig_log = open(os.path.join(options['dashboard_dir'], 'dec_log_sig_sq.csv'), 'w')
    enc_sig_log = open(os.path.join(options['dashboard_dir'], 'enc_log_sig_sq.csv'), 'w')
    dec_std_sig_log = open(os.path.join(options['dashboard_dir'], 'dec_std_log_sig_sq.csv'), 'w')
    enc_std_sig_log = open(os.path.join(options['dashboard_dir'], 'enc_std_log_sig_sq.csv'), 'w')
    dec_mean_log = open(os.path.join(options['dashboard_dir'], 'dec_mean.csv'), 'w')
    enc_mean_log = open(os.path.join(options['dashboard_dir'], 'enc_mean.csv'), 'w')
    # val_sig_log = open(os.path.join(options['dashboard_dir'], 'val_log_sig_sq.csv'), 'w')

    # CSV headers (must match the 3-phase column layout used below).
    train_log.write('step,time,Train CE (Training Vanilla),Train CE (Training Gen.),Train CE (Training Disc.)\n')
    val_log.write('step,time,Validation CE (Training Vanilla),Validation CE (Training Gen.),Validation CE (Training Disc.)\n')
    dkl_log.write('step,time,DKL (Training Vanilla),DKL (Training Gen.),DKL (Training Disc.)\n')
    ll_log.write('step,time,-LL (Training Vanilla),-LL (Training Gen.),-LL (Training Disc.)\n')
    dec_sig_log.write('step,time,Decoder Log Sigma^2 (Training Vanilla),Decoder Log Sigma^2 (Training Gen.),Decoder Log Sigma^2 (Training Disc.)\n')
    enc_sig_log.write('step,time,Encoder Log Sigma^2 (Training Vanilla),Encoder Log Sigma^2 (Training Gen.),Encoder Log Sigma^2 (Training Disc.)\n')
    dec_std_sig_log.write('step,time,STD of Decoder Log Sigma^2 (Training Vanilla),STD of Decoder Log Sigma^2 (Training Gen.),STD of Decoder Log Sigma^2 (Training Disc.)\n')
    enc_std_sig_log.write('step,time,STD of Encoder Log Sigma^2 (Training Vanilla),STD of Encoder Log Sigma^2 (Training Gen.),STD of Encoder Log Sigma^2 (Training Disc.)\n')
    dec_mean_log.write('step,time,Decoder Mean (Training Vanilla),Decoder Mean (Training Gen.),Decoder Mean (Training Disc.)\n')
    enc_mean_log.write('step,time,Encoder Mean (Training Vanilla),Encoder Mean (Training Gen.),Encoder Mean (Training Disc.)\n')

    # Print options
    utils.print_options(options, log)

    # Load dataset ----------------------------------------------------------------------
    # Train provider (flat=True: images are flattened vectors, matching the
    # flat placeholder shape below)
    train_provider, val_provider, test_provider = get_providers(options, log, flat=True)

    # Initialize model ------------------------------------------------------------------
    with tf.device('/gpu:0'):
        # Define inputs: flattened image batch for the encoder, latent batch
        # for the decoder/sampler.
        model_input_batch = tf.placeholder(
            tf.float32,
            shape = [options['batch_size'], np.prod(np.array(options['img_shape']))],
            name = 'enc_inputs'
        )
        sampler_input_batch = tf.placeholder(
            tf.float32,
            shape = [options['batch_size'], options['latent_dims']],
            name = 'dec_inputs'
        )
        log.info('Inputs defined')

        # Define model: VAE and discriminator live in separate variable
        # scopes so vae_gan can split their variable lists for the two
        # optimization passes.
        with tf.variable_scope('vae_scope'):
            vae_model = cupboard('vanilla_vae')(
                options['p_layers'],
                options['q_layers'],
                np.prod(options['img_shape']),
                options['latent_dims'],
                options['DKL_weight'],
                options['sigma_clip'],
                'vae_model'
            )

        with tf.variable_scope('disc_scope'):
            # Discriminator weights come pre-trained from a pickle file.
            disc_model = cupboard('fixed_conv_disc')(
                pickle.load(open(options['disc_params_path'], 'rb')),
                options['num_feat_layers'],
                name='disc_model'
            )

        vae_gan = cupboard('vae_gan')(
            vae_model,
            disc_model,
            options['disc_weight'],
            options['img_shape'],
            options['input_channels'],
            'vae_scope',
            'disc_scope',
            name='vae_gan_model'
        )

        # Define Optimizers ---------------------------------------------------------------------
        optimizer = tf.train.AdamOptimizer(
            learning_rate=options['lr']
        )
        # Three train ops: VAE-only, discriminator-only, and plain (vanilla)
        # VAE updates -- the schedule below picks one per batch.
        vae_backpass, disc_backpass, vanilla_backpass = vae_gan(model_input_batch, sampler_input_batch, optimizer)
        log.info('Optimizer graph built')
        # --------------------------------------------------------------------------------------

        # Define init operation
        init_op = tf.initialize_all_variables()
        log.info('Variable initialization graph built')

        # Define op to save and restore variables
        saver = tf.train.Saver()
        log.info('Save operation built')
        # --------------------------------------------------------------------------

    # Train loop ---------------------------------------------------------------
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        log.info('Session started')

        # Initialize shared variables or restore
        if options['reload_all']:
            saver.restore(sess, options['reload_file'])
            log.info('Shared variables restored')
        else:
            sess.run(init_op)
            log.info('Variables initialized')
            if options['reload_vae']:
                # Warm-start only the VAE weights from a pickled param dict.
                vae_model.reload_vae(options['vae_params_path'])

        # Define last losses to compute a running average (ring of 10 most
        # recent discriminator CE values).
        last_losses = np.zeros((10))

        batch_abs_idx = 0
        # D_to_G = (d, g): within each interleaving cycle of length d+g, the
        # first d batches train the discriminator, the rest train the VAE.
        D_to_G = options['D_to_G']
        total_D2G = sum(D_to_G)
        # `base` = absolute batch index at which interleaving begins.
        base = options['initial_G_iters'] + options['initial_D_iters']

        for epoch_idx in xrange(options['n_epochs']):
            batch_rel_idx = 0
            log.info('Epoch {}'.format(epoch_idx + 1))

            for inputs in train_provider:
                if isinstance(inputs, tuple):
                    inputs = inputs[0]

                batch_abs_idx += 1
                batch_rel_idx += 1

                # Phase schedule: pick which backpass runs this batch and the
                # matching CSV column layout for the phase-specific logs.
                if batch_abs_idx < options['initial_G_iters']:
                    backpass = vanilla_backpass
                    log_format_string = '{},{},{},,\n'
                elif options['initial_G_iters'] <= batch_abs_idx < base:
                    backpass = disc_backpass
                    log_format_string = '{},{},,,{}\n'
                else:
                    if (batch_abs_idx - base) % total_D2G < D_to_G[0]:
                        backpass = disc_backpass
                        log_format_string = '{},{},,,{}\n'
                    else:
                        backpass = vae_backpass
                        log_format_string = '{},{},,{},\n'

                # Fetch list has 8 entries -> result indices 0..7:
                #   0 disc_CE, 1 backpass op, 2 DKL, 3 rec_loss,
                #   4 dec_log_std_sq, 5 enc_log_std_sq, 6 enc_mean, 7 dec_mean
                result = sess.run(
                    [
                        vae_gan.disc_CE,
                        backpass,
                        vae_gan._vae.DKL,
                        vae_gan._vae.rec_loss,
                        vae_gan._vae.dec_log_std_sq,
                        vae_gan._vae.enc_log_std_sq,
                        vae_gan._vae.enc_mean,
                        vae_gan._vae.dec_mean
                    ],
                    feed_dict = {
                        model_input_batch: inputs,
                        # Standard-normal latent samples for the GAN branch.
                        sampler_input_batch: MVN(
                            np.zeros(options['latent_dims']),
                            np.diag(np.ones(options['latent_dims'])),
                            size = options['batch_size']
                        )
                    }
                )

                cost = result[0]

                # Metric CSV logging every 10 batches.
                # NOTE(review): '2016-04-22' is a hard-coded placeholder for
                # the time column, not an actual timestamp.
                if batch_abs_idx % 10 == 0:
                    train_log.write(log_format_string.format(batch_abs_idx, '2016-04-22', np.mean(last_losses)))
                    dkl_log.write(log_format_string.format(batch_abs_idx, '2016-04-22', -np.mean(result[2])))
                    ll_log.write(log_format_string.format(batch_abs_idx, '2016-04-22', -np.mean(result[3])))
                    train_log.flush()
                    dkl_log.flush()
                    ll_log.flush()
                    dec_sig_log.write(log_format_string.format(batch_abs_idx, '2016-04-22', np.mean(result[4])))
                    enc_sig_log.write(log_format_string.format(batch_abs_idx, '2016-04-22', np.mean(result[5])))
                    # val_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[6])))
                    dec_sig_log.flush()
                    enc_sig_log.flush()
                    dec_std_sig_log.write(log_format_string.format(batch_abs_idx, '2016-04-22', np.std(result[4])))
                    enc_std_sig_log.write(log_format_string.format(batch_abs_idx, '2016-04-22', np.std(result[5])))
                    dec_mean_log.write(log_format_string.format(batch_abs_idx, '2016-04-22', np.mean(result[7])))
                    enc_mean_log.write(log_format_string.format(batch_abs_idx, '2016-04-22', np.mean(result[6])))
                    dec_std_sig_log.flush()
                    enc_std_sig_log.flush()
                    dec_mean_log.flush()
                    enc_mean_log.flush()

                # Check cost: abort the whole run on NaN/Inf, dumping debug
                # state first.
                if np.isnan(cost) or np.isinf(cost):
                    log.info('NaN detected')
                    for i in range(len(result)):
                        print("\n\nresult[%d]:" % i)
                        try:
                            # np.isnan can raise on non-array fetches (e.g.
                            # the backpass op result); ignore those.
                            # NOTE(review): bare except also hides real errors.
                            print(np.any(np.isnan(result[i])))
                        except:
                            pass
                        print(result[i])
                    print(result[3].shape)
                    print(vae_gan._vae._encoder.layers[0].weights['w'].eval())
                    print('\n\nAny:')
                    # NOTE(review): BUG -- only 8 tensors are fetched above, so
                    # result[8], result[9], result[10] raise IndexError here;
                    # this debug path cannot run to completion as written.
                    print(np.any(np.isnan(result[8])))
                    print(np.any(np.isnan(result[9])))
                    print(np.any(np.isnan(result[10])))
                    print(inputs)
                    return 1., 1., 1.

                # Update last losses (ring buffer: newest value at index 0)
                last_losses = np.roll(last_losses, 1)
                last_losses[0] = cost

                # Display training information
                # NOTE(review): gate tests epoch_idx, not batch_abs_idx, so
                # this logs every batch of epochs divisible by freq_logging --
                # presumably batch_abs_idx was intended; confirm before fixing.
                if np.mod(epoch_idx, options['freq_logging']) == 0:
                    log.info('Epoch {:02}/{:02} Batch {:03} Current Loss: {:0>15.4f} Mean last losses: {:0>15.4f}'.format(
                        epoch_idx + 1,
                        options['n_epochs'],
                        batch_abs_idx,
                        float(cost),
                        np.mean(last_losses)
                    ))
                    log.info('Batch Mean LL: {:0>15.4f}'.format(np.mean(result[3], axis=0)))
                    log.info('Batch Mean -DKL: {:0>15.4f}'.format(np.mean(result[2], axis=0)))

                # Save model: TF checkpoint plus a pickled dict of raw layer
                # parameters (framework-independent snapshot).
                if np.mod(batch_abs_idx, options['freq_saving']) == 0:
                    saver.save(sess, os.path.join(options['model_dir'], 'model_at_%d.ckpt' % batch_abs_idx))
                    log.info('Model saved')

                    save_dict = {}
                    # Save encoder params ------------------------------------------------------------------
                    # NOTE(review): BUG -- save_dict['encoder'] is overwritten
                    # on every iteration, so only the LAST encoder layer is
                    # persisted (same for 'decoder' below).
                    for i in range(len(vae_gan._vae._encoder.layers)):
                        layer_dict = {
                            'input_dim':vae_gan._vae._encoder.layers[i].input_dim,
                            'output_dim':vae_gan._vae._encoder.layers[i].output_dim,
                            'act_fn':vae_gan._vae._encoder.layers[i].activation,
                            'W':vae_gan._vae._encoder.layers[i].weights['w'].eval(),
                            'b':vae_gan._vae._encoder.layers[i].weights['b'].eval()
                        }
                        save_dict['encoder'] = layer_dict

                    layer_dict = {
                        'input_dim':vae_gan._vae._enc_mean.input_dim,
                        'output_dim':vae_gan._vae._enc_mean.output_dim,
                        'act_fn':vae_gan._vae._enc_mean.activation,
                        'W':vae_gan._vae._enc_mean.weights['w'].eval(),
                        'b':vae_gan._vae._enc_mean.weights['b'].eval()
                    }
                    save_dict['enc_mean'] = layer_dict

                    layer_dict = {
                        'input_dim':vae_gan._vae._enc_log_std_sq.input_dim,
                        'output_dim':vae_gan._vae._enc_log_std_sq.output_dim,
                        'act_fn':vae_gan._vae._enc_log_std_sq.activation,
                        'W':vae_gan._vae._enc_log_std_sq.weights['w'].eval(),
                        'b':vae_gan._vae._enc_log_std_sq.weights['b'].eval()
                    }
                    save_dict['enc_log_std_sq'] = layer_dict

                    # Save decoder params ------------------------------------------------------------------
                    for i in range(len(vae_gan._vae._decoder.layers)):
                        layer_dict = {
                            'input_dim':vae_gan._vae._decoder.layers[i].input_dim,
                            'output_dim':vae_gan._vae._decoder.layers[i].output_dim,
                            'act_fn':vae_gan._vae._decoder.layers[i].activation,
                            'W':vae_gan._vae._decoder.layers[i].weights['w'].eval(),
                            'b':vae_gan._vae._decoder.layers[i].weights['b'].eval()
                        }
                        save_dict['decoder'] = layer_dict

                    layer_dict = {
                        'input_dim':vae_gan._vae._dec_mean.input_dim,
                        'output_dim':vae_gan._vae._dec_mean.output_dim,
                        'act_fn':vae_gan._vae._dec_mean.activation,
                        'W':vae_gan._vae._dec_mean.weights['w'].eval(),
                        'b':vae_gan._vae._dec_mean.weights['b'].eval()
                    }
                    save_dict['dec_mean'] = layer_dict

                    layer_dict = {
                        'input_dim':vae_gan._vae._dec_log_std_sq.input_dim,
                        'output_dim':vae_gan._vae._dec_log_std_sq.output_dim,
                        'act_fn':vae_gan._vae._dec_log_std_sq.activation,
                        'W':vae_gan._vae._dec_log_std_sq.weights['w'].eval(),
                        'b':vae_gan._vae._dec_log_std_sq.weights['b'].eval()
                    }
                    save_dict['dec_log_std_sq'] = layer_dict

                    pickle.dump(save_dict, open(os.path.join(options['model_dir'], 'vae_dict_%d' % batch_abs_idx), 'wb'))

                # Validate model: average disc_CE over up to `valid_batches`
                # validation batches, then draw decoder samples for the
                # dashboard.
                if np.mod(batch_abs_idx, options['freq_validation']) == 0:
                    # NOTE(review): the next line is a no-op -- the evaluated
                    # weight slice is computed and discarded (leftover debug?).
                    vae_gan._vae._decoder.layers[0].weights['w'].eval()[:5,:5]

                    valid_costs = []
                    seen_batches = 0
                    for val_batch in val_provider:
                        if isinstance(val_batch, tuple):
                            val_batch = val_batch[0]
                        val_cost = sess.run(
                            vae_gan.disc_CE,
                            feed_dict = {
                                model_input_batch: val_batch,
                                sampler_input_batch: MVN(
                                    np.zeros(options['latent_dims']),
                                    np.diag(np.ones(options['latent_dims'])),
                                    size = options['batch_size']
                                )
                            }
                        )
                        valid_costs.append(val_cost)
                        seen_batches += 1
                        if seen_batches == options['valid_batches']:
                            break

                    # Print results
                    log.info('Validation loss: {:0>15.4f}'.format(
                        float(np.mean(valid_costs))
                    ))

                    # Decode fresh standard-normal latents for dashboard samples.
                    val_samples = sess.run(
                        vae_gan.sampler,
                        feed_dict = {
                            sampler_input_batch: MVN(
                                np.zeros(options['latent_dims']),
                                np.diag(np.ones(options['latent_dims'])),
                                size = options['batch_size']
                            )
                        }
                    )

                    val_log.write(log_format_string.format(batch_abs_idx, '2016-04-22', np.mean(valid_costs)))
                    val_log.flush()

                    # Save reconstructions (result[7] = dec_mean), the current
                    # inputs, and the fresh samples as dashboard images.
                    save_ae_samples(
                        catalog,
                        np.reshape(result[7], [options['batch_size']]+options['img_shape']),
                        np.reshape(inputs, [options['batch_size']]+options['img_shape']),
                        np.reshape(val_samples, [options['batch_size']]+options['img_shape']),
                        batch_abs_idx,
                        options['dashboard_dir'],
                        num_to_save=5,
                        save_gray=True
                    )

            log.info('End of epoch {}'.format(epoch_idx + 1))