def __init__(self, hp, rest, parser):
    start_time = time.time()
    self.oracle = HyperParameterSelector(hp, rest, parser, self.name)
    with self.oracle.synchronization():
        self.data = EmbeddingData(self.name, self.oracle.args['batch_size'])
        self.model = NGramModel(self.data, self.oracle)
        self.loss = get_loss('cos', self.data, self.model)

        self.global_step = tf.train.get_or_create_global_step()
        self.learning_rate = tf.train.exponential_decay(
            self.oracle.args['learning_rate'], self.global_step,
            self.oracle.args['decay_epoch'] * self.data.total_step,
            self.oracle.args['decay_rate'], staircase=True)
        self.optimizer = get_opt(self.oracle.args['optimizer'], self.learning_rate)

        grads_and_vars = self.optimizer.compute_gradients(self.loss)
        # gradients, variables = list(zip(*grads_and_vars))
        self.train_op = self.optimizer.apply_gradients(grads_and_vars, self.global_step)

        self.saver = tf.train.Saver(tf.global_variables())
        self.summaries_op = tf.summary.merge([
            tf.summary.scalar('loss', self.loss),
            tf.summary.scalar('learning_rate', self.learning_rate)
        ])
        self.init_op = tf.group(tf.global_variables_initializer(),
                                tf.local_variables_initializer())
    fu.block_print('Pipeline setup done in %.2fs' % (time.time() - start_time))
def train_setup(self):
    main_loss = mu.get_loss(self._hp['loss'], self.data.gt, self.output)
    regu_loss = tf.losses.get_regularization_loss()
    loss = main_loss + regu_loss

    train_answer = tf.argmax(self.output, axis=1)
    train_accuracy, train_accuracy_update, train_accuracy_initializer = \
        mu.get_acc(self.data.gt, train_answer, name='train_accuracy')

    global_step = tf.train.get_or_create_global_step()
    decay_step = int(self._hp['decay_epoch'] * len(self.data))
    learning_rate = mu.get_lr(self._hp['decay_type'], self._hp['learning_rate'],
                              global_step, decay_step, self._hp['decay_rate'])
    optimizer = mu.get_opt(self._hp['opt'], learning_rate, decay_step)

    grads_and_vars = optimizer.compute_gradients(loss)
    # grads_and_vars = [(tf.clip_by_norm(grad, 0.01, axes=[0]), var) if grad is not None else (grad, var)
    #                   for grad, var in grads_and_vars]
    gradients, variables = list(zip(*grads_and_vars))

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = tf.group(
            optimizer.apply_gradients(grads_and_vars, global_step),
            train_accuracy_update)

    self.saver = tf.train.Saver(tf.global_variables())
    self.best_saver = tf.train.Saver(tf.global_variables())

    # Summary
    train_gv_summaries = []
    for idx, grad in enumerate(gradients):
        if grad is not None:
            train_gv_summaries.append(
                tf.summary.histogram('gradients/' + variables[idx].name, grad))
            train_gv_summaries.append(
                tf.summary.histogram(variables[idx].name, variables[idx]))

    train_summaries = [
        tf.summary.scalar('train_loss', loss),
        tf.summary.scalar('train_accuracy', train_accuracy),
        tf.summary.scalar('learning_rate', learning_rate)
    ]
    self.train_summaries_op = tf.summary.merge(train_summaries)
    self.train_gv_summaries_op = tf.summary.merge(train_gv_summaries + train_summaries)

    self.train_init_op_list = [self.data.initializer, train_accuracy_initializer]
    self.train_op_list = [train_op, loss, train_accuracy, global_step]
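# Usage sketch for the lists built by train_setup(), kept as comments so the
# module stays importable. It assumes a pipeline object `p` on which
# train_setup() has run and a dataset initializer that raises OutOfRangeError
# at the end of an epoch; `p` and the session handling here are illustrative
# assumptions, not part of the original code.
#
# with tf.Session() as sess:
#     sess.run(tf.group(tf.global_variables_initializer(),
#                       tf.local_variables_initializer()))
#     sess.run(p.train_init_op_list)            # reset dataset + accuracy metric
#     while True:
#         try:
#             _, loss_val, acc_val, step = sess.run(p.train_op_list)
#         except tf.errors.OutOfRangeError:     # one epoch finished
#             break
#     summary = sess.run(p.train_summaries_op)  # write via a tf.summary.FileWriter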
def __init__(self):
    if reset:
        if os.path.exists(self._checkpoint_dir):
            os.system('rm -rf %s' % self._checkpoint_dir)
        if os.path.exists(self._log_dir):
            os.system('rm -rf %s' % self._log_dir)
        if os.path.exists(self._attn_dir):
            os.system('rm -rf %s' % self._attn_dir)
    fu.make_dirs(os.path.join(self._checkpoint_dir, 'best'))
    fu.make_dirs(self._log_dir)
    fu.make_dirs(self._attn_dir)

    self.train_data = Input(split='train', mode=args.mode)
    self.val_data = Input(split='val', mode=args.mode)
    # self.test_data = TestInput()
    self.train_model = mod.Model(self.train_data, scale=hp['reg'], training=True)
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        self.val_model = mod.Model(self.val_data)
    # with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    #     self.test_model = mod.Model(self.test_data, training=False)

    for v in tf.trainable_variables():
        print(v)

    self.main_loss = mu.get_loss(hp['loss'], self.train_data.gt, self.train_model.output)
    self.regu_loss = tf.losses.get_regularization_loss()
    self.loss = 0
    self.loss += self.main_loss
    self.loss += self.regu_loss

    self.train_answer = tf.argmax(self.train_model.output, axis=1)
    self.train_accuracy, self.train_accuracy_update, self.train_accuracy_initializer = \
        mu.get_acc(self.train_data.gt, self.train_answer, name='train_accuracy')
    self.val_answer = tf.argmax(self.val_model.output, axis=1)
    self.val_accuracy, self.val_accuracy_update, self.val_accuracy_initializer = \
        mu.get_acc(self.val_data.gt, self.val_answer, name='val_accuracy')

    self.global_step = tf.train.get_or_create_global_step()
    decay_step = int(hp['decay_epoch'] * len(self.train_data))
    self.learning_rate = mu.get_lr(hp['decay_type'], hp['learning_rate'],
                                   self.global_step, decay_step, hp['decay_rate'])
    self.optimizer = mu.get_opt(hp['opt'], self.learning_rate, decay_step)

    grads_and_vars = self.optimizer.compute_gradients(self.loss)
    lang_grads_and_vars = [(grad, var) for grad, var in grads_and_vars
                           if 'Visual' not in var.name]
    vis_grads_and_vars = [(grad, var) for grad, var in grads_and_vars
                          if 'Visual' in var.name]

    self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(self.update_ops):
        self.lang_train_op = tf.group(
            self.optimizer.apply_gradients(lang_grads_and_vars, self.global_step),
            self.train_accuracy_update)
        self.vis_train_op = tf.group(
            self.optimizer.apply_gradients(vis_grads_and_vars, self.global_step),
            self.train_accuracy_update)

    self.saver = tf.train.Saver(tf.global_variables())
    self.best_saver = tf.train.Saver(tf.global_variables())

    if args.checkpoint:
        self.checkpoint_file = args.checkpoint
    else:
        self.checkpoint_file = tf.train.latest_checkpoint(self._checkpoint_dir)

    self.train_init_op_list = [self.train_data.initializer, self.train_accuracy_initializer]
    self.val_init_op_list = [self.val_data.initializer, self.val_accuracy_initializer]
    self.lang_train_op_list = [self.lang_train_op, self.loss,
                               self.train_accuracy, self.global_step]
    self.vis_train_op_list = [self.vis_train_op, self.loss,
                              self.train_accuracy, self.global_step]
    self.val_op_list = [self.val_accuracy, self.val_accuracy_update]

    if attn:
        self.lang_train_op_list += [self.train_model.attn, self.train_model.belief,
                                    self.train_data.gt, self.train_answer]
        self.vis_train_op_list += [self.train_model.attn, self.train_model.belief,
                                   self.train_data.gt, self.train_answer]
        self.val_op_list += [self.val_model.attn, self.val_model.belief,
                             self.val_data.gt, self.val_answer]
def __init__(self):
    if reset:
        if os.path.exists(self._checkpoint_dir):
            os.system('rm -rf %s' % self._checkpoint_dir)
        if os.path.exists(self._log_dir):
            os.system('rm -rf %s' % self._log_dir)
    fu.make_dirs(os.path.join(self._checkpoint_dir, 'best'))
    fu.make_dirs(self._log_dir)

    self.train_data = Input(split='train', mode=args.mode)
    self.val_data = Input(split='val', mode=args.mode)
    # self.test_data = TestInput()
    self.train_model = mod.Model(self.train_data, beta=hp['reg'], training=True)
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        self.val_model = mod.Model(self.val_data)
    # with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    #     self.test_model = mod.Model(self.test_data, training=False)

    for v in tf.trainable_variables():
        print(v)

    # QA loss plus GAN-style discriminator / generator losses on the model's logits.
    self.main_loss = mu.get_loss(hp['loss'], self.train_data.gt, self.train_model.output)
    self.real_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=self.train_model.real_logit,
        labels=tf.ones_like(self.train_model.real_logit)))
    self.fake_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=self.train_model.fake_logit,
        labels=tf.zeros_like(self.train_model.fake_logit)))
    self.d_loss = self.real_loss + self.fake_loss
    self.g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=self.train_model.fake_logit,
        labels=tf.ones_like(self.train_model.fake_logit)))
    self.regu_loss = tf.losses.get_regularization_loss()
    self.loss = self.g_loss
    self.loss += self.main_loss
    self.loss += self.regu_loss
    self.d_loss += self.regu_loss

    self.train_accuracy, self.train_accuracy_update, self.train_accuracy_initializer = \
        mu.get_acc(self.train_data.gt, tf.argmax(self.train_model.output, axis=1),
                   name='train_accuracy')
    self.val_accuracy, self.val_accuracy_update, self.val_accuracy_initializer = \
        mu.get_acc(self.val_data.gt, tf.argmax(self.val_model.output, axis=1),
                   name='val_accuracy')

    self.global_step = tf.train.get_or_create_global_step()
    decay_step = int(hp['decay_epoch'] * len(self.train_data))
    self.learning_rate = mu.get_lr(hp['decay_type'], hp['learning_rate'],
                                   self.global_step, decay_step, hp['decay_rate'])
    self.optimizer = mu.get_opt(hp['opt'], self.learning_rate, decay_step)
    self.d_optimizer = mu.get_opt(hp['opt'], self.learning_rate, decay_step)

    grads_and_vars = self.optimizer.compute_gradients(
        self.loss, var_list=tf.trainable_variables('QA'))
    gradients, variables = list(zip(*grads_and_vars))

    self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(self.update_ops):
        # Note: only the discriminator update advances global_step; the QA /
        # generator apply_gradients call is run without a step counter.
        self.train_op = tf.group(
            self.optimizer.apply_gradients(grads_and_vars),
            self.d_optimizer.minimize(self.d_loss, self.global_step,
                                      tf.trainable_variables('Discriminator')),
            self.train_accuracy_update)

    self.saver = tf.train.Saver(tf.global_variables())
    self.best_saver = tf.train.Saver(tf.global_variables())

    # Summary
    train_gv_summaries = []
    for idx, grad in enumerate(gradients):
        if grad is not None:
            train_gv_summaries.append(
                tf.summary.histogram('gradients/' + variables[idx].name, grad))
            train_gv_summaries.append(
                tf.summary.histogram(variables[idx].name, variables[idx]))

    train_summaries = [
        tf.summary.scalar('train_loss', self.loss),
        tf.summary.scalar('train_accuracy', self.train_accuracy),
        tf.summary.scalar('learning_rate', self.learning_rate)
    ]
    self.train_summaries_op = tf.summary.merge(train_summaries)
    self.train_gv_summaries_op = tf.summary.merge(train_gv_summaries + train_summaries)
    self.val_summaries_op = tf.summary.scalar('val_accuracy', self.val_accuracy)

    if args.checkpoint:
        self.checkpoint_file = args.checkpoint
    else:
        self.checkpoint_file = tf.train.latest_checkpoint(self._checkpoint_dir)

    self.train_init_op_list = [self.train_data.initializer, self.train_accuracy_initializer]
    self.val_init_op_list = [self.val_data.initializer, self.val_accuracy_initializer]
    self.train_op_list = [self.train_op, self.loss, self.train_accuracy, self.global_step]
    # self.run_metadata = tf.RunMetadata()
    self.val_op_list = [self.val_accuracy, self.val_accuracy_update, self.val_summaries_op]
def __init__(self):
    if reset:
        if os.path.exists(self._checkpoint_dir):
            os.system('rm -rf %s' % self._checkpoint_dir)
        if os.path.exists(self._log_dir):
            os.system('rm -rf %s' % self._log_dir)
        if os.path.exists(self._attn_dir):
            os.system('rm -rf %s' % self._attn_dir)
    fu.make_dirs(os.path.join(self._checkpoint_dir, 'best'))
    fu.make_dirs(self._log_dir)
    fu.make_dirs(self._attn_dir)

    self.train_data = Input(split='train', mode=args.mode)
    self.val_data = Input(split='val', mode=args.mode)
    # self.test_data = TestInput()
    self.train_model = mod.Model(self.train_data, scale=hp['reg'], training=True)
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        self.val_model = mod.Model(self.val_data)
    # with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    #     self.test_model = mod.Model(self.test_data, training=False)

    for v in tf.trainable_variables():
        print(v)

    self.main_loss = mu.get_loss(hp['loss'], self.train_data.gt, self.train_model.output)
    self.regu_loss = tf.losses.get_regularization_loss()
    self.loss = 0
    self.loss += self.main_loss
    self.loss += self.regu_loss

    self.train_answer = tf.argmax(self.train_model.output, axis=1)
    self.train_accuracy, self.train_accuracy_update, self.train_accuracy_initializer = \
        mu.get_acc(self.train_data.gt, self.train_answer, name='train_accuracy')
    self.val_answer = tf.argmax(self.val_model.output, axis=1)
    self.val_accuracy, self.val_accuracy_update, self.val_accuracy_initializer = \
        mu.get_acc(self.val_data.gt, self.val_answer, name='val_accuracy')

    self.global_step = tf.train.get_or_create_global_step()
    decay_step = int(hp['decay_epoch'] * len(self.train_data))
    self.learning_rate = mu.get_lr(hp['decay_type'], hp['learning_rate'],
                                   self.global_step, decay_step, hp['decay_rate'])
    self.optimizer = mu.get_opt(hp['opt'], self.learning_rate, decay_step)

    grads_and_vars = self.optimizer.compute_gradients(self.loss)
    # grads_and_vars = [(tf.clip_by_norm(grad, 0.01, axes=[0]), var) if grad is not None else (grad, var)
    #                   for grad, var in grads_and_vars]
    gradients, variables = list(zip(*grads_and_vars))

    self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(self.update_ops):
        self.train_op = tf.group(
            self.optimizer.apply_gradients(grads_and_vars, self.global_step),
            self.train_accuracy_update)

    self.saver = tf.train.Saver(tf.global_variables())
    self.best_saver = tf.train.Saver(tf.global_variables())

    # Summary
    train_gv_summaries = []
    for idx, grad in enumerate(gradients):
        if grad is not None:
            train_gv_summaries.append(
                tf.summary.histogram('gradients/' + variables[idx].name, grad))
            train_gv_summaries.append(
                tf.summary.histogram(variables[idx].name, variables[idx]))

    train_summaries = [
        tf.summary.scalar('train_loss', self.loss),
        tf.summary.scalar('train_accuracy', self.train_accuracy),
        tf.summary.scalar('learning_rate', self.learning_rate)
    ]
    self.train_summaries_op = tf.summary.merge(train_summaries)
    self.train_gv_summaries_op = tf.summary.merge(train_gv_summaries + train_summaries)
    self.val_summaries_op = tf.summary.scalar('val_accuracy', self.val_accuracy)

    if args.checkpoint:
        self.checkpoint_file = args.checkpoint
    else:
        self.checkpoint_file = tf.train.latest_checkpoint(self._checkpoint_dir)

    self.train_init_op_list = [self.train_data.initializer, self.train_accuracy_initializer]
    self.val_init_op_list = [self.val_data.initializer, self.val_accuracy_initializer]
    self.train_op_list = [self.train_op, self.loss, self.train_accuracy, self.global_step]
    self.val_op_list = [self.val_accuracy, self.val_accuracy_update, self.val_summaries_op]

    if attn:
        self.train_op_list += [self.train_model.attn, self.train_model.belief,
                               self.train_data.gt, self.train_answer]
        self.val_op_list += [self.val_model.attn, self.val_model.belief,
                             self.val_data.gt, self.val_answer]
def __init__(self):
    if reset:
        if os.path.exists(self._checkpoint_dir):
            os.system('rm -rf %s' % self._checkpoint_dir)
        if os.path.exists(self._log_dir):
            os.system('rm -rf %s' % self._log_dir)
        if os.path.exists(self._attn_dir):
            os.system('rm -rf %s' % self._attn_dir)
    fu.make_dirs(os.path.join(self._checkpoint_dir, 'best'))
    fu.make_dirs(self._log_dir)
    fu.make_dirs(self._attn_dir)

    self.train_data = Input(split='train', mode=args.mode)
    self.val_data = Input(split='val', mode=args.mode)
    # self.test_data = TestInput()
    self.train_model = mod.Model(self.train_data, beta=hp['reg'], training=True)
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        self.val_model = mod.Model(self.val_data)
    # with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    #     self.test_model = mod.Model(self.test_data, training=False)

    for v in tf.trainable_variables():
        print(v)

    # Attention associated with the ground-truth answer (indexed by gt[0]).
    self.train_attn = tf.squeeze(self.train_model.attn[self.train_data.gt[0]])
    self.val_attn = tf.squeeze(self.val_model.attn[self.val_data.gt[0]])

    self.main_loss = mu.get_loss(hp['loss'], self.train_data.gt, self.train_model.output)
    self.attn_loss = mu.get_loss('hinge', tf.to_float(self.train_data.spec), self.train_attn)
    self.regu_loss = tf.losses.get_regularization_loss()
    self.loss = 0
    if 'main' in target:
        self.loss += self.main_loss
    elif 'attn' in target:
        self.loss += self.attn_loss
    self.loss += self.regu_loss

    self.train_acc, self.train_acc_update, self.train_acc_init = \
        mu.get_acc(self.train_data.gt, tf.argmax(self.train_model.output, axis=1),
                   name='train_accuracy')
    self.train_attn_acc, self.train_attn_acc_update, self.train_attn_acc_init = \
        mu.get_acc(self.train_data.spec, tf.to_int32(self.train_attn > 0.5),
                   name='train_attention_accuracy')
    # self.train_q_attn_acc, self.train_q_attn_acc_update, self.train_q_attn_acc_init = \
    #     tf.metrics.accuracy(self.train_data.spec, self.train_model.output, name='train_q_attention_accuracy')
    # self.train_a_attn_acc, self.train_a_attn_acc_update, self.train_a_attn_acc_init = \
    #     tf.metrics.accuracy(self.train_data.spec, self.train_model.output, name='train_a_attention_accuracy')

    self.val_acc, self.val_acc_update, self.val_acc_init = \
        mu.get_acc(self.val_data.gt, tf.argmax(self.val_model.output, axis=1),
                   name='val_accuracy')
    self.val_attn_acc, self.val_attn_acc_update, self.val_attn_acc_init = \
        mu.get_acc(self.val_data.spec, tf.to_int32(self.val_attn > 0.5),
                   name='val_attention_accuracy')
    # self.val_q_attn_acc, self.val_q_attn_acc_update, self.val_q_attn_acc_init = \
    #     tf.metrics.accuracy(self.train_data.spec, self.train_model.output, name='val_q_attention_accuracy')
    # self.val_a_attn_acc, self.val_a_attn_acc_update, self.val_a_attn_acc_init = \
    #     tf.metrics.accuracy(self.train_data.spec, self.train_model.output, name='val_a_attention_accuracy')

    self.global_step = tf.train.get_or_create_global_step()
    decay_step = int(hp['decay_epoch'] * len(self.train_data))
    self.learning_rate = mu.get_lr(hp['decay_type'], hp['learning_rate'],
                                   self.global_step, decay_step, hp['decay_rate'])
    self.optimizer = mu.get_opt(hp['opt'], self.learning_rate, decay_step)

    grads_and_vars = self.optimizer.compute_gradients(self.loss)
    gradients, variables = list(zip(*grads_and_vars))

    self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(self.update_ops):
        self.train_op = tf.group(
            self.optimizer.apply_gradients(grads_and_vars, self.global_step),
            self.train_acc_update,
            self.train_attn_acc_update)
        # self.train_a_attn_acc_update, self.train_q_attn_acc_update)

    self.saver = tf.train.Saver(tf.global_variables())
    self.best_saver = tf.train.Saver(tf.global_variables())

    # Summary
    train_gv_summaries = []
    for idx, grad in enumerate(gradients):
        if grad is not None:
            train_gv_summaries.append(
                tf.summary.histogram('gradients/' + variables[idx].name, grad))
            train_gv_summaries.append(
                tf.summary.histogram(variables[idx].name, variables[idx]))

    train_summaries = [
        tf.summary.scalar('train_loss', self.loss),
        tf.summary.scalar('train_accuracy', self.train_acc),
        # tf.summary.scalar('train_a_attn_accuracy', self.train_a_attn_acc),
        # tf.summary.scalar('train_q_attn_accuracy', self.train_q_attn_acc),
        tf.summary.scalar('train_attn_accuracy', self.train_attn_acc),
        tf.summary.scalar('learning_rate', self.learning_rate)
    ]
    self.train_summaries_op = tf.summary.merge(train_summaries)
    self.train_gv_summaries_op = tf.summary.merge(train_gv_summaries + train_summaries)

    val_summaries = [
        tf.summary.scalar('val_accuracy', self.val_acc),
        tf.summary.scalar('val_attn_accuracy', self.val_attn_acc),
        # tf.summary.scalar('val_a_attn_accuracy', self.val_a_attn_acc),
        # tf.summary.scalar('val_q_attn_accuracy', self.val_q_attn_acc),
    ]
    self.val_summaries_op = tf.summary.merge(val_summaries)

    if args.checkpoint:
        self.checkpoint_file = args.checkpoint
    else:
        self.checkpoint_file = tf.train.latest_checkpoint(self._checkpoint_dir)

    self.train_init_op_list = [self.train_data.initializer, self.train_acc_init,
                               # self.train_q_attn_acc_init, self.train_a_attn_acc_init,
                               self.train_attn_acc_init]
    self.val_init_op_list = [self.val_data.initializer, self.val_acc_init,
                             # self.val_q_attn_acc_init, self.val_a_attn_acc_init,
                             self.val_attn_acc_init]
    self.train_op_list = [self.train_op, self.loss, self.attn_loss,
                          self.train_acc, self.train_attn_acc,
                          # self.val_q_attn_acc, self.val_a_attn_acc,
                          self.global_step, self.train_data.spec, self.train_attn]
    self.val_op_list = [self.val_acc, self.val_attn_acc,
                        # self.val_q_attn_acc, self.val_a_attn_acc,
                        tf.group(self.val_acc_update, self.val_attn_acc_update
                                 # self.val_q_attn_acc_update, self.val_a_attn_acc_update
                                 ),
                        self.val_summaries_op, self.val_data.spec, self.val_attn]
def old_main():
    data = EmbeddingData(args.batch_size)
    if args.model == 'myconv':
        model = MyConvModel(data, char_dim=args.char_dim, conv_channel=args.conv_channel)
    elif args.model == 'myrnn':
        model = MyModel(data, char_dim=args.char_dim, hidden_dim=args.hidden_dim,
                        num_layers=args.nlayers)
    elif args.model == 'mimick':
        model = EmbeddingModel(data, char_dim=args.char_dim, hidden_dim=args.hidden_dim)
    elif args.model == 'matrice':
        model = MatricesModel(data)
    else:
        model = None

    # Name the experiment after the non-default, non-boolean arguments.
    args_dict = vars(args)
    pairs = [(k, str(args_dict[k])) for k in sorted(args_dict.keys())
             if args_dict[k] != parser.get_default(k) and not isinstance(args_dict[k], bool)]
    if pairs:
        exp_name = '_'.join(['.'.join(pair) for pair in pairs])
    else:
        exp_name = '.'.join(('learning_rate', str(args.learning_rate)))

    log_dir = join(config.log_dir, 'embedding_log', exp_name)
    checkpoint_dir = join(config.checkpoint_dir, 'embedding_checkpoint', exp_name)
    checkpoint_name = join(checkpoint_dir, 'embedding')
    if args.reset:
        if exists(checkpoint_dir):
            os.system('rm -rf %s' % os.path.join(checkpoint_dir, '*'))
        if os.path.exists(log_dir):
            os.system('rm -rf %s' % os.path.join(log_dir, '*'))
    fu.make_dirs(log_dir)
    fu.make_dirs(checkpoint_dir)

    loss = get_loss(args.loss, data, model) + get_loss(args.sec_loss, data, model)
    baseline = tf.losses.mean_squared_error(data.vec, model.output)

    global_step = tf.train.get_or_create_global_step()
    total_step = int(math.floor(data.num_example / args.batch_size))
    # learning_rate = tf.train.exponential_decay(args.learning_rate,
    #                                            global_step,
    #                                            args.decay_epoch * total_step,
    #                                            args.decay_rate,
    #                                            staircase=True)
    learning_rate = args.learning_rate
    optimizer = get_opt(args.optimizer, learning_rate)

    grads_and_vars = optimizer.compute_gradients(loss)
    gradients, variables = list(zip(*grads_and_vars))
    for var in variables:
        print(var.name, var.shape)
    if args.model != 'myconv':
        capped_grads_and_vars = [(tf.clip_by_norm(gv[0], args.clip_norm), gv[1])
                                 for gv in grads_and_vars]
    else:
        capped_grads_and_vars = grads_and_vars
    train_op = optimizer.apply_gradients(capped_grads_and_vars, global_step)
    saver = tf.train.Saver(tf.global_variables())

    # Summary
    train_gv_summaries = []
    for idx, var in enumerate(variables):
        train_gv_summaries.append(
            tf.summary.histogram('gradient/' + var.name, gradients[idx]))
        train_gv_summaries.append(tf.summary.histogram(var.name, var))
    train_gv_summaries_op = tf.summary.merge(train_gv_summaries)

    train_summaries = [
        tf.summary.scalar('loss', loss),
        tf.summary.scalar('baseline', baseline),
        tf.summary.scalar('learning_rate', learning_rate)
    ]
    train_summaries_op = tf.summary.merge(train_summaries)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    if args.checkpoint_file:
        checkpoint_file = args.checkpoint_file
    else:
        checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)

    # restore_fn = (lambda _sess: saver.restore(_sess, checkpoint_file)) \
    #     if checkpoint_file else (lambda _sess: _sess.run(init_op))
    # sv = tf.train.Supervisor(logdir=log_dir, summary_op=None,
    #                          init_fn=restore_fn, save_model_secs=0,
    #                          saver=saver, global_step=global_step)

    config_ = tf.ConfigProto(allow_soft_placement=True)
    config_.gpu_options.allow_growth = True
    with tf.Session(config=config_) as sess, tf.summary.FileWriter(log_dir) as sw:
        sess.run(init_op)
        if checkpoint_file:
            saver.restore(sess, checkpoint_file)

        def save():
            saver.save(sess, checkpoint_name, tf.train.global_step(sess, global_step))

        def save_sum(summary_):
            sw.add_summary(summary_, tf.train.global_step(sess, global_step))

        # Training loop
        def train_loop(epoch_, y=0, y_=0):
            shuffle(data.file_names)
            sess.run(data.iterator.initializer, feed_dict={
                data.file_names_placeholder: data.file_names[:args.give_shards],
            })
            step = tf.train.global_step(sess, global_step)
            print("Training Loop Epoch %d" % epoch_)
            step = step % total_step
            pbar = trange(step, total_step)
            for _ in pbar:
                try:
                    _, l, bl, step, y, y_, gv_summary, summary = sess.run([
                        train_op, loss, baseline, global_step, model.output,
                        data.vec, train_gv_summaries_op, train_summaries_op
                    ])
                    if step % max((total_step // 100), 10) == 0:
                        save_sum(summary)
                        save_sum(gv_summary)
                    if step % max((total_step // 10), 100) == 0:
                        save()
                    pbar.set_description('[%s/%s] step: %d loss: %.3f base: %.3f' %
                                         (epoch_, args.epoch, step, l, bl))
                except KeyboardInterrupt:
                    save()
                    print()
                    return True
            print("Training Loop Epoch %d Done..." % epoch_)
            print(y_, y, sep='\n\n\n')
            time.sleep(10)
            save()
            return False

        now_epoch = tf.train.global_step(sess, global_step) // total_step + 1
        for epoch in range(now_epoch, args.epoch + 1):
            if train_loop(epoch):
                break
def __init__(self, arguments, args_parser):
    # Initialize all constant parameters,
    # including paths, hyper-parameters, model name...etc.
    self.args = arguments
    self.args_dict = self.args.__dict__
    self.val_dict = {'baseline': 0, 'max_length': 1, 'lock': True}
    self.args_parser = args_parser

    if len(sys.argv) == 1 or self.args.mode == 'inspect':
        self.search_param()
    else:
        self.retrieve_param(self.param_file)

    if self.args.reset:
        self.remove()
    fu.make_dirs(self.log_dir)
    fu.make_dirs(self.checkpoint_dir)

    if args.checkpoint_file:
        self.checkpoint_file = args.checkpoint_file
    else:
        self.checkpoint_file = tf.train.latest_checkpoint(self.checkpoint_dir)

    # Lock the current experiment.
    self.lock_exp()
    try:
        # tensorflow pipeline
        tf.reset_default_graph()
        self.data = EmbeddingData(self.args.batch_size)
        with tf.variable_scope('model'):
            self.model = self.get_model()

        self.loss = get_loss(self.args.loss, self.data, self.model) + \
            get_loss(self.args.sec_loss, self.data, self.model) + \
            tf.reduce_mean(tf.abs(tf.norm(self.model.output, axis=1) - 1))
        self.baseline = get_loss('mse', self.data, self.model)

        self.global_step = tf.train.get_or_create_global_step()
        self.local_step = tf.get_variable('local_step', dtype=tf.int32,
                                          initializer=0, trainable=False)
        self.lr_placeholder = tf.placeholder(tf.float32, name='lr_placeholder')
        self.step_placeholder = tf.placeholder(tf.int64, name='step_placeholder')
        self.dr_placeholder = tf.placeholder(tf.float32, name='dr_placeholder')
        self.learning_rate = tf.train.exponential_decay(
            self.lr_placeholder, self.local_step,
            self.args.decay_epoch * self.step_placeholder,
            self.dr_placeholder, staircase=True)

        with tf.variable_scope('optimizer'):
            self.optimizer = get_opt(self.args.optimizer, self.learning_rate)

        grads_and_vars = self.optimizer.compute_gradients(self.loss)
        gradients, variables = list(zip(*grads_and_vars))
        for var in variables:
            print(var.name, var.shape)
        if args.model != 'myconv':
            capped_grads_and_vars = [
                (tf.clip_by_norm(gv[0], args.clip_norm), gv[1])
                for gv in grads_and_vars
            ]
        else:
            capped_grads_and_vars = grads_and_vars
        self.train_op = tf.group(
            self.optimizer.apply_gradients(capped_grads_and_vars, self.global_step),
            tf.assign(self.local_step, self.local_step + 1))

        self.saver = tf.train.Saver(tf.global_variables())

        # Summary
        for idx, var in enumerate(variables):
            tf.summary.histogram('gradient/' + var.name, gradients[idx])
            tf.summary.histogram(var.name, var)
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('baseline', self.baseline)
        tf.summary.scalar('learning_rate', self.learning_rate)
        self.train_summaries_op = tf.summary.merge_all()

        self.init_op = tf.variables_initializer(tf.global_variables() +
                                                tf.local_variables())
        self.init_feed_dict = {
            self.model.init_mean: self.args.init_mean,
            self.model.init_stddev: self.args.init_scale
        }
        pp.pprint(tf.global_variables())
    finally:
        self.unlock_exp()
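# Usage sketch for the placeholder-driven learning-rate schedule above, kept as
# comments. Because `learning_rate` is built from placeholders, every run that
# touches `train_op` (or `learning_rate`) must feed them. The feed values below
# are assumptions drawn from the surrounding code (`args.learning_rate`,
# `args.decay_rate`, and a per-epoch step count like `data.total_step`), not a
# confirmed part of this class's API.
#
# feed = {
#     self.lr_placeholder: self.args.learning_rate,
#     self.step_placeholder: self.data.total_step,   # steps per epoch (assumed)
#     self.dr_placeholder: self.args.decay_rate,
# }
# sess.run(self.init_op, feed_dict=self.init_feed_dict)
# _, loss_val = sess.run([self.train_op, self.loss], feed_dict=feed)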