def mgpu_train(*xs):
    gpu_ops = []
    gpu_grads = []
    xs = (tf.split(x, n_gpu, 0) for x in xs)
    for i, xs in enumerate(zip(*xs)):
        do_reuse = True if i > 0 else None
        with tf.device(assign_to_gpu(i, "/gpu:0")), tf.variable_scope(
                tf.get_variable_scope(), reuse=do_reuse):
            clf_logits, clf_losses, lm_losses = model(*xs, train=True,
                                                      reuse=do_reuse)
            if lm_coef > 0:
                train_loss = tf.reduce_mean(clf_losses) + \
                    lm_coef * tf.reduce_mean(lm_losses)
            else:
                train_loss = tf.reduce_mean(clf_losses)
            params = find_trainable_variables("model")
            grads = tf.gradients(train_loss, params)
            grads = list(zip(grads, params))
            gpu_grads.append(grads)
            gpu_ops.append([clf_logits, clf_losses, lm_losses])
    ops = [tf.concat(op, 0) for op in zip(*gpu_ops)]
    grads = average_grads(gpu_grads)
    grads = [g for g, p in grads]
    train = opt_fns[opt](params, grads, lr,
                         partial(lr_schedules[lr_schedule], warmup=lr_warmup),
                         n_updates_total, l2=l2, max_grad_norm=max_grad_norm,
                         vector_l2=vector_l2, b1=b1, b2=b2, e=e)
    return [train] + ops
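# Several snippets in this file call average_grads without defining it. The
# following is a minimal sketch of what such a helper typically does, in the
# style of the multi-GPU averaging from TensorFlow's CIFAR-10 tutorial; the
# body below is an assumption, not this repository's actual implementation.
import tensorflow as tf

def average_grads(tower_grads):
    """Average (grad, var) lists collected from each GPU tower.

    tower_grads: one list of (gradient, variable) pairs per GPU, as built
    inside mgpu_train. Returns a single list of (averaged_grad, variable)
    pairs. Assumes every gradient is non-None.
    """
    averaged = []
    for grads_and_vars in zip(*tower_grads):
        # grads_and_vars holds the (grad, var) pair for one variable on
        # every GPU; stack the grads along a new axis and take the mean.
        grads = [tf.expand_dims(g, 0) for g, _ in grads_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        averaged.append((grad, grads_and_vars[0][1]))
    return averaged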
def load_checkpoint(self, sess, path=None):
    if path is None:
        save_dir = os.path.join(self.params.save_dir, self.params.desc,
                                'best_params.jl')
    else:
        save_dir = path
    t_vars = utils.find_trainable_variables('model')
    # This should be fine since I'm loading my own saved weights
    sess.run([p.assign(ip) for p, ip in zip(t_vars, joblib.load(save_dir))])
def build_model(args, scope):
    nh = args.max_clause
    nw = args.max_var
    nc = 2
    nact = nc * nw
    ob_shape = (None, nh, nw, nc * args.nstack)
    X = tf.placeholder(tf.float32, ob_shape)
    Y = tf.placeholder(tf.float32, (None, nact))
    # (None,) rather than (None): a bare (None) is just None, i.e. a fully
    # unspecified shape, while the intent here is a 1-D batch vector
    Z = tf.placeholder(tf.float32, (None,))
    p, v = model3(X, nact, scope)
    params = find_trainable_variables(scope)
    with tf.name_scope("loss"):
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=p))
        value_loss = tf.losses.mean_squared_error(labels=Z, predictions=v)
        lossL2 = tf.add_n([tf.nn.l2_loss(vv) for vv in params])
        loss = cross_entropy + value_loss + args.l2_coeff * lossL2
    return X, Y, Z, p, v, params, loss
def build_graph(sess):
    X = tf.placeholder(tf.int32, [None, N_CTX, 2])
    M = tf.placeholder(tf.float32, [None, N_CTX])
    lm_logits, lm_losses = model(X, M, train=False, reuse=False)
    params = find_trainable_variables('model')
    sess.run(tf.global_variables_initializer())
    shapes = json.load(open('model/params_shapes.json'))
    offsets = np.cumsum([np.prod(shape) for shape in shapes])
    init_params = [np.load('model/params_{}.npy'.format(n)) for n in range(10)]
    init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1]
    init_params = [param.reshape(shape)
                   for param, shape in zip(init_params, shapes)]
    init_params[0] = init_params[0][:N_CTX]
    init_params[0] = np.concatenate([init_params[1], init_params[0]], 0)
    del init_params[1]
    n_transfer = 1 + N_TRANSFER * 12
    sess.run([p.assign(ip) for p, ip in zip(params[:n_transfer],
                                            init_params[:n_transfer])])
    return X, M, lm_logits, lm_losses
def __init__(self, policy, ob_space, ac_space, lr, max_grad_norm,
             units_per_hlayer, activ_fcn, log_interval, logdir, nenvs,
             batch_size, ent_coef, vf_coef, keep_model, meta=False):
    self.logger = logging.getLogger(self.__module__ + "." +
                                    self.__class__.__name__)
    self.logger.info('Set up A2C learning agent')
    self.num_steps_trained = 0
    self.log_interval = log_interval

    sess = make_session()
    nact = ac_space.n
    nbatch = nenvs * batch_size
    self.global_step = tf.get_variable('global_step', [], tf.int32,
                                       tf.constant_initializer(0, tf.int32),
                                       trainable=False)

    eval_model = policy(sess, ob_space, ac_space, 1, 1, units_per_hlayer,
                        reuse=False, activ_fcn=activ_fcn)
    step_model = policy(sess, ob_space, ac_space, nenvs, 1, units_per_hlayer,
                        reuse=tf.AUTO_REUSE, activ_fcn=activ_fcn)
    train_model = policy(sess, ob_space, ac_space, nenvs, batch_size,
                         units_per_hlayer, reuse=True, activ_fcn=activ_fcn)

    # -- Loss computation --
    A = tf.placeholder(tf.int32, [None])
    ADV = tf.placeholder(tf.float32, [None])
    R = tf.placeholder(tf.float32, [None])

    def get_loss(model, placeholder_dict):
        a = placeholder_dict["A"]
        adv = placeholder_dict["ADV"]
        r = placeholder_dict["R"]
        # Cross entropy between the estimated action distribution and the
        # 'true' distribution of the actions that were chosen
        chosen_action_log_probs = \
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=model.pi_logit, labels=a)
        pg_loss = tf.reduce_mean(adv * chosen_action_log_probs)  # minimize
        vf_loss = tf.reduce_mean(mse(tf.squeeze(model.vf), r))   # minimize
        entropy = -tf.reduce_mean(cat_entropy(model.pi_logit))   # maximize
        return (pg_loss, entropy, vf_loss, model.vf,
                chosen_action_log_probs, None, None)

    self.input_plchld = {'A': A, 'ADV': ADV, 'R': R}
    pg_loss, entropy, vf_loss, _, chosen_action_log_probs, _, _ = get_loss(
        train_model, self.input_plchld)
    loss = pg_loss + entropy * ent_coef + vf_loss * vf_coef
    vf = tf.squeeze(train_model.vf)

    params = find_trainable_variables("model")
    trainer = tf.train.AdamOptimizer(learning_rate=lr)
    # trainer = tf.train.RMSPropOptimizer(learning_rate=LR, decay=alpha,
    #                                     epsilon=epsilon)
    gradients = trainer.compute_gradients(loss)
    grads, variables = zip(*gradients)
    if max_grad_norm is not None:
        grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
    grads = list(zip(grads, params))
    _train = [trainer.apply_gradients(grads),
              self.global_step.assign_add(nbatch)]

    if log_interval > 0:
        for g, v in gradients:
            if g is not None:
                tf.summary.histogram("%s-grad" % v.name.replace(':', '_'), g)
        for p in params:
            if p is not None:
                tf.summary.histogram("train/%s" % p.name.replace(':', '_'),
                                     p.value())
        tf.summary.scalar("train/pg_loss", pg_loss)
        tf.summary.scalar("train/vf_loss", vf_loss)
        tf.summary.scalar("train/entropy", entropy)
        tf.summary.histogram("others/ADV", ADV)
        tf.summary.histogram("others/neglocpac", chosen_action_log_probs)
        tf.summary.histogram("others/vf", vf)
        self.summary_step = tf.summary.merge_all()

    # Adding these to collection so we can restore them again
    tf.add_to_collection('inputs', eval_model.X)
    tf.add_to_collection('pi', eval_model.pi)
    tf.add_to_collection('pi_logit', eval_model.pi_logit)
    tf.add_to_collection('val', eval_model.vf)
    tf.add_to_collection('step', eval_model.ac)
    if eval_model.initial_state is not None:
        add_to_collection_rnn_state('state_in', eval_model.rnn_state_in)
        add_to_collection_rnn_state('state_out', eval_model.rnn_state_out)

    tf.global_variables_initializer().run(session=sess)

    def train(obs, states, rewards, actions, values):
        advs = rewards - values  # estimate of A(s,a) = Q(s,a) - V(s)
        cur_lr = lr.value()
        td_map = {train_model.X: obs, A: actions,
                  ADV: advs, R: rewards}  # , LR: cur_lr}
        if states is not None:
            td_map[train_model.rnn_state_in] = states
            # td_map[train_model.M] = masks
        policy_loss, value_loss, policy_entropy, test, ap, global_step = \
            sess.run([pg_loss, vf_loss, entropy, _train, train_model.pi,
                      self.global_step], td_map)

        # TF summary logging
        if log_interval > 0 and \
                (self.num_steps_trained % self.log_interval == 0):
            self.logger.info(
                'Save summary of network weights, grads and losses.')
            summary_str = sess.run(self.summary_step, td_map)
            self.summary_writer.add_summary(
                tf.Summary.FromString(summary_str), global_step)
        self.num_steps_trained += 1
        return policy_loss, value_loss, policy_entropy, ap

    saver = tf.train.Saver(max_to_keep=keep_model)

    def save(f_name):
        gs = sess.run(self.global_step)
        self.logger.info(
            'Save network parameters of model at global step %s' % gs)
        saver.save(sess, os.path.join(logdir, f_name), global_step=gs)

    def load(load_path):
        saver.restore(sess, load_path)

    def test_run(env, n_eps, n_pipes):
        self.logger.info('Evaluating current agent')
        ep_return = []
        ep_length = []
        for i in range(0, n_eps):
            obs = env.reset()
            obs = normalize_obs(obs)
            done = False
            if eval_model.initial_state is not None:
                if len(eval_model.initial_state) > 1:
                    # init lstm cell vector
                    rnn_s_in = (np.zeros(eval_model.initial_state[0].shape),
                                np.zeros(eval_model.initial_state[1].shape))
                else:
                    # init gru cell vector
                    rnn_s_in = np.zeros(eval_model.initial_state.shape)
            total_return = 0
            total_length = 0
            while not done and (total_return < n_pipes):
                if eval_model.initial_state is not None:
                    pi, pi_log, act, rnn_s_out = sess.run(
                        [eval_model.pi, eval_model.pi_logit, eval_model.ac,
                         eval_model.rnn_state_out],
                        feed_dict={eval_model.X: [obs],
                                   eval_model.rnn_state_in: rnn_s_in})
                else:
                    pi, pi_log, act = sess.run(
                        [eval_model.pi, eval_model.pi_logit, eval_model.ac],
                        feed_dict={eval_model.X: [obs]})
                ac = np.argmax(pi_log)
                obs, reward, done, _ = env.step(ac)
                obs = normalize_obs(obs)
                total_length += 1
                total_return += reward
                if eval_model.initial_state is not None:
                    rnn_s_in = rnn_s_out
            self.logger.info('Episode %s: %s, %s' %
                             (i, total_return, total_length))
            ep_length.append(total_length)
            ep_return.append(total_return)
        return ep_return

    self.get_loss = get_loss
    self.trainer = trainer
    self.train_vars = params
    self.train = train
    self.train_model = train_model
    self.eval_model = eval_model
    self.step_model = step_model
    self.step = step_model.step
    self.value = step_model.value
    self.initial_state = step_model.initial_state
    self.save = save
    self.load = load
    self.test_run = test_run

    # Set the summary writer to write to the given logdir if logging is enabled
    if log_interval > 0:
        self.summary_writer = tf.summary.FileWriter(
            logdir, graph_def=sess.graph_def)
    else:
        self.summary_writer = None
    self.sess = sess
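# The A2C losses above lean on two small helpers, mse and cat_entropy, that
# none of these snippets define. They appear to match the utilities in
# openai/baselines (a2c.utils); a minimal sketch under that assumption:
import tensorflow as tf

def mse(pred, target):
    # Per-element squared error; the caller takes the mean via tf.reduce_mean
    return tf.square(pred - target) / 2.

def cat_entropy(logits):
    # Entropy of a categorical distribution from unnormalized logits,
    # computed in a numerically stable way (subtract the max logit first).
    a0 = logits - tf.reduce_max(logits, 1, keepdims=True)
    ea0 = tf.exp(a0)
    z0 = tf.reduce_sum(ea0, 1, keepdims=True)
    p0 = ea0 / z0
    return tf.reduce_sum(p0 * (tf.log(z0) - a0), 1)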
def __init__(self, policy, config):
    sess = tf.get_default_session()

    # CREATE THE PLACEHOLDERS
    actions_ = tf.placeholder(tf.int32, [None], name="actions_")
    advantages_ = tf.placeholder(tf.float32, [None], name="advantages_")
    rewards_ = tf.placeholder(tf.float32, [None], name="rewards_")
    lr_ = tf.placeholder(tf.float32, name="learning_rate_")
    # Keep track of old actor
    oldneglopac_ = tf.placeholder(tf.float32, [None], name="oldneglopac_")
    # Keep track of old critic
    oldvpred_ = tf.placeholder(tf.float32, [None], name="oldvpred_")
    # Cliprange
    cliprange_ = tf.placeholder(tf.float32, [])

    # CREATE OUR TWO MODELS
    # step_model is used for sampling
    step_model = policy(sess, config, reuse=False)
    # Test model for testing our agent
    # test_model = policy(sess, action_space, 1, 1, reuse=False)
    # train_model is used for training
    train_model = policy(sess, config, reuse=True)

    # CALCULATE THE LOSS
    # Total loss = policy gradient loss - entropy * entropy coefficient
    #              + value coefficient * value loss

    # Clip the value:
    # clipped value = old value + clip(value - old value, -cliprange, cliprange)
    value_prediction = train_model.vf
    value_prediction_clipped = oldvpred_ + tf.clip_by_value(
        train_model.vf - oldvpred_, -cliprange_, cliprange_)
    # Unclipped value loss
    value_loss_unclipped = tf.square(value_prediction - rewards_)
    # Clipped value loss
    value_loss_clipped = tf.square(value_prediction_clipped - rewards_)
    # Value loss: 0.5 * mean[max(unclipped, clipped)]
    vf_loss = 0.5 * tf.reduce_mean(
        tf.maximum(value_loss_unclipped, value_loss_clipped))

    # Clip the policy
    # Output -log(pi) for the new policy
    neglogpac = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=train_model.pi, labels=actions_)
    # We want the ratio (pi current policy / pi old policy), but neglogpac
    # gives us -log(policy), so transform it:
    # e^(-log old - (-log new)) == e^(log new - log old)
    #                           == e^(log(new / old)) == new / old
    ratio = tf.exp(oldneglopac_ - neglogpac)  # ratio = pi new / pi old

    # We are doing gradient ascent, i.e. we want to MAXIMIZE the objective J,
    # which is equivalent to minimizing loss = -J; hence the negation below.
    pg_loss_unclipped = -advantages_ * ratio
    pg_loss_clipped = -advantages_ * tf.clip_by_value(
        ratio, 1.0 - cliprange_, 1.0 + cliprange_)
    # Final PG loss. We take the maximum because both terms are negated:
    # the min of the positive objectives is the max of their negations.
    pg_loss = tf.reduce_mean(tf.maximum(pg_loss_unclipped, pg_loss_clipped))

    # Entropy is used to improve exploration by limiting premature
    # convergence to a suboptimal policy.
    entropy = tf.reduce_mean(train_model.pd.entropy())

    # Total loss (again, L = -J)
    loss = pg_loss - entropy * config.ent_coef + vf_loss * config.vf_coef

    # UPDATE THE PARAMETERS USING LOSS
    # 1. Get the model parameters
    params = ut.find_trainable_variables("model")
    # 2. Calculate the gradients
    grads = tf.gradients(loss, params)
    if config.max_grad_norm is not None:
        # Clip the gradients (normalize)
        grads, grad_norm = tf.clip_by_global_norm(grads, config.max_grad_norm)
    # zip pairs each gradient with its variable:
    # zip(ABCD, xyza) => Ax, By, Cz, Da
    grads = list(zip(grads, params))
    # 3. Build our trainer
    trainer = tf.train.RMSPropOptimizer(learning_rate=lr_, epsilon=1e-5)
    # 4. Backpropagation
    _train = trainer.apply_gradients(grads)

    # Train function
    def train(ask_book_env, bid_book_env, inv_env, funds_env, actions,
              returns, values, neglogpacs, lr, cliprange):
        # Advantage A(s,a) = R + yV(s') - V(s), with returns = R + yV(s')
        advantages = returns - values
        # Normalize the advantages (taken from the aborghi implementation)
        advantages = (advantages - advantages.mean()) / \
            (advantages.std() + 1e-8)
        # Build the feed dictionary
        td_map = {
            train_model.input_ask_book: ask_book_env,
            train_model.input_bid_book: bid_book_env,
            train_model.input_inventory: inv_env,
            train_model.input_funds: funds_env,
            actions_: actions,
            advantages_: advantages,
            rewards_: returns,
            lr_: lr,
            cliprange_: cliprange,
            oldneglopac_: neglogpacs,
            oldvpred_: values,
        }
        policy_loss, value_loss, policy_entropy, _ = sess.run(
            [pg_loss, vf_loss, entropy, _train], td_map)
        return policy_loss, value_loss, policy_entropy

    def save(save_path):
        """Save the model."""
        saver = tf.train.Saver()
        saver.save(sess, save_path)

    def load(load_path):
        """Load the model."""
        saver = tf.train.Saver()
        print('Loading ' + load_path)
        saver.restore(sess, load_path)

    self.train = train
    self.train_model = train_model
    self.step_model = step_model
    self.step = step_model.step
    self.value = step_model.value
    # self.initial_state = step_model.initial_state
    self.save = save
    self.load = load

    tf.global_variables_initializer().run(session=sess)
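# The comments above derive ratio = exp(old_neglogp - new_neglogp) and the
# max-of-negations trick. A tiny NumPy check (illustrative values only, not
# from any real rollout) makes the clipped surrogate concrete:
import numpy as np

ratio = np.array([0.5, 1.0, 1.5])   # pi_new / pi_old per sample
adv = np.array([1.0, -1.0, 2.0])    # advantages
clip = 0.2

pg_unclipped = -adv * ratio
pg_clipped = -adv * np.clip(ratio, 1.0 - clip, 1.0 + clip)
# Maximizing min(adv * ratio, adv * clipped_ratio) is the same as minimizing
# max(-adv * ratio, -adv * clipped_ratio), which is what the graph computes.
pg_loss = np.mean(np.maximum(pg_unclipped, pg_clipped))
print(pg_loss)  # the ratio-1.5 sample is clipped to 1.2, limiting the update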
def save(path):
    # save the values of the trainable variables (we, h0, h1, ...)
    ps = sess.run(utils.find_trainable_variables('model'))
    joblib.dump(ps, utils.make_path(path))
if params.head_type == "clf":
    logits = result[3]    # shape: [?, 2]; 2-way: input classified right/wrong
    clf_loss = result[4]  # shape: [?]; label - predicted_logit
    lm_loss = result[5]   # shape: [?]
    loss = clf_loss
elif params.head_type == "lm":
    lm_loss = result[3]
    loss = lm_loss
else:
    raise ValueError("Not a valid head_type!")

config = tf.ConfigProto()  # TensorFlow session properties (ask Fabian)
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)

# trainable variables from the model, used for gradient descent
t_vars = utils.find_trainable_variables('model')

# --- load pretrained parameters -------------------------------------------
print("\nLoading pretrained parameters ...")
# Initialize global variables
transformer_decoder.init_and_load_parameter_from_file(sess=sess, path="model/")

# --- add evaluation nodes to the tensorflow graph --------------------------
# Just adds the nodes; evaluation itself happens in iter_apply / iter_predict.
# Performs the forward pass as in training, but with dropout turned off.
# eval_mgpu_result returns the losses but no grads, since we only evaluate.
eval_mgpu_result = transformer_decoder.mgpu_predict(X_train, M_train, Y_train)
def train(self):
    global_step = tf.train.get_or_create_global_step()
    X_train = tf.placeholder(tf.int32, [self.n_batch_train, 2, n_ctx, 2])
    M_train = tf.placeholder(tf.float32, [self.n_batch_train, 2, n_ctx])
    X = tf.placeholder(tf.int32, [None, 2, n_ctx, 2])
    M = tf.placeholder(tf.float32, [None, 2, n_ctx])
    Y_train = tf.placeholder(tf.int32, [self.n_batch_train])
    Y = tf.placeholder(tf.int32, [None])

    # self.train, self.logits, self.clf_losses, self.lm_losses = \
    #     self.mgpu_train(self.X_train, self.M_train, self.Y_train)
    xs = [X_train, M_train, Y_train]
    gpu_ops = []
    gpu_grads = []
    xs = (tf.split(x, n_gpu, 0) for x in xs)
    optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=b1, beta2=b2,
                                       epsilon=e)
    for i, xs in enumerate(zip(*xs)):
        do_reuse = True if i > 0 else None
        with tf.device(assign_to_gpu(i, "/gpu:0")), tf.variable_scope(
                tf.get_variable_scope(), reuse=do_reuse):
            logits, clf_losses, lm_losses = self.model(*xs, train=True,
                                                       reuse=do_reuse)
            if lm_coef > 0:
                train_loss = tf.reduce_mean(clf_losses) + \
                    lm_coef * tf.reduce_mean(lm_losses)
            else:
                train_loss = tf.reduce_mean(clf_losses)
            raw_grads_and_vars = optimizer.compute_gradients(train_loss)
            grads_and_vars = [
                (tf.clip_by_global_norm([gv[0]], max_grad_norm)[0][0], gv[1])
                for gv in raw_grads_and_vars
            ]
            gpu_grads.append(grads_and_vars)
            gpu_ops.append([logits, clf_losses, lm_losses])
    ops = [tf.concat(op, 0) for op in zip(*gpu_ops)]
    logits, clf_losses, lm_losses = ops
    grads = average_grads(gpu_grads)
    train_op = optimizer.apply_gradients(grads, global_step=global_step)
    clf_loss = tf.reduce_mean(clf_losses)
    saver = tf.train.Saver(max_to_keep=5)

    self.params = find_trainable_variables('model_lm')
    if pre_load:
        restore_op = [
            p.assign(ip) for p, ip in zip(
                self.params, joblib.load(lm_dir + '/model_lm.params'))
        ]

    self.eval_mgpu_logits, self.eval_mgpu_clf_losses, \
        self.eval_mgpu_lm_losses = self.mgpu_predict(X_train, M_train, Y_train)
    self.eval_logits, self.eval_clf_losses, self.eval_lm_losses = self.model(
        X, M, Y, train=False, reuse=True)
    self.eval_clf_loss = tf.reduce_mean(self.eval_clf_losses)
    self.eval_mgpu_clf_loss = tf.reduce_mean(self.eval_mgpu_clf_losses)
    summary_op = tf.get_collection(tf.GraphKeys.SUMMARIES)

    def trva_split(data, index):
        return [data[i] for i in index]

    x1, x2, y = encode_dataset(self.text_encoder, atec(data_dir))
    valid_index = np.load('data/valid_index.npy')
    if data_dir == 'data/para.tsv':
        valid_index = np.concatenate([
            valid_index, valid_index + len(y) // 4,
            valid_index + len(y) // 2, valid_index + 3 * len(y) // 4
        ])
    valid_index = valid_index.tolist()
    train_index = list(set(valid_index) ^ set(range(len(y))))
    trX1, trX2, trY = (trva_split(x1, train_index),
                       trva_split(x2, train_index),
                       trva_split(y, train_index))
    vaX1, vaX2, vaY = (trva_split(x1, valid_index),
                       trva_split(x2, valid_index),
                       trva_split(y, valid_index))
    trX, trM = self.transform_roc(trX1, trX2)
    vaX, vaM = self.transform_roc(vaX1, vaX2)
    n_train = len(trY)
    n_valid = len(vaY)
    self.n_updates_total = (n_train // self.n_batch_train) * n_iter

    def log():
        def iter_apply(Xs, Ms, Ys):
            fns = [lambda x: np.concatenate(x, 0), lambda x: float(np.sum(x))]
            results = []
            for xmb, mmb, ymb in iter_data((Xs, Ms, Ys),
                                           n_batch=self.n_batch_train,
                                           truncate=False, verbose=True):
                n = len(xmb)
                if n == self.n_batch_train:
                    res = sess.run(
                        [self.eval_mgpu_logits, self.eval_mgpu_clf_loss],
                        {X_train: xmb, M_train: mmb, Y_train: ymb})
                else:
                    res = sess.run([self.eval_logits, self.eval_clf_loss],
                                   {X: xmb, M: mmb, Y: ymb})
                res = [r * n for r in res]
                results.append(res)
            results = zip(*results)
            return [fn(res) for res, fn in zip(results, fns)]

        # global best_score
        tr_logits, tr_cost = iter_apply(trX[:n_valid], trM[:n_valid],
                                        trY[:n_valid])
        va_logits, va_cost = iter_apply(vaX, vaM, vaY)
        tr_cost = tr_cost / len(trY[:n_valid])
        va_cost = va_cost / n_valid
        tr_f1 = f1_score(trY[:n_valid], np.argmax(tr_logits, 1)) * 100.
        va_f1 = f1_score(vaY, np.argmax(va_logits, 1)) * 100.
        tf.logging.info('%d %d %.3f %.3f %.2f %.2f' %
                        (n_epochs, n_updates, tr_cost, va_cost, tr_f1, va_f1))

    scaffold = tf.train.Scaffold(saver=saver)
    log_hook = tf.train.LoggingTensorHook(
        {'step': global_step, 'train_loss': clf_loss}, every_n_iter=100)
    summary_hook = tf.train.SummarySaverHook(save_steps=100,
                                             output_dir=save_dir,
                                             summary_op=summary_op)
    hooks = [summary_hook, log_hook]
    tf_config = tf.ConfigProto(allow_soft_placement=True)
    tf_config.gpu_options.allow_growth = True
    n_epochs = 0
    with tf.train.MonitoredTrainingSession(hooks=hooks,
                                           save_checkpoint_secs=600,
                                           checkpoint_dir=save_dir,
                                           scaffold=scaffold,
                                           config=tf_config) as sess:
        if pre_load:
            sess.run(restore_op)
        for i in range(n_iter):
            for xmb, mmb, ymb in iter_data(
                    (shuffle(trX, trM, trY, random_state=np.random)),
                    n_batch=self.n_batch_train, truncate=True, verbose=True):
                cost, _, n_updates = sess.run(
                    [clf_loss, train_op, global_step],
                    {X_train: xmb, M_train: mmb, Y_train: ymb})
                if n_updates % 100 == 0:
                    log()
            n_epochs += 1
            log()
train_flag = tf.placeholder_with_default(True, shape=())
print(data_iterator.max_word)
dp = {
    'max_word': data_iterator.max_word,
    'n_vocab': data_iterator.n_vocab,
    'n_special': 3,
    'clf_token': data_iterator.encoder['_classify_'],
}
logits, clf_losses, lm_losses = model(X, M, Y, train_flag, data_params=dp)

# lr, global_step = decay_learning_rate(6.25e-5)
optimizer = tf.train.AdamOptimizer(6.25e-5)
train_op = optimizer.minimize(
    clf_losses + 0.5 * lm_losses)  # , global_step=global_step)
params = find_trainable_variables('transformer')

sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
sess.run(tf.global_variables_initializer())

shapes = json.load(open('./pretrain/params_shapes.json'))
offsets = np.cumsum([np.prod(shape) for shape in shapes])
init_params = [
    np.load('./pretrain/params_{}.npy'.format(n)) for n in range(10)
]
init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1]
init_params = [
    param.reshape(shape) for param, shape in zip(init_params, shapes)
]
init_params[0] = init_params[0][:max_word]
init_params[0] = np.concatenate([
    init_params[1],
def init_and_load_parameter_from_file(self, sess, path):
    tvars = utils.find_trainable_variables('model')
    # load the list of parameter shapes from the json file
    with open(os.path.join(path, 'params_shapes.json')) as f:
        shapes = json.load(f)  # [[512, 768], [40478, 768], [1, 768, 2304], ...]
    """
    - np.cumsum:
        a = np.array([[1, 2, 3], [4, 5, 6]])
        np.cumsum(a) = array([1, 3, 6, 10, 15, 21])
    - load all the numpy params into a list
    - concatenate the params on axis 0, split according to the offsets list,
      and drop the last (empty) sub-array.
      np.split: if an index exceeds the dimension of the array along the axis,
      an empty sub-array is returned correspondingly.
        x = np.arange(5.0, 13.0)
        np.split(x, [3, 5, 6, 10])  # split x at the indices in the list
        >>> [array([5., 6., 7.]),       # first three: [0:3]
             array([8., 9.]),           # next two: [3:5]
             array([10.]),              # next one: [5:6]
             array([11., 12.]),         # [6:10], but only two elements remain
             array([], dtype=float64)]  # [10:], nothing left, so empty
    - reshape each split param into its corresponding shape
    - give embeddings to the special tokens (_classify_, _delimiter_, ...)
    - concatenate the dictionary embeddings, the special embeddings, and the
      learned / pre-trained input-sequence embeddings
    """
    offsets = np.cumsum([np.prod(shape) for shape in shapes])
    init_params = [
        np.load(os.path.join(path, 'params_{}.npy'.format(n)))
        for n in range(10)
    ]
    init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1]
    init_params = [
        param.reshape(shape) for param, shape in zip(init_params, shapes)
    ]
    embeddings_special = (
        np.random.randn(self.params.n_special, self.params.n_embd) *
        0.02).astype(np.float32)
    init_embeddings = np.concatenate([
        init_params[1],
        embeddings_special,
        init_params[0][:self.params.n_ctx],
    ])
    init_params[0] = init_embeddings
    del init_params[1]  # delete the vocab / dictionary embeddings

    if self.params.n_transfer == -1:
        self.params.n_transfer = 0
    else:
        # 1 (for we, i.e. the dictionary, special and input embeddings) +
        # 12 trainable variables per layer for each of the 12 layers (= 144)
        self.params.n_transfer = 1 + self.params.n_transfer * 12
    sess.run(tf.global_variables_initializer())
    """
    Perform transfer learning: assign the first n_transfer variables
    (compare how init_params and tvars look).
    tvars contains:
      index 0: word embeddings (model/we:0) [40558, 768]: concat of the
               dictionary, special and input-sequence embeddings,
      index 1: (model/h0/attn/c_attn/w:0) [1, 768, 2304]: self-attention
               weight for the similarity calculation in layer 0,
      index 2: (model/h0/attn/c_attn/b:0) [2304]: self-attention bias, layer 0,
      index 3: (model/h0/attn/c_proj/w:0): weight for the final attention
               output after the softmax over all similarity outputs, layer 0,
      ... (c_proj/b:0, layer_norm1, mlp, layer_norm2), and so on for the
      remaining 11 layers.
    (The clf weight and bias are not assigned, since no learning has been
    done for them yet; they stay freshly initialized.)
    """
    if self.use_encoder is False:
        sess.run([
            p.assign(ip)
            for p, ip in zip(tvars[:self.params.n_transfer],
                             init_params[:self.params.n_transfer])
        ])
    else:
        # load only the word embeddings
        sess.run([p.assign(ip) for p, ip in zip(tvars[:1], init_params[:1])])
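# The docstring above walks through the cumsum/split/reshape pipeline in
# prose. A self-contained NumPy round trip (with made-up shapes, not the real
# checkpoint's) demonstrates the same mechanics:
import numpy as np

shapes = [[2, 3], [4], [3, 2]]                     # stand-in for params_shapes.json
flat = np.arange(16, dtype=np.float32)             # stand-in for the concatenated .npy shards
offsets = np.cumsum([np.prod(s) for s in shapes])  # [6, 10, 16]

# np.split yields one sub-array per shape plus a trailing empty array,
# which is why the real loader slices it off with [:-1]
params = np.split(flat, offsets)[:-1]
params = [p.reshape(s) for p, s in zip(params, shapes)]
for p in params:
    print(p.shape)  # (2, 3), (4,), (3, 2)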
def __init__(self, policy, ob_space, ac_space, nenvs, nsteps, ent_coef=0.01,
             vf_coef=0.5, mf_coef=0.5, max_grad_norm=0.5, lr=7e-4, alpha=0.99,
             epsilon=1e-5, total_timesteps=int(80e6), lrschedule='linear'):
    sess = tf_util.make_session()
    nact = ac_space.n
    nbatch = nenvs * nsteps

    A = tf.placeholder(tf.int32, [nbatch])
    ADV = tf.placeholder(tf.float32, [nbatch])
    ADV_MOMENT = tf.placeholder(tf.float32, [nbatch])
    R = tf.placeholder(tf.float32, [nbatch])
    R2 = tf.placeholder(tf.float32, [nbatch])
    LR = tf.placeholder(tf.float32, [])
    ENT_COEF = tf.placeholder(tf.float32, [])

    step_model = policy(sess, ob_space, ac_space, nenvs, 1, reuse=False)
    train_model = policy(sess, ob_space, ac_space, nenvs * nsteps, nsteps,
                         reuse=True)

    neglogpac = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=train_model.pi, labels=A)
    pg_loss = tf.reduce_mean(ADV * neglogpac)
    vf_loss = tf.reduce_mean(mse(tf.squeeze(train_model.vf), R))
    mf_loss = tf.reduce_mean(mse(tf.squeeze(train_model.mf), R2))
    entropy = tf.reduce_mean(cat_entropy(train_model.pi))
    ent_coef = Scheduler(v=ent_coef, nvalues=total_timesteps / 10,
                         schedule='step')
    mf_coef = 0.01
    loss = pg_loss - entropy * ENT_COEF + vf_loss * vf_coef + \
        mf_loss * mf_coef
    # loss = pg_loss + vf_loss * vf_coef + mf_loss * mf_coef
    # loss = pg_loss - entropy * ent_coef + vf_loss * vf_coef

    params = find_trainable_variables("model")
    grads = tf.gradients(loss, params)
    if max_grad_norm is not None:
        grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
    grads = list(zip(grads, params))
    trainer = tf.train.RMSPropOptimizer(learning_rate=LR, decay=alpha,
                                        epsilon=epsilon)
    _train = trainer.apply_gradients(grads)

    lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)

    def train(obs, states, rewards, rewards_square, masks, actions, values,
              moments):
        # sample a value estimate from N(values, sqrt(max(moments - values^2, 0)))
        values_random = np.random.normal(
            loc=values, scale=np.sqrt(np.maximum(moments - values**2, 0)))
        # values_random = values - np.sqrt(np.maximum(moments - values**2, 0))
        advs = rewards - values_random
        # advs = (1 - 2 * rewards) * rewards - values + 2 * values * values
        advs_moment = rewards_square - moments
        # advs = (1 + 2 * rewards) * rewards
        # advs_moment = rewards_square
        for step in range(len(obs)):
            cur_lr = lr.value()
            cur_ent_coef = ent_coef.value()
        td_map = {
            train_model.X: obs, A: actions, ADV: advs,
            ADV_MOMENT: advs_moment, R: rewards, R2: rewards_square,
            LR: cur_lr, ENT_COEF: cur_ent_coef
        }
        if states is not None:
            td_map[train_model.S] = states
            td_map[train_model.M] = masks
        policy_loss, value_loss, moment_loss, policy_entropy, _ = sess.run(
            [pg_loss, vf_loss, mf_loss, entropy, _train], td_map)
        return policy_loss, value_loss, moment_loss, policy_entropy

    def save(save_path):
        ps = sess.run(params)
        make_path(osp.dirname(save_path))
        joblib.dump(ps, save_path)

    def load(load_path):
        loaded_params = joblib.load(load_path)
        restores = []
        for p, loaded_p in zip(params, loaded_params):
            restores.append(p.assign(loaded_p))
        ps = sess.run(restores)

    self.train = train
    self.train_model = train_model
    self.step_model = step_model
    self.step = step_model.step
    self.value = step_model.value
    self.initial_state = step_model.initial_state
    self.save = save
    self.load = load
    tf.global_variables_initializer().run(session=sess)
n_train = len(trY)
n_valid = len(vaY)
n_batch_train = n_batch * n_gpu
n_updates_total = (n_train // n_batch_train) * n_iter

X_train = tf.placeholder(tf.int32, [n_batch_train, 1, n_ctx, 2])
M_train = tf.placeholder(tf.float32, [n_batch_train, 1, n_ctx])
X = tf.placeholder(tf.int32, [None, 1, n_ctx, 2])
M = tf.placeholder(tf.float32, [None, 1, n_ctx])
Y_train = tf.placeholder(tf.int32, [n_batch_train])
Y = tf.placeholder(tf.int32, [None])

train, logits, clf_losses, lm_losses = mgpu_train(X_train, M_train, Y_train)
clf_loss = tf.reduce_mean(clf_losses)
params = find_trainable_variables('model')

sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
sess.run(tf.global_variables_initializer())

shapes = json.load(open('model/params_shapes.json'))
offsets = np.cumsum([np.prod(shape) for shape in shapes])
init_params = [np.load('model/params_{}.npy'.format(n)) for n in range(10)]
init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1]
init_params = [param.reshape(shape)
               for param, shape in zip(init_params, shapes)]
init_params[0] = init_params[0][:n_ctx]
init_params[0] = np.concatenate(
    [init_params[1],
     (np.random.randn(n_special, n_embd) * 0.02).astype(np.float32),
     init_params[0]], 0)
del init_params[1]
if n_transfer == -1:
    n_transfer = 0
def __init__(self, policy, ob_space, ac_space, nenvs, nsteps, ent_coef=0.01,
             vf_coef=0.5, max_grad_norm=0.5, lr=7e-4, alpha=0.99,
             epsilon=1e-5, total_timesteps=int(80e6), lrschedule='linear',
             summary_dir=None):
    sess = tf_util.make_session()
    nbatch = nenvs * nsteps

    A = tf.placeholder(tf.int32, [nbatch])
    ADV = tf.placeholder(tf.float32, [nbatch])
    R = tf.placeholder(tf.float32, [nbatch])
    LR = tf.placeholder(tf.float32, [])

    step_model = policy(sess, ob_space, ac_space, nenvs, 1, reuse=False)
    train_model = policy(sess, ob_space, ac_space, nenvs * nsteps, nsteps,
                         reuse=True)

    neglogpac = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=train_model.pi, labels=A)
    pg_loss = tf.reduce_mean(ADV * neglogpac)
    vf_loss = tf.reduce_mean(mse(tf.squeeze(train_model.vf), R))
    entropy = tf.reduce_mean(cat_entropy(train_model.pi))
    loss = pg_loss - entropy * ent_coef + vf_loss * vf_coef

    params = find_trainable_variables("model")
    grads = tf.gradients(loss, params)
    if max_grad_norm is not None:
        grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
    grads = list(zip(grads, params))
    trainer = tf.train.RMSPropOptimizer(learning_rate=LR, decay=alpha,
                                        epsilon=epsilon)
    _train = trainer.apply_gradients(grads)

    lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)

    # storing summaries
    episode_reward = tf.placeholder("float")
    tf.summary.scalar("policy_loss", pg_loss)
    tf.summary.scalar("entropy", entropy)
    tf.summary.scalar("value_loss", vf_loss)
    tf.summary.scalar("episode_reward", episode_reward)
    summary_op = tf.summary.merge_all()

    def train(obs, states, mean_reward, rewards, masks, actions, values):
        advs = rewards - values
        for step in range(len(obs)):
            cur_lr = lr.value()
        td_map = {
            train_model.X: obs, A: actions, ADV: advs, R: rewards,
            LR: cur_lr, episode_reward: mean_reward
        }
        if states is not None:
            td_map[train_model.S] = states
            td_map[train_model.M] = masks
        policy_loss, value_loss, policy_entropy, summary, _ = sess.run(
            [pg_loss, vf_loss, entropy, summary_op, _train], td_map)
        return policy_loss, value_loss, policy_entropy, summary

    def save(save_path):
        ps = sess.run(params)
        make_path(osp.dirname(save_path))
        joblib.dump(ps, save_path)

    def load(load_path):
        loaded_params = joblib.load(load_path)
        restores = []
        for p, loaded_p in zip(params, loaded_params):
            restores.append(p.assign(loaded_p))
        sess.run(restores)

    self.train = train
    self.train_model = train_model
    self.step_model = step_model
    self.step = step_model.step
    self.value = step_model.value
    self.initial_state = step_model.initial_state
    self.save = save
    self.load = load
    tf.global_variables_initializer().run(session=sess)
    self.train_writer = tf.summary.FileWriter(summary_dir, sess.graph)
def fever_app(caller):
    global db, tokenizer, text_encoder, encoder, X_train, M_train, X, M, \
        Y_train, Y, params, sess, n_batch_train, db_file, drqa_index, \
        max_page, max_sent, encoder_path, bpe_path, n_ctx, n_batch, model_file
    global n_vocab, n_special, n_y, max_len, clf_token, eval_lm_losses, \
        eval_clf_losses, eval_mgpu_clf_losses, eval_logits, eval_mgpu_logits

    LogHelper.setup()
    logger = LogHelper.get_logger("papelo")

    logger.info("Load config")
    config = json.load(
        open(os.getenv("CONFIG_FILE", "configs/config-docker.json")))
    globals().update(config)
    print(globals())

    logger.info("Set Seeds")
    random.seed(42)
    np.random.seed(42)
    tf.set_random_seed(42)

    logger.info("Load FEVER DB")
    db = FeverDocDB(db_file)
    retrieval = TopNDocsTopNSents(db, max_page, max_sent, True, False,
                                  drqa_index)

    logger.info("Init word tokenizer")
    tokenizer = SimpleWordSplitter()

    # Prepare text encoder
    logger.info("Load BPE Text Encoder")
    text_encoder = TextEncoder(encoder_path, bpe_path)
    encoder = text_encoder.encoder
    n_vocab = len(text_encoder.encoder)
    n_y = 3
    encoder['_start_'] = len(encoder)
    encoder['_delimiter_'] = len(encoder)
    encoder['_classify_'] = len(encoder)
    clf_token = encoder['_classify_']
    n_special = 3
    max_len = n_ctx // 2 - 2
    n_batch_train = n_batch

    logger.info("Create TF Placeholders")
    X_train = tf.placeholder(tf.int32, [n_batch, 1, n_ctx, 2])
    M_train = tf.placeholder(tf.float32, [n_batch, 1, n_ctx])
    X = tf.placeholder(tf.int32, [None, 1, n_ctx, 2])
    M = tf.placeholder(tf.float32, [None, 1, n_ctx])
    Y_train = tf.placeholder(tf.int32, [n_batch])
    Y = tf.placeholder(tf.int32, [None])

    logger.info("Model Setup")
    eval_logits, eval_clf_losses, eval_lm_losses = model(X, M, Y, train=False,
                                                         reuse=None)
    eval_mgpu_logits, eval_mgpu_clf_losses, eval_mgpu_lm_losses = \
        mgpu_predict(X_train, M_train, Y_train)

    logger.info("Create TF Session")
    params = find_trainable_variables('model')
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=float(
        os.getenv("TF_GPU_MEMORY_FRACTION", "0.5")))
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                            gpu_options=gpu_options))
    sess.run(tf.global_variables_initializer())
    sess.run([p.assign(ip) for p, ip in zip(params, joblib.load(model_file))])
    logger.info("Ready")

    def predict(instances):
        predictions = []
        for instance in tqdm(instances):
            sents = retrieval.get_sentences_for_claim(instance["claim"])
            found_evidence = resolve_evidence(sents)
            instance["tokenized_claim"] = " ".join(
                map(lambda x: x.text,
                    tokenizer.split_words(instance["claim"])))
            sub_instances = make_instances(instance, found_evidence)
            sub_predictions = predict_sub_instances(text_encoder,
                                                    sub_instances)

            refute_evidence = [i for i, x in enumerate(sub_predictions)
                               if x == 2]
            support_evidence = [i for i, x in enumerate(sub_predictions)
                                if x == 0]

            if len(support_evidence):
                predicted_label = "SUPPORTS"
                predicted_evidence = [
                    [found_evidence[i]["title"],
                     found_evidence[i]["line_number"]]
                    for i in support_evidence]
            elif len(refute_evidence):
                predicted_label = "REFUTES"
                predicted_evidence = [
                    [found_evidence[i]["title"],
                     found_evidence[i]["line_number"]]
                    for i in refute_evidence]
            else:
                predicted_label = "NOT ENOUGH INFO"
                predicted_evidence = []

            predictions.append({"predicted_label": predicted_label,
                                "predicted_evidence": predicted_evidence})
        return predictions

    return caller(predict)
def train(self):
    global_step = tf.train.get_or_create_global_step()
    X_train = tf.placeholder(tf.int32, [self.n_batch_train, 2, n_ctx, 2])
    M_train = tf.placeholder(tf.float32, [self.n_batch_train, 2, n_ctx])
    X = tf.placeholder(tf.int32, [None, 2, n_ctx, 2])
    M = tf.placeholder(tf.float32, [None, 2, n_ctx])
    Y_train = tf.placeholder(tf.int32, [self.n_batch_train])
    Y = tf.placeholder(tf.int32, [None])

    # self.train, self.logits, self.clf_losses, self.lm_losses = \
    #     self.mgpu_train(self.X_train, self.M_train, self.Y_train)
    xs = [X_train, M_train, Y_train]
    gpu_ops = []
    gpu_grads = []
    xs = (tf.split(x, n_gpu, 0) for x in xs)
    optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=b1, beta2=b2,
                                       epsilon=e)
    for i, xs in enumerate(zip(*xs)):
        do_reuse = True if i > 0 else None
        with tf.device(assign_to_gpu(i, "/gpu:0")), tf.variable_scope(
                tf.get_variable_scope(), reuse=do_reuse):
            logits, clf_losses, lm_losses = self.model(*xs, train=True,
                                                       reuse=do_reuse)
            if lm_coef > 0:
                train_loss = tf.reduce_mean(clf_losses) + \
                    lm_coef * tf.reduce_mean(lm_losses)
            else:
                train_loss = tf.reduce_mean(clf_losses)
            raw_grads_and_vars = optimizer.compute_gradients(train_loss)
            grads_and_vars = [
                (tf.clip_by_global_norm([gv[0]], max_grad_norm)[0][0], gv[1])
                for gv in raw_grads_and_vars
            ]
            gpu_grads.append(grads_and_vars)
            gpu_ops.append([logits, clf_losses, lm_losses])
    ops = [tf.concat(op, 0) for op in zip(*gpu_ops)]
    logits, clf_losses, lm_losses = ops
    grads = average_grads(gpu_grads)
    train_op = optimizer.apply_gradients(grads, global_step=global_step)
    clf_loss = tf.reduce_mean(clf_losses)
    saver = tf.train.Saver(max_to_keep=5)

    self.params = find_trainable_variables('model_lm')
    self.eval_mgpu_logits, self.eval_mgpu_clf_losses, \
        self.eval_mgpu_lm_losses = self.mgpu_predict(X_train, M_train, Y_train)
    self.eval_logits, self.eval_clf_losses, self.eval_lm_losses = self.model(
        X, M, Y, train=False, reuse=True)
    self.eval_clf_loss = tf.reduce_mean(self.eval_clf_losses)
    self.eval_mgpu_clf_loss = tf.reduce_mean(self.eval_mgpu_clf_losses)
    summary_op = tf.get_collection(tf.GraphKeys.SUMMARIES)

    def trva_split(data, index):
        return [data[i] for i in index]

    x1, x2, y = encode_dataset(self.text_encoder, atec(data_dir))
    valid_index = np.load('data/valid_index.npy')
    if data_dir == 'data/para.tsv':
        valid_index = np.concatenate([
            valid_index, valid_index + len(y) // 4,
            valid_index + len(y) // 2, valid_index + 3 * len(y) // 4
        ])
    valid_index = valid_index.tolist()
    train_index = list(set(valid_index) ^ set(range(len(y))))
    trX1, trX2, trY = (trva_split(x1, train_index),
                       trva_split(x2, train_index),
                       trva_split(y, train_index))
    vaX1, vaX2, vaY = (trva_split(x1, valid_index),
                       trva_split(x2, valid_index),
                       trva_split(y, valid_index))
    trX, trM = self.transform_roc(trX1, trX2)
    vaX, vaM = self.transform_roc(vaX1, vaX2)
    n_train = len(trY)
    n_valid = len(vaY)
    self.n_updates_total = (n_train // self.n_batch_train) * n_iter

    self.build_graph()
    if pre_load:
        shapes = json.load(open('model/params_shapes.json'))
        offsets = np.cumsum([np.prod(shape) for shape in shapes])
        init_params = [
            np.load('model/params_{}.npy'.format(n)) for n in range(10)
        ]
        init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1]
        init_params = [
            param.reshape(shape)
            for param, shape in zip(init_params, shapes)
        ]
        init_params[0] = init_params[0][:n_ctx]
        init_params[0] = np.concatenate([
            init_params[1],
            (np.random.randn(self.n_special, n_embd) * 0.02).astype(
                np.float32),
            init_params[0]
        ], 0)
        del init_params[1]
        if self.n_transfer == -1:
            self.n_transfer = 0
        else:
            self.n_transfer = 1 + self.n_transfer * 12
        self.sess.run([
            p.assign(ip)
            for p, ip in zip(self.params[:self.n_transfer],
                             init_params[:self.n_transfer])
        ])

    if not new_model:
        print('loading old model')
        self.load()
        print('load success')

    n_updates = 0
    n_epochs = 0
    self.save(os.path.join(save_dir, desc, 'best_params.jl'))
    self.best_score = 0

    def log():
        def iter_apply(Xs, Ms, Ys):
            fns = [lambda x: np.concatenate(x, 0), lambda x: float(np.sum(x))]
            results = []
            for xmb, mmb, ymb in iter_data((Xs, Ms, Ys),
                                           n_batch=self.n_batch_train,
                                           truncate=False, verbose=True):
                n = len(xmb)
                if n == self.n_batch_train:
                    res = sess.run(
                        [self.eval_mgpu_logits, self.eval_mgpu_clf_loss],
                        {X_train: xmb, M_train: mmb, Y_train: ymb})
                else:
                    res = sess.run([self.eval_logits, self.eval_clf_loss],
                                   {X: xmb, M: mmb, Y: ymb})
                res = [r * n for r in res]
                results.append(res)
            results = zip(*results)
            return [fn(res) for res, fn in zip(results, fns)]

        # global best_score
        tr_logits, tr_cost = iter_apply(trX[:n_valid], trM[:n_valid],
                                        trY[:n_valid])
        va_logits, va_cost = iter_apply(vaX, vaM, vaY)
        tr_cost = tr_cost / len(trY[:n_valid])
        va_cost = va_cost / n_valid
        tr_f1 = f1_score(trY[:n_valid], np.argmax(tr_logits, 1)) * 100.
        va_f1 = f1_score(vaY, np.argmax(va_logits, 1)) * 100.
        self.logger.log(n_epochs=n_epochs, n_updates=n_updates,
                        tr_cost=tr_cost, va_cost=va_cost, tr_f1=tr_f1,
                        va_f1=va_f1)
        print('%d %d %.3f %.3f %.2f %.2f' %
              (n_epochs, n_updates, tr_cost, va_cost, tr_f1, va_f1))
        score = va_f1
        if score > self.best_score:
            self.best_score = score
            self.save(os.path.join(save_dir, desc, 'best_params.jl'))

    for i in range(n_iter):
        for xmb, mmb, ymb in iter_data(
                (shuffle(trX, trM, trY, random_state=np.random)),
                n_batch=self.n_batch_train, truncate=True, verbose=True):
            cost, _ = self.sess.run([self.clf_loss, self.train], {
                self.X_train: xmb, self.M_train: mmb, self.Y_train: ymb
            })
            n_updates += 1
            if n_updates % 1000 == 0:
                log()
        n_epochs += 1
        log()
def ccc_train(self):
    # Resolve hostnames and ports of the other nodes
    host, hosts = client(bootstrap_host, bootstrap_port)

    # Create a cluster and identify the job name and task of this node
    cluster = tf.train.ClusterSpec({
        'ps': hosts[:num_ps],
        'worker': hosts[num_ps:]
    })
    task = hosts.index(host)
    job_name = ('ps', 'worker')[task >= num_ps]
    task = cluster.job_tasks(job_name).index(host)

    tf_config = tf.ConfigProto(allow_soft_placement=True)
    tf_config.gpu_options.allow_growth = True
    server = tf.train.Server(cluster, job_name=job_name, task_index=task,
                             config=tf_config)

    if job_name == 'ps':
        # create a shared queue on the parameter server which is visible on
        # /job:ps/task:%d
        with tf.device('/job:ps/task:%d' % task):
            queue = tf.FIFOQueue(cluster.num_tasks('worker'), tf.int32,
                                 shared_name='done_queue%d' % task)
        # wait for the queue to be filled
        with tf.Session(server.target) as sess:
            for i in range(cluster.num_tasks('worker')):
                sess.run(queue.dequeue())
                print('ps:%d received "done" from worker:%d' % (task, i))
            print('ps:%d quitting' % task)

    elif job_name == 'worker':
        with tf.device(tf.train.replica_device_setter(
                worker_device='/job:worker/task:%d' % task, cluster=cluster)):
            global_step = tf.train.get_or_create_global_step()
            sentences = self.batched_data(tfrecord_filename,
                                          self.single_example_parser,
                                          self.n_batch_train,
                                          padded_shapes=tf.Dimension(n_ctx),
                                          num_epochs=n_iter)
            sentences = tf.cast(sentences, tf.int32)
            max_len = tf.shape(sentences)[1]  # sentences.get_shape()[1]
            xmb = tf.reshape(sentences, [self.n_batch_train, 1, max_len, 1])
            M_train = tf.cast(
                tf.reshape(tf.sign(xmb), [self.n_batch_train, 1, max_len]),
                tf.float32)
            positions = tf.reshape(
                tf.range(self.n_vocab + self.n_special,
                         self.n_vocab + self.n_special + max_len),
                shape=[1, 1, max_len, 1])
            positions = tf.tile(positions, [self.n_batch_train, 1, 1, 1])
            X_train = tf.concat([xmb, positions], axis=3)

            # a single optimizer for all towers (the original constructed an
            # identical AdamOptimizer again inside the loop; once is enough)
            optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=b1,
                                               beta2=b2, epsilon=e)
            gpu_grads = []
            gpu_loss = []
            gpu_ppl = []
            xs = [X_train, M_train]
            xs = (tf.split(x, n_gpu, 0) for x in xs)
            for i, xs in enumerate(zip(*xs)):
                do_reuse = True if i > 0 else None
                with tf.device(assign_to_gpu(i)), tf.variable_scope(
                        tf.get_variable_scope(), reuse=do_reuse):
                    lm_losses = self.model(*xs, train=True, num_ps=num_ps)
                    train_ppl_single = tf.reduce_mean(math.e ** lm_losses)
                    train_loss_single = tf.reduce_mean(lm_losses)
                    gpu_loss.append(train_loss_single)
                    gpu_ppl.append(train_ppl_single)
                    raw_grads_and_vars = optimizer.compute_gradients(
                        train_loss_single)
                    grads_and_vars = [
                        (tf.clip_by_global_norm([gv[0]],
                                                max_grad_norm)[0][0], gv[1])
                        for gv in raw_grads_and_vars
                    ]
                    gpu_grads.append(grads_and_vars)
            train_ppl = tf.reduce_mean(gpu_ppl)
            train_loss = tf.reduce_mean(gpu_loss)
            grads = average_grads(gpu_grads)
            train_op = optimizer.apply_gradients(grads,
                                                 global_step=global_step)
            saver = tf.train.Saver(max_to_keep=5)

            X = tf.placeholder(tf.int32, [None, 1, n_ctx, 2])
            M = tf.placeholder(tf.float32, [None, 1, n_ctx])
            valid_lm_losses = self.model(X, M, train=False, reuse=True)
            valid_ppl = tf.reduce_mean(math.e ** valid_lm_losses)
            valid_loss = tf.reduce_mean(valid_lm_losses)
            self.params = find_trainable_variables('model_lm')

            tf.summary.scalar('train_loss', train_loss)
            # tf.summary.scalar('valid_loss', valid_loss)
            tf.summary.scalar('train_ppl', train_ppl)
            # tf.summary.scalar('valid_ppl', valid_ppl)
            summary_op = tf.summary.merge_all()

        done_ops = []
        # create a shared queue on the worker which is visible on
        # /job:ps/task:%d
        for i in range(cluster.num_tasks('ps')):
            with tf.device('/job:ps/task:%d' % i):
                with tf.name_scope('done_queue'):
                    done_queue = tf.FIFOQueue(
                        cluster.num_tasks('worker'), tf.int32,
                        shared_name='done_queue' + str(i))
                    done_ops.append(done_queue.enqueue(task))

        scaffold = tf.train.Scaffold(saver=saver)
        summary_hook = tf.train.SummarySaverHook(save_steps=1000,
                                                 output_dir=save_dir,
                                                 summary_op=summary_op)
        hooks = [
            summary_hook,
            # tf.train.CheckpointSaverHook(save_secs=600,
            #                              checkpoint_dir=save_dir,
            #                              saver=saver),
            tf.train.StopAtStepHook(last_step=1000000),
            tf.train.LoggingTensorHook(
                {'step': global_step, 'train_loss': train_loss,
                 'ppl': train_ppl},
                every_n_iter=100),
            tf.train.FinalOpsHook([done_ops]),
        ]

        valid_data = pre_train_valid(valid_dir)
        vaX1 = encode_dataset(self.text_encoder, pre_train(valid_data))[0]
        vaX, vaM = self.transform_roc(vaX1)

        with tf.train.MonitoredTrainingSession(master=server.target,
                                               is_chief=(task == 0),
                                               hooks=hooks,
                                               save_checkpoint_secs=600,
                                               checkpoint_dir=save_dir,
                                               scaffold=scaffold) as sess:
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                while not coord.should_stop():
                    ppl, loss, _, step = sess.run(
                        [train_ppl, train_loss, train_op, global_step])
                    if step % steps_to_validate == 0:
                        va_cost = []
                        va_ppl = []
                        for xm, mm in iter_data((vaX, vaM),
                                                n_batch=self.n_batch_train,
                                                truncate=False, verbose=True):
                            ps = sess.run(self.params)
                            joblib.dump(ps, save_dir + 'model_lm.params',
                                        protocol=2)
                            res, ppl = sess.run([valid_loss, valid_ppl],
                                                {X: xm, M: mm})
                            va_cost.append(np.sum(res))
                            va_ppl.append(np.sum(ppl))
                        va_cost = np.average(va_cost)
                        va_ppl = np.average(va_ppl)
                        tf.logging.info(
                            '=========n_steps:\t%d valid_cost:\t%.3f '
                            'valid ppl:\t%.3f==========' %
                            (step, va_cost, va_ppl))
            except tf.errors.OutOfRangeError:
                print('Epochs Complete!')
            finally:
                coord.request_stop()
                coord.join(threads)
def __init__(self, policy, ob_space, ac_space, nenvs, nsteps, nstack,
             num_procs, ent_coef=0.01, vf_coef=0.5, max_grad_norm=0.5,
             lr=7e-4, alpha=0.99, epsilon=1e-5, total_timesteps=int(80e6),
             lrschedule='linear', optimizer='adam'):
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=num_procs,
                            inter_op_parallelism_threads=num_procs)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    nbatch = nenvs * nsteps

    A = tf.placeholder(tf.int32, [nbatch])
    ADV = tf.placeholder(tf.float32, [nbatch])
    R = tf.placeholder(tf.float32, [nbatch])
    LR = tf.placeholder(tf.float32, [])

    train_model = policy(sess, ob_space, ac_space, nenvs, nsteps, nstack,
                         reuse=True)
    step_model = train_model

    neglogpac = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=train_model.pi, labels=A)
    pg_loss = tf.reduce_mean(ADV * neglogpac)
    vf_loss = tf.reduce_mean(mse(tf.squeeze(train_model.vf), R))
    entropy = tf.reduce_mean(cat_entropy(train_model.pi))
    loss = pg_loss + vf_loss * vf_coef - entropy * ent_coef

    params = find_trainable_variables("model")
    grads = tf.gradients(loss, params)
    if max_grad_norm is not None:
        grads, grad_norm = tf.clip_by_global_norm(grads, max_grad_norm)
    grads = list(zip(grads, params))
    if optimizer == 'adam':
        trainer = tf.train.AdamOptimizer()
    else:
        trainer = tf.train.RMSPropOptimizer(learning_rate=LR, decay=alpha,
                                            epsilon=epsilon)
    _train = trainer.apply_gradients(grads)

    lr = Scheduler(v=lr, nvalues=total_timesteps, schedule=lrschedule)

    def train(obs, states, rewards, masks, actions, values):
        advs = rewards - values
        for step in range(len(obs)):
            cur_lr = lr.value()
        td_map = {
            train_model.X: obs, A: actions, ADV: advs, R: rewards, LR: cur_lr
        }
        if states != []:
            td_map[train_model.S] = states
            td_map[train_model.M] = masks
        total_loss, policy_loss, value_loss, policy_entropy, _ = sess.run(
            [loss, pg_loss, vf_loss, entropy, _train], td_map)
        return total_loss, policy_loss, value_loss, policy_entropy

    def save(save_path):
        ps = sess.run(params)
        make_path(save_path)
        joblib.dump(ps, save_path)

    def load(load_path):
        loaded_params = joblib.load(load_path)
        restores = []
        for p, loaded_p in zip(params, loaded_params):
            restores.append(p.assign(loaded_p))
        ps = sess.run(restores)

    self.train = train
    self.train_model = train_model
    self.step_model = step_model
    self.step = step_model.step
    self.value = step_model.value
    self.initial_state = step_model.initial_state
    self.save = save
    self.load = load
    tf.global_variables_initializer().run(session=sess)
def mgpu_train(self, *xs):
    gpu_ops = []
    gpu_grads = []
    tvars = None
    # split the input data across the GPUs (4 for Fab, 2 for me, 1 on the
    # computer)
    xs = (tf.split(x, self.params.n_gpu, 0) for x in xs)
    for i, xs in enumerate(zip(*xs)):
        do_reuse = True if i > 0 else None
        """
        reuse: variable foo/gpu:X can be shared in a reusing scope, otherwise
            an error is raised.
        logits: the result from the last layer; loss: the difference between
            this result and the label.
        model():
            - assigns each input to the model and builds the train graph
            - clf_logits: [?, 2], clf_loss: [?], where ? is the shape of the
              current batch input; logits is [?, 2] because we classify
              between two different input sequences.
            For training, these result ops are also used to perform gradient
            descent and updates on the GPU (unlike in mgpu_predict, where
            they only compute themselves on the GPU).
        tf.gradients(): differentiate the loss (Jacobian) w.r.t. the
            trainable variables
        grads = list(...): zips the gradients with the variables they are to
            be applied to
        gpu_ops.append: appends the logit and loss outputs from each GPU (clf)
        """
        with tf.device(utils.assign_to_gpu(i, "/gpu:0")), tf.variable_scope(
                tf.get_variable_scope(), reuse=do_reuse):
            clf_logits, lm_logits, clf_losses, lm_losses = self.model(
                *xs, train=True, reuse=do_reuse)
            if self.params.head_type == "clf":
                if self.params.lm_coef > 0:
                    # calculate and apply a joint loss if the clf task also
                    # includes a lm loss
                    train_loss = tf.reduce_mean(clf_losses) + \
                        self.params.lm_coef * tf.reduce_mean(lm_losses)
                    tf.summary.scalar('Multi-task Clf-Lm Loss average',
                                      train_loss)
                else:
                    train_loss = tf.reduce_mean(clf_losses)
                    tf.summary.scalar('Clf Loss average', train_loss)
            elif self.params.head_type == "lm":
                train_loss = tf.reduce_mean(lm_losses)
                tf.summary.scalar('Lm Loss average', train_loss)
            else:
                raise ValueError(
                    "{} is not a valid parameter for head_type!".format(
                        self.params.head_type))
            tvars = utils.find_trainable_variables("model")
            grads = tf.gradients(train_loss, tvars)
            grads = list(zip(grads, tvars))
            # appends the gradient pairs from each GPU
            gpu_grads.append(grads)
            if self.params.head_type == "clf":
                gpu_ops.append([clf_logits, clf_losses, lm_losses])
            elif self.params.head_type == "lm":
                # appends just the loss outputs from each GPU
                gpu_ops.append([lm_losses])
            else:
                raise ValueError(
                    "{} is not a valid parameter for head_type!".format(
                        self.params.head_type))
    # concatenate the loss results from the different GPUs
    ops = [tf.concat(op, 0) for op in zip(*gpu_ops)]
    # contains [an average of the grads from each gpu, and the corresponding
    # variables]
    grads = utils.average_grads(gpu_grads)
    """
    Gradient operations (only in train, not in predict):
    accumulate gradients and perform the update after a certain threshold;
    False for rocstories. The threshold condition is defined in the
    train-loop section in __main__ in train.py.
    zero_ops: assigns zeros to non-trainable tf.Variables shaped like tvars
    accum_ops: adds the per-GPU-averaged grads into those variables
    else branch: keeps only the gradients, dropping the variables
    """
    if self.params.gradient_accumulation:
        tvars = utils.find_trainable_variables("model")
        accum_tvars = [
            tf.Variable(tf.zeros_like(tv.initialized_value()),
                        trainable=False) for tv in tvars
        ]
        zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_tvars]
        accum_ops = [
            accum_tvars[i].assign_add(grad[0])
            for i, grad in enumerate(grads)
        ]
        grads = accum_tvars
    else:
        zero_ops = None
        accum_ops = None
        grads = [g for g, p in grads]

    # Perform optimization (rocstories: params.opt is adam)
    # partial(LR_SCHEDULES...): presumably for changing the lr decay value
    # over time (not sure)
    train = OPT_FNS[self.params.opt](
        tvars, grads, self.params.lr,
        partial(LR_SCHEDULES[self.params.lr_schedule],
                warmup=self.params.lr_warmup),
        self.params.n_updates_total,
        l2=self.params.l2,
        max_grad_norm=self.params.max_grad_norm,
        vector_l2=self.params.vector_l2,
        b1=self.params.b1,
        b2=self.params.b2,
        e=self.params.e)

    # Tensorboard
    self.merged = tf.summary.merge_all()
    self.writer = tf.summary.FileWriter(self.logdir,
                                        tf.Session().graph)  # sess.graph
    return [train, accum_ops, zero_ops] + ops
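# The docstring notes that the accumulate-then-update threshold lives in the
# train loop in __main__. A hypothetical driver sketch (names such as sess,
# batches, feed and accum_steps are placeholders, not this repository's
# actual code) shows how the three returned op groups are meant to interact:
#
# train, accum_ops, zero_ops, *ops = self.mgpu_train(X_train, M_train, Y_train)
accum_steps = 4  # hypothetical accumulation threshold
for step, feed in enumerate(batches):
    if step % accum_steps == 0:
        sess.run(zero_ops, feed_dict=feed)   # reset the grad accumulators
    sess.run(accum_ops, feed_dict=feed)      # add this batch's averaged grads
    if (step + 1) % accum_steps == 0:
        sess.run(train, feed_dict=feed)      # apply the accumulated grads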
def prepare_loss(self):
    self.X_input_train_shape = (None, self.img_height, self.img_width,
                                self.num_classes * self.num_stack)
    self.X_input_step_shape = (None, self.img_height, self.img_width,
                               self.num_classes * self.num_stack)

    self.actions = tf.placeholder(tf.int32, [None])        # actions
    self.advantage = tf.placeholder(tf.float32, [None])    # advantage function
    self.reward = tf.placeholder(tf.float32, [None])       # reward
    self.learning_rate = tf.placeholder(tf.float32, [])    # learning rate
    self.is_training = tf.placeholder(tf.bool)             # is_training

    # The model structure
    self.actor_network = self.policy(self.sess, self.X_input_step_shape,
                                     self.num_actions, reuse=False,
                                     is_training=False)
    self.critic_network = self.policy(self.sess, self.X_input_train_shape,
                                      self.num_actions, reuse=True,
                                      is_training=self.is_training)

    with tf.variable_scope('train_output'):
        negative_log_prob_action = \
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.critic_network.policy_logits,
                labels=self.actions)
        self.policy_gradient_loss = tf.reduce_mean(
            self.advantage * negative_log_prob_action)
        self.value_function_loss = tf.reduce_mean(
            mse(tf.squeeze(self.critic_network.value_function), self.reward))
        self.entropy = tf.reduce_mean(
            openai_entropy(self.critic_network.policy_logits))
        self.loss = (self.policy_gradient_loss
                     - self.entropy * self.entropy_coeff
                     + self.value_function_loss * self.vf_coeff)

        # Gradient clipping
        params = find_trainable_variables("policy")
        grads = tf.gradients(self.loss, params)
        if self.max_grad_norm is not None:
            grads, grad_norm = tf.clip_by_global_norm(grads,
                                                      self.max_grad_norm)

        # Apply gradients
        grads = list(zip(grads, params))
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=self.learning_rate, decay=self.alpha,
            epsilon=self.epsilon)
        self.optimize = optimizer.apply_gradients(grads)

    # Monitor training
    summaries = []
    summaries.append(tf.summary.scalar('loss/policy_gradient_loss',
                                       self.policy_gradient_loss))
    summaries.append(tf.summary.scalar('loss/value_function_loss',
                                       self.value_function_loss))
    summaries.append(tf.summary.scalar('loss/entropy', self.entropy))
    summaries.append(tf.summary.scalar('loss/total_loss', self.loss))
    # note: grad_norm is only defined when max_grad_norm is not None, so
    # this summary assumes gradient clipping is enabled
    summaries.append(tf.summary.scalar('train/gradnorm', grad_norm))
    self.summary = tf.summary.merge(summaries)