def eval(self, paths, **kwargs):
    """Return bonus."""
    if self.score_discrim:
        self._compute_path_probs(paths, insert=True)
        obs, obs_next, acts, path_probs = self.extract_paths(
            paths,
            keys=('observations', 'observations_next', 'actions', 'a_logprobs'))
        path_probs = np.expand_dims(path_probs, axis=1)
        scores = tf.get_default_session().run(
            self.discrim_output,
            feed_dict={
                self.act_t: acts,
                self.obs_t: obs,
                self.nobs_t: obs_next,
                self.lprobs: path_probs
            })
        score = np.log(scores) - np.log(1 - scores)
        score = score[:, 0]
    else:
        obs, acts = self.extract_paths(paths)
        reward = tf.get_default_session().run(
            self.reward, feed_dict={self.act_t: acts, self.obs_t: obs})
        score = reward[:, 0]
    return self.unpack(score, paths)
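# Sanity check (illustration only, not from the original source) of the
# transform used above: log(D) - log(1 - D) recovers the discriminator's
# logit, so positive scores mean the discriminator rates the transition
# as more expert-like than not.
import numpy as np

d = np.array([0.1, 0.5, 0.9])
print(np.log(d) - np.log(1 - d))  # approx. [-2.197, 0.0, 2.197]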
def train(self, T):
    vals = self.utils.calc_traj_vals(T)
    for ep in range(self.ppo_epochs):
        batch_iters = int(len(vals) / self.batch_size)
        val_loss = 0
        pol_loss = 0
        if batch_iters == 0:
            # Fewer transitions than one batch: train on everything at once.
            s = np.array([mem[0] for mem in vals], dtype='float32')   # batch state
            a = np.array([mem[1] for mem in vals], dtype='float32')   # batch action
            td = np.array([mem[4] for mem in vals], dtype='float32')  # batch td
            adv = np.array([mem[3] for mem in vals], dtype='float32')
            v_loss, a_loss, _ = tf.get_default_session().run(
                [self.v_loss, self.a_loss, self.op],
                feed_dict={
                    self.net.s: s,
                    self.net.a: a,
                    self.net.adv: adv,
                    self.net.td: td,
                    self.old_net.s: s,
                    self.old_net.a: a,
                    self.old_net.adv: adv,
                    self.old_net.td: td
                })
            val_loss += v_loss
            pol_loss += a_loss
            batch_iters = 1
        else:
            for i in range(batch_iters):
                cur_idx = i * self.batch_size
                batch = vals[cur_idx:cur_idx + self.batch_size]
                s = np.array([mem[0] for mem in batch], dtype='float32')   # batch state
                a = np.array([mem[1] for mem in batch], dtype='float32')   # batch action
                td = np.array([mem[4] for mem in batch], dtype='float32')  # batch td
                adv = np.array([mem[3] for mem in batch], dtype='float32')
                v_loss, a_loss, _ = tf.get_default_session().run(
                    [self.v_loss, self.a_loss, self.op],
                    feed_dict={
                        self.net.s: s,
                        self.net.a: a,
                        self.net.adv: adv,
                        self.net.td: td,
                        self.old_net.s: s,
                        self.old_net.a: a,
                        self.old_net.adv: adv,
                        self.old_net.td: td
                    })
                val_loss += v_loss
                pol_loss += a_loss
        print("vf_loss: {:.5f}, pol_loss: {:.5f}".format(
            val_loss / batch_iters, pol_loss / batch_iters))
def main(args):
    with tf.Graph().as_default():
        with tf.Session() as sess:
            # Load the model metagraph and checkpoint
            print('Model directory: %s' % args.model_dir)
            meta_file, ckpt_file = get_model_filenames(
                os.path.expanduser(args.model_dir))
            print('Metagraph file: %s' % meta_file)
            print('Checkpoint file: %s' % ckpt_file)

            model_dir_exp = os.path.expanduser(args.model_dir)
            saver = tf.train.import_meta_graph(
                os.path.join(model_dir_exp, meta_file), clear_devices=True)
            tf.get_default_session().run(tf.global_variables_initializer())
            tf.get_default_session().run(tf.local_variables_initializer())
            saver.restore(tf.get_default_session(),
                          os.path.join(model_dir_exp, ckpt_file))

            # Retrieve the protobuf graph definition and fix the batch norm nodes
            input_graph_def = sess.graph.as_graph_def()

            # Freeze the graph def
            output_graph_def = freeze_graph_def(sess, input_graph_def,
                                                'embeddings,label_batch')

        # Serialize and dump the output graph to the filesystem
        with tf.gfile.GFile(args.output_file, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print("%d ops in the final graph: %s" %
              (len(output_graph_def.node), args.output_file))
def test_MpiAdam():
    np.random.seed(0)
    tf.set_random_seed(0)

    a = tf.Variable(np.random.randn(3).astype('float32'))
    b = tf.Variable(np.random.randn(2, 5).astype('float32'))
    loss = tf.reduce_sum(tf.square(a)) + tf.reduce_sum(tf.sin(b))

    stepsize = 1e-2
    update_op = tf.train.AdamOptimizer(stepsize).minimize(loss)
    do_update = U.function([], loss, updates=[update_op])

    tf.get_default_session().run(tf.global_variables_initializer())
    for i in range(10):
        print(i, do_update())

    # Reset and repeat with MpiAdam; the printed losses should match the run above.
    tf.set_random_seed(0)
    tf.get_default_session().run(tf.global_variables_initializer())

    var_list = [a, b]
    # Note: no updates=[update_op] here -- only MpiAdam may apply the step,
    # otherwise every iteration would update the variables twice.
    lossandgrad = U.function([], [loss, U.flatgrad(loss, var_list)])
    adam = MpiAdam(var_list)

    for i in range(10):
        l, g = lossandgrad()
        adam.update(g, stepsize)
        print(i, l)
def run(self):
    avg = 0
    num_iters = 100000
    avg_10 = 0
    cur_iter = 0
    num_steps = 3072

    while cur_iter < num_iters:
        # Get trajectories
        T = []
        score_sum = 0
        print("Iteration", cur_iter)
        tf.get_default_session().run(self.assign_op)
        cur_steps = 0
        cur_episode = 0
        while cur_steps < num_steps:
            trajectory, score, steps = self.get_samples()
            score_sum += score
            cur_steps += steps
            T.append(trajectory)
            cur_episode += 1
            print('Reward {}'.format(score))
        print(f"Iteration {cur_iter} average episode reward: {score_sum / cur_episode}")

        # Train
        self.train(T)
        cur_iter += 1
        avg += score_sum / cur_episode
        avg_10 += score_sum / cur_episode
        print('\n\n')
        if cur_iter % 10 == 0:
            # Ten iterations accumulate here, so divide by 10, not 100.
            print(f"Average reward over last 10 iterations: {avg_10 / 10}")
            avg_10 = 0
def replay(self):
    minibatch = random.sample(self.memory, self.batch_size)
    states = []
    q_target = []
    for state, action, reward, next_state in minibatch:
        states.append(state)
        q_eval = tf.get_default_session().run(
            self.qmodel_output, feed_dict={self.state: state})
        q_next = tf.get_default_session().run(
            self.qmodel_output, feed_dict={self.state: next_state})
        target_value = reward + self.gamma * np.max(q_next)
        q_target_ = q_eval.copy()
        q_target_[0][action] = target_value
        q_target.append(q_target_)

    states = np.reshape(np.array(states), [-1, self.state_size])
    q_target = np.reshape(np.array(q_target), [-1, self.action_size])
    feed_dict = {self.state: states, self.q_target: q_target}

    # batch training
    _, summary = tf.get_default_session().run([self.train_op, self.merged],
                                              feed_dict=feed_dict)
    self.file_writer.add_summary(summary)
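# A minimal sketch (class and method names assumed, not from the original) of
# the buffer side that replay() above expects: self.memory holds
# (state, action, reward, next_state) tuples and must contain at least
# batch_size entries before replay() is called.
from collections import deque

class ReplayBufferMixin:
    def __init__(self, memory_size=10000, batch_size=32):
        self.memory = deque(maxlen=memory_size)  # oldest transitions fall off
        self.batch_size = batch_size

    def remember(self, state, action, reward, next_state):
        # Store one transition in the same tuple layout replay() unpacks.
        self.memory.append((state, action, reward, next_state))

    def can_replay(self):
        return len(self.memory) >= self.batch_size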
def __init__(self, env):
    # Make session
    sess = tf.InteractiveSession()
    self.env = env
    self.lr = 1e-4
    self.gamma = 0.99
    self.gae = 0.95
    self.net = ACModel(env, 'net')
    self.old_net = ACModel(env, 'old')

    # PPO parameters
    self.ppo_epochs = 10
    self.batch_size = 64
    self.clip_range = 0.2
    # Entropy bonus coefficient for exploration
    self.ent_coef = 0.01

    self.saver = tf.train.Saver(max_to_keep=5000)
    self.build_update()
    self.build_update_models()
    self.utils = Utils()
    tf.get_default_session().run(tf.global_variables_initializer())
def get_action(self, obs, stochastic=True):
    act_op = self.act_stochastic if stochastic else self.act_deterministic
    act, v_pred = tf.get_default_session().run(
        [act_op, self.v_preds], feed_dict={self.obs: obs})
    return act.item(), v_pred.item()
def train(self, obs, actions, gaes, rewards, v_preds_next):
    tf.get_default_session().run(
        self.train_op,
        feed_dict={
            self.Policy.obs: obs,
            self.Old_Policy.obs: obs,
            self.actions: actions,
            self.rewards: rewards,
            self.v_preds_next: v_preds_next,
            self.gaes: gaes
        })
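# A hedged sketch of how the gaes and v_preds_next arguments above are usually
# produced: standard generalized advantage estimation. The helper name and the
# gamma/lam defaults are assumptions, not code from the original repository.
import numpy as np

def compute_gaes(rewards, v_preds, v_preds_next, gamma=0.99, lam=0.95):
    # One-step TD residuals: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t).
    deltas = (np.asarray(rewards, dtype=np.float32)
              + gamma * np.asarray(v_preds_next, dtype=np.float32)
              - np.asarray(v_preds, dtype=np.float32))
    gaes = deltas.copy()
    # Accumulate discounted residuals backwards through the trajectory.
    for t in reversed(range(len(gaes) - 1)):
        gaes[t] += gamma * lam * gaes[t + 1]
    return gaes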
def test_preserves_existing_session(self):
    with tf.Session() as sess:
        op = tf.reduce_sum(input_tensor=[2, 2])
        self.assertIs(sess, tf.get_default_session())

        result = self._square(123)
        self.assertEqual(123 * 123, result)

        self.assertIs(sess, tf.get_default_session())
        number_of_lights = sess.run(op)
        self.assertEqual(number_of_lights, 4)
def run(self):
    traj_gen = self.traj_generator()
    iteration = 0
    while True:
        iteration += 1
        print("\n================= iteration {} =================".format(
            iteration))
        traj = traj_gen.__next__()
        self.add_vtarg_and_adv(traj)
        tfc.get_default_session().run(self.assign_op)
        # Normalize advantages.
        traj["advantage"] = (traj["advantage"] - np.mean(
            traj["advantage"])) / np.std(traj["advantage"])
        n_batches = int((self.step_size - self.n_round) / self.batch_size)
        for _ in range(self.epochs):
            vf_loss = 0
            pol_loss = 0
            entropy = 0
            for i in range(n_batches):
                cur = i * self.batch_size + self.n_round
                *step_losses, _ = tfc.get_default_session().run(
                    [self.ent, self.vf_loss, self.pol_loss, self.update_op],
                    feed_dict={
                        self.obs_place: traj["ob"][cur:cur + self.batch_size],
                        self.acts_place: traj["action"][cur:cur + self.batch_size],
                        self.adv_place: traj["advantage"][cur:cur + self.batch_size],
                        self.return_place: traj["return"][cur:cur + self.batch_size]
                    })
                entropy += step_losses[0] / n_batches
                vf_loss += step_losses[1] / n_batches
                pol_loss += step_losses[2] / n_batches
            print("vf_loss: {:.5f}, pol_loss: {:.5f}, entropy: {:.5f}".format(
                vf_loss, pol_loss, entropy))
        env.end()
        env.setUp()
        if iteration % 2 == 0:
            self.save_model('./model/checkpointPhoenixVREP_2.ckpt')
    env.end()
def train(self, obs, actions, rewards, v_preds_next, gaes):
    # Training function
    tf.get_default_session().run(
        [self.train_op],
        feed_dict={
            self.Policy.obs: obs,
            self.Old_Policy.obs: obs,
            self.actions: actions,
            self.rewards: rewards,
            self.v_preds_next: v_preds_next,
            self.gaes: gaes
        })
def restore_model_params(model_params):
    gvar_names = list(model_params.keys())
    assign_ops = {
        gvar_name:
        tf.get_default_graph().get_operation_by_name(gvar_name + "/Assign")
        for gvar_name in gvar_names
    }
    init_values = {
        gvar_name: assign_op.inputs[1]
        for gvar_name, assign_op in assign_ops.items()
    }
    feed_dict = {
        init_values[gvar_name]: model_params[gvar_name]
        for gvar_name in gvar_names
    }
    tf.get_default_session().run(assign_ops, feed_dict=feed_dict)
def get_model_params():
    gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    return {
        gvar.op.name: value
        for gvar, value in zip(gvars, tf.get_default_session().run(gvars))
    }
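# A round-trip usage sketch (not from the original source) for the two helpers
# above: snapshot every global variable by name, mutate the graph state, then
# roll back. This assumes TF1-style (non-resource) variables so that the
# "<name>/Assign" op looked up by restore_model_params exists in the graph.
import tensorflow as tf

v = tf.Variable(1.0, name="v")
bump = v.assign_add(1.0)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    snapshot = get_model_params()   # {'v': 1.0}
    sess.run(bump)                  # v is now 2.0
    restore_model_params(snapshot)  # re-runs v/Assign with the saved value
    print(sess.run(v))              # back to 1.0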
def loadLpips():
    with g_LoadingMutex:
        global g_Lpips
        if g_Lpips:
            return g_Lpips

        model_path = os.environ.get('MODEL_PATH_LPIPS')
        if model_path is None:
            print('MODEL_PATH_LPIPS is not set; cannot load the LPIPS model')
            return None

        global g_Session
        if g_Session is None:
            print('Initializing dnnlib...')
            dnnlib.tflib.init_tf()
            g_Session = tf.get_default_session()

        print('Loading model lpips ...')
        with open(model_path, 'rb') as f:
            with g_Session.as_default():
                lpips = pickle.load(f)
                g_Lpips = lpips

    return g_Lpips
def train(self, state, td_error, action_one_hot, actor_lr):
    sess = tf.get_default_session()
    feed_dict_policy = {
        self.state: state,
        self.R_t: td_error,
        self.action: action_one_hot,
        self.learning_rate: actor_lr
    }
    sess.run([self.optimizer, self.loss], feed_dict_policy)
def init_tf(config_dict: dict = None) -> None:
    """Initialize TensorFlow session using good default settings."""
    # Skip if already initialized.
    if tf.get_default_session() is not None:
        return

    # Setup config dict and random seeds.
    cfg = _sanitize_tf_config(config_dict)
    np_random_seed = cfg["rnd.np_random_seed"]
    if np_random_seed is not None:
        np.random.seed(np_random_seed)
    tf_random_seed = cfg["rnd.tf_random_seed"]
    if tf_random_seed == "auto":
        tf_random_seed = np.random.randint(1 << 31)
    if tf_random_seed is not None:
        tf.set_random_seed(tf_random_seed)

    # Setup environment variables.
    for key, value in list(cfg.items()):
        fields = key.split(".")
        if fields[0] == "env":
            assert len(fields) == 2
            os.environ[fields[1]] = str(value)

    # Create default TensorFlow session.
    create_session(cfg, force_as_default=True)
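# A small usage sketch for init_tf() above, assuming the "rnd.*" and "env.*"
# config keys visible in its body; the full set of accepted keys depends on
# _sanitize_tf_config, which is not shown here.
init_tf({
    "rnd.np_random_seed": 1000,       # seed NumPy deterministically
    "rnd.tf_random_seed": "auto",     # derive the TF seed from NumPy's RNG
    "env.TF_CPP_MIN_LOG_LEVEL": "1",  # exported as an environment variable
})
# A second call is a no-op because a default session now exists.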
def get_f_scores(self, obs_t, nobs_t):
    scores = tf.get_default_session().run(
        self.f_reward, feed_dict={
            self.obs_t: obs_t,
            self.nobs_t: nobs_t
        })
    return scores
def _load_checkpoint(checkpoint_filename, extra_vars, trainable_only=False):
    if tf.gfile.IsDirectory(checkpoint_filename):
        checkpoint_filename = tf.train.latest_checkpoint(checkpoint_filename)
    logging.info('Loading checkpoint %s', checkpoint_filename)
    saveables = (tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) +
                 tf.get_collection(tf.GraphKeys.SAVEABLE_OBJECTS))
    if trainable_only:
        saveables = list(set(saveables) & set(tf.trainable_variables()))
    # Try to restore all saveables; if that fails, try without extra_vars.
    try:
        saver = tf.train.Saver(var_list=saveables)
        saver.restore(tf.get_default_session(), checkpoint_filename)
    except (ValueError, tf.errors.NotFoundError):
        logging.info('Missing key in checkpoint. Trying old checkpoint format.')
        saver = tf.train.Saver(var_list=list(set(saveables) - set(extra_vars)))
        saver.restore(tf.get_default_session(), checkpoint_filename)
def train_step(
    meta_learner: MetaLearner,
    feed_batch: Tuple[common.FeedList],
    *,
    sess: Optional[tf.Session] = None,
    **sess_kwargs,
):
    """Performs one meta-training step.

    Parameters
    ----------
    meta_learner : MetaLearner

    feed_batch : tuple of FeedLists

    sess : tf.Session, optional

    Returns
    -------
    loss : float
        The loss value at the current training step.
    """
    if sess is None:
        sess = tf.get_default_session()

    # Make a training step and compute loss.
    losses, _ = sess.run(
        [meta_learner.meta_losses, meta_learner.meta_train_op],
        feed_dict=dict(sum(feed_batch, [])),
        **sess_kwargs,
    )
    return losses
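# Why dict(sum(feed_batch, [])) in train_step works (illustration only): each
# FeedList is a list of (placeholder, value) pairs, so summing the tuple of
# lists onto [] concatenates them, and dict(...) merges them into one feed_dict.
ph_a, ph_b, ph_c = "ph_a", "ph_b", "ph_c"  # stand-ins for real placeholders
feed_batch = ([(ph_a, 1.0), (ph_b, 2.0)], [(ph_c, 3.0)])
assert dict(sum(feed_batch, [])) == {"ph_a": 1.0, "ph_b": 2.0, "ph_c": 3.0}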
def validate_probtype(probtype, pdparam):
    N = 100000
    # Check to see if mean negative log likelihood == differential entropy
    Mval = np.repeat(pdparam[None, :], N, axis=0)
    M = probtype.param_placeholder([N])
    X = probtype.sample_placeholder([N])
    pd = probtype.pdfromflat(M)
    calcloglik = U.function([X, M], pd.logp(X))
    calcent = U.function([M], pd.entropy())
    Xval = tf.get_default_session().run(pd.sample(), feed_dict={M: Mval})
    logliks = calcloglik(Xval, Mval)
    entval_ll = -logliks.mean()  # pylint: disable=E1101
    entval_ll_stderr = logliks.std() / np.sqrt(N)  # pylint: disable=E1101
    entval = calcent(Mval).mean()  # pylint: disable=E1101
    assert np.abs(entval - entval_ll) < 3 * entval_ll_stderr  # within 3 sigmas

    # Check to see if kldiv[p,q] = - ent[p] - E_p[log q]
    M2 = probtype.param_placeholder([N])
    pd2 = probtype.pdfromflat(M2)
    q = pdparam + np.random.randn(pdparam.size) * 0.1
    Mval2 = np.repeat(q[None, :], N, axis=0)
    calckl = U.function([M, M2], pd.kl(pd2))
    klval = calckl(Mval, Mval2).mean()  # pylint: disable=E1101
    logliks = calcloglik(Xval, Mval2)
    klval_ll = -entval - logliks.mean()  # pylint: disable=E1101
    klval_ll_stderr = logliks.std() / np.sqrt(N)  # pylint: disable=E1101
    assert np.abs(klval - klval_ll) < 3 * klval_ll_stderr  # within 3 sigmas
    print('ok on', probtype, pdparam)
def call(self, dict_obs, new, istate, update_obs_stats=False):
    for ob in dict_obs.values():
        if ob is not None:
            if update_obs_stats:
                raise NotImplementedError
                ob = ob.astype(np.float32)
                ob = ob.reshape(-1, *self.ob_space.shape)
                self.ob_rms.update(ob)
    # Note: if it fails here with ph vs observations inconsistency, check if
    # you're loading the agent from disk. It will use whatever observation
    # spaces were saved to disk along with the other ctor params.
    feed1 = {self.ph_ob[k]: dict_obs[k][:, None] for k in self.ph_ob_keys}
    feed2 = {
        self.ph_istate: istate,
        self.ph_new: new[:, None].astype(np.float32)
    }
    feed1.update({
        self.ph_mean: self.ob_rms.mean,
        self.ph_std: self.ob_rms.var**0.5
    })
    a, vpred_int, vpred_ext, nlp, newstate, ent = tf.get_default_session().run(
        [
            self.a_samp, self.vpred_int_rollout, self.vpred_ext_rollout,
            self.nlp_samp, self.snext_rollout, self.entropy_rollout
        ],
        feed_dict={**feed1, **feed2})
    return (a[:, 0], vpred_int[:, 0], vpred_ext[:, 0], nlp[:, 0], newstate,
            ent[:, 0])
def prob(self, observation, a):
    sess = self.sess or tf.get_default_session()
    feed_dict = {
        self.X: adjust_shape(self.X, observation),
        self.action_ph: adjust_shape(self.action_ph, a)
    }
    return sess.run([self.pdf], feed_dict)[0]
def init_training(state):
    """Initializes the training loop.

    Args:
        state: Training state.

    Returns:
        True if checkpoint was found and false otherwise.
    """
    state.large_summaries = merge_summaries(state.large_summaries)  # pytype: disable=annotation-type-mismatch
    state.small_summaries = merge_summaries(state.small_summaries)  # pytype: disable=annotation-type-mismatch

    sess = tf.get_default_session()
    init_op = state.init_op
    if state.init_op is None:
        init_op = [
            tf.initializers.global_variables(),
            tf.initializers.local_variables()
        ]
    sess.run(init_op)

    restore_path = os.path.dirname(
        os.path.join(FLAGS.train_log_dir, state.checkpoint_suffix))
    checkpoint = util.latest_checkpoint(restore_path)
    if checkpoint is None:
        logging.warning('No checkpoint found.')
        return False
    else:
        logging.info('Restoring from "%s".', checkpoint)
        state.saver.restore(sess, checkpoint)
        return True
def train(self, states, rewards, next_states, done, weight, summary=True):
    """Takes a transition (s, a, r, s') and optimizes the Bellman error.

    Args:
        states: object, a batch of observations.
        rewards: np.array, immediate rewards attained after executing the
            corresponding actions. dtype must be float32 and shape must be
            (batch_size,).
        next_states: object, observations that followed states.
        done: np.array, 1 if the corresponding state was the last observation
            in the episode and 0 otherwise. dtype must be float32 and shape
            must be (batch_size,).
        weight: np.array, importance sampling weights for every element of
            the batch. dtype must be float32 and shape must be (batch_size,).
        summary: Boolean, whether to get summary.

    Returns:
        td_error: np.array, a list of differences between Q(s, a) and the
            target in Bellman's equation. dtype is float32 and shape is
            (batch_size,).
    """
    if summary:
        ops = [self.td_error, self.error_summary, self.optimization_op]
    else:
        ops = [self.td_error, self.optimization_op]
    feed_dict = {
        self.state_t: states,
        self.reward_t: rewards,
        self.done_mask: done,
        self.error_weight: weight
    }
    # state_tp1 is a list of placeholders, one per candidate next state.
    for i, next_state in enumerate(next_states):
        feed_dict[self.state_tp1[i]] = next_state
    return tf.get_default_session().run(ops, feed_dict=feed_dict)
def evaluate_deterministic(model, make_data_iterator_fn, dataset_name,
                           episodic, num_batches_to_discard=0,
                           temperature=1.0, print_level=2,
                           prediction_callback=None, extra_ops=()):
    """Evaluate with a single pass with dropout turned off."""
    sum_xe = 0
    sum_len = 0
    num_batches = 0
    last_state = None
    for (cond, cond_len, source, source_len, target) in make_data_iterator_fn():
        feed = _make_feed(model, cond, cond_len, source, source_len, target,
                          last_state, episodic, 0, temperature)
        xe, last_state = tf.get_default_session().run(
            [model.xe_losses, model.last_state] + list(extra_ops), feed)[0:2]
        if num_batches >= num_batches_to_discard:
            sum_xe1, sum_len1 = _sum_masked(source_len, xe)
            sum_xe += sum_xe1
            sum_len += sum_len1
            if prediction_callback:
                prediction_callback(target, source_len, xe)
        num_batches += 1
    average_xe = sum_xe / sum_len
    if print_level >= 1:
        logging.info('final %s xe: %6.5f (%s), batches: %s', dataset_name,
                     average_xe, sum_len,
                     num_batches - num_batches_to_discard)
    return average_xe
def load_state(fname, sess=None):
    from baselines import logger
    logger.warn(
        'load_state method is deprecated, please use load_variables instead')
    sess = sess or get_session()
    saver = tf.train.Saver()
    # Restore into the session resolved above, not whatever happens to be
    # the current default session.
    saver.restore(sess, fname)
def load_model(model: os.PathLike, input_map: dict = None) -> None:
    """
    Load tensorflow model
    :param model: Folder that contains the network model
    :param input_map: Optional input mapping. See:
        https://www.tensorflow.org/api_docs/python/tf/graph_util/import_graph_def
    """
    # Check if the model is a model directory (containing a metagraph and a
    # checkpoint file) or if it is a protobuf file with a frozen graph
    model_exp = os.path.expanduser(model)
    if os.path.isfile(model_exp):
        with gfile.FastGFile(model_exp, 'rb') as f:
            graph_def = tensorflow.GraphDef()
            graph_def.ParseFromString(f.read())
            tensorflow.import_graph_def(graph_def, input_map=input_map, name='')
    else:
        meta_file, ckpt_file = get_model_filenames(model_exp)
        saver = tensorflow.train.import_meta_graph(
            os.path.join(model_exp, meta_file), input_map=input_map)
        saver.restore(tensorflow.get_default_session(),
                      os.path.join(model_exp, ckpt_file))
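# get_model_filenames() is used above (and in main() earlier) but not shown;
# a simplified sketch of the contract it must satisfy. The facenet original
# also handles directories without a CheckpointState proto, which this
# sketch omits.
import os
import tensorflow

def get_model_filenames(model_dir):
    meta_files = [f for f in os.listdir(model_dir) if f.endswith('.meta')]
    if len(meta_files) != 1:
        raise ValueError('Expected exactly one .meta file in %s' % model_dir)
    ckpt = tensorflow.train.get_checkpoint_state(model_dir)
    if ckpt is None or not ckpt.model_checkpoint_path:
        raise ValueError('No valid checkpoint state found in %s' % model_dir)
    return meta_files[0], os.path.basename(ckpt.model_checkpoint_path)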
def __call__(self, *inputvals):
    assert len(inputvals) == len(self.nondata_inputs) + len(self.data_inputs)
    nondata_vals = inputvals[0:len(self.nondata_inputs)]
    data_vals = inputvals[len(self.nondata_inputs):]
    feed_dict = dict(zip(self.nondata_inputs, nondata_vals))
    n = data_vals[0].shape[0]
    for v in data_vals[1:]:
        assert v.shape[0] == n
    for i_start in range(0, n, self.batch_size):
        slice_vals = [
            v[i_start:min(i_start + self.batch_size, n)] for v in data_vals
        ]
        for (var, val) in zip(self.data_inputs, slice_vals):
            feed_dict[var] = val
        results = tf.get_default_session().run(self.outputs,
                                               feed_dict=feed_dict)
        if i_start == 0:
            sum_results = results
        else:
            for i in range(len(results)):
                sum_results[i] = sum_results[i] + results[i]
    for i in range(len(results)):
        sum_results[i] = sum_results[i] / n
    return sum_results
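# A sketch of the constructor this __call__ assumes (attribute names inferred
# from their uses above; the original class definition is not shown). Note the
# averaging subtlety: results are summed over batch slices and divided by n at
# the end, so each output tensor must itself be a per-batch *sum* for the
# final values to come out as per-example means.
class MinibatchedFunction:
    def __init__(self, nondata_inputs, data_inputs, outputs, batch_size):
        self.nondata_inputs = nondata_inputs  # placeholders fed whole every slice
        self.data_inputs = data_inputs        # placeholders fed slice by slice
        self.outputs = outputs                # per-batch sums, averaged at the end
        self.batch_size = batch_size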
def plot_traces(mcmc_results, archetype_labels, out_dir=None):
    session = tf1.get_default_session()
    if session is None:
        session = tf1.Session()
    indices = session.run(generate.free_to_matrix(len(archetype_labels)))
    tlp = mcmc_results[
        "kernel_results"].inner_results.proposed_results.target_log_prob
    plots.trace(
        tlp,
        title="Log-Likelihood of MCMC Samples",
        xlabel="$t$",
        ylabel="$\\log P(X|\\Theta_t)$",
        filename=None if out_dir is None else f"{out_dir}/ll_mcmc_samples.png")
    plots.trace(
        mcmc_results["wait_time"],
        xlabel="$t$",
        title="Parameter Trace: Wait Time",
        ylabel="$w$",
        filename=None if out_dir is None else f"{out_dir}/trace_wait_time.png")
    plots.trace(
        mcmc_results["field"],
        xlabel="$t$",
        title="Parameter Trace: $P(Deck)$",
        plots=archetype_labels,
        filename=None if out_dir is None else f"{out_dir}/trace_field.png")
    plots.trace(
        mcmc_results["matchups_free"],
        xlabel="$t$",
        title="Parameter Trace: $P(w|d_i vs. d_j)$",
        plots=[
            "{} vs. {}".format(archetype_labels[pair[0]],
                               archetype_labels[pair[1]]) for pair in indices
        ],
        filename=None if out_dir is None else f"{out_dir}/trace_matchups.png")