def __init__(self, env, policy_net, summary_writer, saver=None):
    """Set up the evaluation worker and its local copy of the policy net.

    Args:
        env: Environment; it is wrapped in a ``Monitor`` that records into
            ``self.video_dir``.
        policy_net: Global policy estimator. It is kept as
            ``self.global_policy_net`` and its ``num_outputs`` sizes the
            local ``PolicyEstimator``.
        summary_writer: Writer whose ``get_logdir()`` anchors the video
            and checkpoint paths.
        saver: Optional saver object, stored unchanged.
    """
    log_dir = summary_writer.get_logdir()

    # Videos live in a sibling directory of the summary log directory.
    self.video_dir = os.path.abspath(os.path.join(log_dir, "../videos"))
    self.env = Monitor(
        env,
        directory=self.video_dir,
        video_callable=lambda episode_id: True,  # record every episode
        resume=True,
    )

    self.saver = saver
    self.summary_writer = summary_writer
    self.global_policy_net = policy_net
    self.sp = StateProcessor()
    self.checkpoint_path = os.path.abspath(
        os.path.join(log_dir, "../checkpoints/model"))

    # An already existing video directory is fine.
    try:
        os.makedirs(self.video_dir)
    except FileExistsError:
        pass

    # Local policy net
    with tf.variable_scope("policy_eval"):
        self.policy_net = PolicyEstimator(policy_net.num_outputs)

    # Op to copy params from global policy/value net parameters
    get_vars = tf.contrib.slim.get_variables
    trainable = tf.GraphKeys.TRAINABLE_VARIABLES
    global_vars = get_vars(scope="global", collection=trainable)
    eval_vars = get_vars(scope="policy_eval", collection=trainable)
    self.copy_params_op = make_copy_params_op(global_vars, eval_vars)
def __init__(self, env, policy_net, summary_writer, saver=None):
    """Set up an evaluation worker driven by the shared-library simulator.

    Args:
        env: Not referenced in this constructor; ``self.env`` is the
            ``PythonAccessToSim.so`` library loaded below.
        policy_net: Global policy estimator. It is kept as
            ``self.global_policy_net`` and its ``num_outputs`` sizes the
            local ``PolicyEstimator``.
        summary_writer: Writer whose ``get_logdir()`` anchors the
            checkpoint path.
        saver: Optional saver object, stored unchanged.
    """
    self.saver = saver
    self.summary_writer = summary_writer
    self.global_policy_net = policy_net

    # Load the simulator and declare the return types of its entry points
    # so ctypes converts results correctly.
    self.env = sim = CDLL('./PythonAccessToSim.so')
    sim.step.restype = step_result
    sim.send_command.restype = c_int
    sim.initialize.restype = c_int
    sim.recieve_state_gui.restype = step_result

    self.actions = list(range(3 * Num_Targets))

    model_path = os.path.join(summary_writer.get_logdir(), "./checkpoints/model")
    self.checkpoint_path = os.path.abspath(model_path)
    print(self.checkpoint_path)

    # Local policy net
    with tf.variable_scope("policy_eval"):
        self.policy_net = PolicyEstimator(policy_net.num_outputs)

    # Op to copy params from global policy/value net parameters
    get_vars = tf.contrib.slim.get_variables
    trainable = tf.GraphKeys.TRAINABLE_VARIABLES
    global_vars = get_vars(scope="global", collection=trainable)
    eval_vars = get_vars(scope="policy_eval", collection=trainable)
    self.copy_params_op = make_copy_params_op(global_vars, eval_vars)
def __init__(self, env, policy_net, summary_writer, saver=None):
    """Set up the evaluation worker and its local copy of the policy net.

    Args:
        env: Environment; it is wrapped in a ``Monitor`` that records into
            ``self.video_dir``.
        policy_net: Global policy estimator. It is kept as
            ``self.global_policy_net`` and its ``num_outputs`` sizes the
            local ``PolicyEstimator``.
        summary_writer: Writer whose ``get_logdir()`` anchors the video
            and checkpoint paths.
        saver: Optional saver object, stored unchanged.

    Raises:
        OSError: If the video directory cannot be created and does not
            already exist.
    """
    self.video_dir = os.path.join(summary_writer.get_logdir(), "../videos")
    self.video_dir = os.path.abspath(self.video_dir)
    self.env = Monitor(env, directory=self.video_dir,
                       video_callable=lambda x: True, resume=True)
    self.global_policy_net = policy_net
    self.summary_writer = summary_writer
    self.saver = saver

    self.checkpoint_path = os.path.abspath(
        os.path.join(summary_writer.get_logdir(), "../checkpoints/model"))

    try:
        os.makedirs(self.video_dir)
    except OSError:
        # FileExistsError only exists on Python 3.3+, so OSError is caught
        # instead. Only the "already exists" case is harmless; any other
        # failure (permissions, read-only filesystem, ...) is re-raised
        # rather than silently ignored.
        if not os.path.isdir(self.video_dir):
            raise

    # Local policy net
    with tf.variable_scope("policy_eval"):
        self.policy_net = PolicyEstimator(policy_net.num_outputs)

    # Op to copy params from global policy/value net parameters
    self.copy_params_op = make_copy_params_op(
        tf.contrib.slim.get_variables(
            scope="global", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
        tf.contrib.slim.get_variables(
            scope="policy_eval", collection=tf.GraphKeys.TRAINABLE_VARIABLES))
def __init__(self, env, global_net, summary_writer, saver=None):
    """Set up the evaluation worker and its local policy/value net.

    Args:
        env: Environment, stored unchanged as ``self.env``.
        global_net: Global network, stored as ``self.global_net``.
        summary_writer: Writer whose ``get_logdir()`` anchors the video
            and checkpoint paths.
        saver: Optional saver object, stored unchanged.

    Raises:
        OSError: If the video directory cannot be created and does not
            already exist.
    """
    self.env = env
    self.global_net = global_net
    self.summary_writer = summary_writer
    self.saver = saver
    self.sp = StateProcessor()

    self.video_dir = os.path.join(summary_writer.get_logdir(), "../videos")
    self.video_dir = os.path.abspath(self.video_dir)

    self.checkpoint_path = os.path.abspath(
        os.path.join(summary_writer.get_logdir(), "../checkpoints/model"))

    try:
        os.makedirs(self.video_dir)
    except OSError:
        # Only an already existing directory is harmless. A bare
        # ``except`` would also hide KeyboardInterrupt and genuine
        # failures such as missing permissions.
        if not os.path.isdir(self.video_dir):
            raise

    # Local policy net
    with tf.variable_scope("policy_eval"):
        self.local_net = PolicyValueEstimator()

    # Op to copy params from global policy/value net parameters
    self.copy_params_op = make_copy_params_op(
        tf.contrib.slim.get_variables(
            scope="global", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
        tf.contrib.slim.get_variables(
            scope="policy_eval", collection=tf.GraphKeys.TRAINABLE_VARIABLES))
def __init__(self, env, policy_net, summary_writer, saver=None):
    """Set up the evaluation worker and its local copy of the policy net.

    Args:
        env: Environment, stored unchanged as ``self.env``.
        policy_net: Global policy estimator. It is kept as
            ``self.global_policy_net`` and its ``num_outputs`` sizes the
            local estimator.
        summary_writer: Writer whose ``get_logdir()`` anchors the
            checkpoint path.
        saver: Optional saver object, stored unchanged.
    """
    self.env = env
    self.saver = saver
    self.summary_writer = summary_writer
    self.global_policy_net = policy_net

    # Correct the path
    model_path = os.path.join(summary_writer.get_logdir(), "../checkpoints/model")
    self.checkpoint_path = os.path.abspath(model_path)
    print('[PM] checkpoint_path: {}'.format(self.checkpoint_path))

    # Local policy net; the module-level LSTM_POLICY flag picks the class.
    estimator_cls = LSTMPolicyEstimator if LSTM_POLICY else PolicyEstimator
    with tf.variable_scope("policy_eval"):
        self.policy_net = estimator_cls(policy_net.num_outputs)

    # Op to copy params from global policy/value net parameters
    get_vars = tf.contrib.slim.get_variables
    trainable = tf.GraphKeys.TRAINABLE_VARIABLES
    global_vars = get_vars(scope="global", collection=trainable)
    eval_vars = get_vars(scope="policy_eval", collection=trainable)
    self.copy_params_op = make_copy_params_op(global_vars, eval_vars)
def __init__(self, env, policy_net):
    """Store the environment and build a local copy of the policy net.

    Args:
        env: Environment, stored unchanged as ``self.env``.
        policy_net: Global policy estimator. It is kept as
            ``self.global_policy_net`` and its ``num_outputs`` sizes the
            local ``PolicyEstimator``.
    """
    self.global_policy_net = policy_net
    self.env = env

    # Local policy net
    with tf.variable_scope("policy_eval"):
        self.policy_net = PolicyEstimator(policy_net.num_outputs)

    # Op built from the trainable variables of the "global" scope and of
    # the local "policy_eval" scope.
    get_vars = tf.contrib.slim.get_variables
    trainable = tf.GraphKeys.TRAINABLE_VARIABLES
    global_vars = get_vars(scope="global", collection=trainable)
    eval_vars = get_vars(scope="policy_eval", collection=trainable)
    self.copy_params_op = make_copy_params_op(global_vars, eval_vars)
def __init__(self, env, policy_net, task):
    """Store the environment and task and build a local policy net.

    Args:
        env: Environment, stored as ``self.env``; its ``get_state_size()``
            supplies ``state_dims`` for the local estimator.
        policy_net: Global policy estimator. It is kept as
            ``self.global_policy_net`` and its ``num_outputs`` sizes the
            local ``PolicyEstimator``.
        task: Stored unchanged as ``self.task``.
    """
    self.task = task
    self.global_policy_net = policy_net
    self.env = env

    # Local policy net
    with tf.variable_scope("policy_visualization"):
        self.policy_net = PolicyEstimator(
            policy_net.num_outputs,
            state_dims=self.env.get_state_size(),
        )

    # Op to copy params from global policy/value net parameters
    get_vars = tf.contrib.slim.get_variables
    trainable = tf.GraphKeys.TRAINABLE_VARIABLES
    global_vars = get_vars(scope="global", collection=trainable)
    local_vars = get_vars(scope="policy_visualization", collection=trainable)
    self.copy_params_op = make_copy_params_op(global_vars, local_vars)
def __init__(self, env, env_id, curriculum, policy_net, saver=None, n_eval=10,
             logfile=None, checkpoint_path=None):
    """Set up a per-environment evaluator with its own logger and policy net.

    Args:
        env: Environment, stored as ``self.env``; its ``get_state_size()``
            supplies ``state_dims`` for the local estimator.
        env_id: Identifier used in the logger name and in the
            ``global_{env_id}`` / ``policy_eval_{env_id}`` variable scopes.
        curriculum: Stored unchanged as ``self.curriculum``.
        policy_net: Global policy estimator. It is kept as
            ``self.global_policy_net`` and its ``num_outputs`` sizes the
            local ``PolicyEstimator``.
        saver: Optional saver object, stored unchanged.
        n_eval: Stored unchanged as ``self.n_eval``.
        logfile: Path of the file that evaluation log records are written
            to. When ``None``, no file handler is attached.
        checkpoint_path: Stored unchanged as ``self.checkpoint_path``.
    """
    self.env = env
    self.env_id = env_id
    self.curriculum = curriculum
    self.global_policy_net = policy_net
    self.saver = saver
    self.n_eval = n_eval
    self.checkpoint_path = checkpoint_path

    self.logger = logging.getLogger('eval runs {}'.format(env_id))
    # logging.FileHandler(None) raises TypeError, so the default
    # ``logfile=None`` must skip the file handler instead of crashing.
    if logfile is not None:
        hdlr = logging.FileHandler(logfile)
        formatter = logging.Formatter(
            '[%(asctime)s] [%(levelname)s] %(message)s')
        hdlr.setFormatter(formatter)
        self.logger.addHandler(hdlr)
    self.logger.setLevel(logging.INFO)

    # Local policy net
    with tf.variable_scope("policy_eval_{}".format(env_id)):
        self.policy_net = PolicyEstimator(
            policy_net.num_outputs, state_dims=self.env.get_state_size())

    # Op to copy params from global policy/value net parameters
    self.copy_params_op = make_copy_params_op(
        tf.contrib.slim.get_variables(
            scope="global_{}".format(env_id),
            collection=tf.GraphKeys.TRAINABLE_VARIABLES),
        tf.contrib.slim.get_variables(
            scope="policy_eval_{}".format(env_id),
            collection=tf.GraphKeys.TRAINABLE_VARIABLES))

    self.epochs = 0
def __init__(self, env, policy_net, summary_writer, saver=None):
    """Set up the evaluation worker and its local copy of the policy net.

    Args:
        env: Environment, stored unchanged as ``self.env``.
        policy_net: Global policy estimator. It is kept as
            ``self.global_policy_net``; its ``num_outputs`` and
            ``observation_space`` are passed to the local
            ``PolicyEstimator``.
        summary_writer: Writer whose ``get_logdir()`` anchors the
            checkpoint path.
        saver: Optional saver object, stored unchanged.
    """
    self.counter = 0
    self.saver = saver
    self.summary_writer = summary_writer
    self.global_policy_net = policy_net
    self.env = env

    model_path = os.path.join(summary_writer.get_logdir(), "../checkpoints/model")
    self.checkpoint_path = os.path.abspath(model_path)

    # Local policy net
    with tf.variable_scope("policy_eval"):
        self.policy_net = PolicyEstimator(
            policy_net.num_outputs,
            policy_net.observation_space,
        )

    # Op to copy params from global policy/value net parameters
    get_vars = tf.contrib.slim.get_variables
    trainable = tf.GraphKeys.TRAINABLE_VARIABLES
    global_vars = get_vars(scope="global", collection=trainable)
    eval_vars = get_vars(scope="policy_eval", collection=trainable)
    self.copy_params_op = make_copy_params_op(global_vars, eval_vars)
# different policy and value nets for all tasks policy_nets = [] value_nets = [] for e in range(len(envs)): with tf.variable_scope("global_{}".format(e)) as vs: policy_nets.append(PolicyEstimator( num_outputs=len(VALID_ACTIONS), state_dims=envs[e].get_state_size())) value_nets.append(ValueEstimator( reuse=True, state_dims=envs[e].get_state_size())) if FLAGS.shared_final_layer: # make all final layer weights the same initial_copy_ops = [] for e in range(1, len(envs)): initial_copy_ops += make_copy_params_op( tf.contrib.slim.get_variables(scope="global_0/policy_net", collection=tf.GraphKeys.TRAINABLE_VARIABLES), tf.contrib.slim.get_variables(scope="global_{}/policy_net".format(e), collection=tf.GraphKeys.TRAINABLE_VARIABLES)) initial_copy_ops += make_copy_params_op( tf.contrib.slim.get_variables(scope="global_0/value_net", collection=tf.GraphKeys.TRAINABLE_VARIABLES), tf.contrib.slim.get_variables(scope="global_{}/value_net".format(e), collection=tf.GraphKeys.TRAINABLE_VARIABLES)) # Global step iterator global_counter = itertools.count() # Create worker graphs workers = [] for worker_id in range(NUM_WORKERS): # add a curriculum for avoid task env_id = worker_id % len(envs) curriculum = None if 'avoid' in envs[env_id].task:
def __init__(self, envs, policy_net, domain, instances, summary_writer, saver=None):
    """Set up a multi-instance evaluator and its local copy of the policy net.

    Args:
        envs: Sequence of environments; ``envs[0].num_state_vars`` is
            stored as ``self.n``.
        policy_net: Global policy estimator. It is kept as
            ``self.global_policy_net`` and its hyper-parameters are
            reused to build the local ``PolicyEstimator``.
        domain: Domain passed to ``InstanceParser`` for every instance.
        instances: Sequence of instances; one adjacency matrix is built
            for each.
        summary_writer: Writer whose ``get_logdir()`` anchors the stats
            and checkpoint paths.
        saver: Optional saver object, stored unchanged.

    Raises:
        OSError: If the stats directory cannot be created and does not
            already exist.
    """
    self.stats_dir = os.path.join(summary_writer.get_logdir(), "../stats")
    self.stats_dir = os.path.abspath(self.stats_dir)

    self.n = envs[0].num_state_vars
    self.domain = domain
    self.instances = instances
    self.N = len(instances)
    self.envs = envs
    self.global_policy_net = policy_net

    # Construct adjacency list
    self.adjacency_lists = [None] * self.N
    self.single_adj_preprocessed_list = [None] * self.N

    for i, instance in enumerate(self.instances):
        # The parser, the feature dims and nf_features are overwritten on
        # every iteration, so after the loop they hold the values of the
        # last instance.
        self.instance_parser = InstanceParser(self.domain, instance)
        self.fluent_feature_dims, self.nonfluent_feature_dims = (
            self.instance_parser.get_feature_dims())
        self.nf_features = self.instance_parser.get_nf_features()
        adjacency_list = self.instance_parser.get_adjacency_list()
        self.adjacency_lists[i] = nx.adjacency_matrix(
            nx.from_dict_of_lists(adjacency_list))
        self.single_adj_preprocessed_list[i] = preprocess_adj(
            self.adjacency_lists[i])

    self.summary_writer = summary_writer
    self.saver = saver

    self.checkpoint_path = os.path.abspath(
        os.path.join(summary_writer.get_logdir(), "../checkpoints/model"))

    try:
        os.makedirs(self.stats_dir)
    except OSError:
        # Only an already existing directory is harmless. A bare
        # ``except`` would also hide KeyboardInterrupt and genuine
        # failures such as missing permissions.
        if not os.path.isdir(self.stats_dir):
            raise

    # Local policy net
    with tf.variable_scope("policy_eval"):
        self.policy_net = PolicyEstimator(
            policy_net.num_inputs, policy_net.N, policy_net.num_hidden1,
            policy_net.num_hidden2, policy_net.num_hidden_transition,
            policy_net.num_outputs, policy_net.fluent_feature_dims,
            policy_net.nonfluent_feature_dims, policy_net.activation,
            policy_net.learning_rate)

    # Op to copy params from global policy/value net parameters
    self.copy_params_op = make_copy_params_op(
        tf.contrib.slim.get_variables(
            scope="global", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
        tf.contrib.slim.get_variables(
            scope="policy_eval", collection=tf.GraphKeys.TRAINABLE_VARIABLES))

    self.num_inputs = policy_net.num_inputs
def __init__(self, envs, policy_net, domain, instances, neighbourhood,
             summary_writer, saver=None):
    """Set up a multi-instance evaluator and its local copy of the policy net.

    Args:
        envs: Sequence of environments, indexed in parallel with
            ``instances``; each supplies feature dims, non-fluent features
            and adjacency lists.
        policy_net: Global policy estimator. It is kept as
            ``self.global_policy_net`` and its hyper-parameters are
            reused to build the local ``PolicyEstimator``.
        domain: Stored unchanged as ``self.domain``.
        instances: Sequence of instances; its length sets ``self.N``.
        neighbourhood: Passed as ``nhood`` to ``process.adj_to_bias``.
        summary_writer: Writer whose ``get_logdir()`` anchors the stats
            and checkpoint paths.
        saver: Optional saver object, stored unchanged.

    Raises:
        OSError: If the stats directory cannot be created and does not
            already exist.
    """
    self.stats_dir = os.path.join(summary_writer.get_logdir(), "../stats")
    self.stats_dir = os.path.abspath(self.stats_dir)

    self.domain = domain
    self.instances = instances
    self.N = len(instances)
    self.num_nodes_list = policy_net.num_nodes_list
    self.num_adjacency_list = policy_net.num_adjacency_list
    self.envs = envs
    self.global_policy_net = policy_net

    # Construct adjacency list
    self.adjacency_lists = [None] * self.N
    self.nf_features = [None] * self.N
    self.adjacency_lists_with_biases = [None] * self.N

    for i in range(self.N):
        # The feature dims are overwritten on every iteration, so after
        # the loop they hold the values of the last environment.
        self.fluent_feature_dims, self.nonfluent_feature_dims = (
            self.envs[i].get_feature_dims())
        self.nf_features[i] = self.envs[i].get_nf_features()
        adjacency_list = self.envs[i].get_adjacency_list()
        self.adjacency_lists[i] = [
            get_adj_mat_from_list(aj) for aj in adjacency_list
        ]
        # Each matrix is wrapped in a one-element array for adj_to_bias
        # and the first element of the result is kept.
        self.adjacency_lists_with_biases[i] = [
            process.adj_to_bias(np.array([aj]), [self.num_nodes_list[i]],
                                nhood=neighbourhood)[0]
            for aj in self.adjacency_lists[i]
        ]

    self.summary_writer = summary_writer
    self.saver = saver

    self.checkpoint_path = os.path.abspath(
        os.path.join(summary_writer.get_logdir(), "../checkpoints/model"))

    try:
        os.makedirs(self.stats_dir)
    except OSError:
        # Only an already existing directory is harmless. A bare
        # ``except`` would also hide KeyboardInterrupt and genuine
        # failures such as missing permissions.
        if not os.path.isdir(self.stats_dir):
            raise

    # Local policy net
    with tf.variable_scope("policy_eval"):
        self.policy_net = PolicyEstimator(
            policy_net.num_nodes_list, policy_net.fluent_feature_dims,
            policy_net.nonfluent_feature_dims, policy_net.N,
            policy_net.num_valid_actions_list,
            policy_net.action_details_list,
            policy_net.num_graph_fluent_list, policy_net.num_gcn_hidden,
            policy_net.num_action_dim, policy_net.num_decoder_dim,
            policy_net.num_adjacency_list, policy_net.num_gat_layers,
            policy_net.activation, policy_net.learning_rate)

    # Op to copy params from global policy/value net parameters
    self.copy_params_op = make_copy_params_op(
        tf.contrib.slim.get_variables(
            scope="global", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
        tf.contrib.slim.get_variables(
            scope="policy_eval", collection=tf.GraphKeys.TRAINABLE_VARIABLES))