def run(self):
    """Create this actor's environment and policy graph, then serve tasks.

    Tasks are taken from ``self.task_q`` and handled as follows:

    * ``1`` -- collect a rollout via ``self.rollout()`` and put it on
      ``self.result_q``;
    * ``2`` -- print a kill message, close the monitor (if enabled) and stop;
    * anything else -- treated as policy weights and handed to
      ``self.set_policy``.
    """
    self.env = gym.make(self.args.task)
    self.env.seed(randint(0, 999999))
    if self.monitor:
        self.env.monitor.start('monitor/', force=True)

    # tensorflow variables (same as in model.py)
    self.observation_size = self.env.observation_space.shape[0]
    self.action_size = np.prod(self.env.action_space.shape)
    self.hidden_size = 64

    weight_init = tf.random_uniform_initializer(-0.05, 0.05)
    bias_init = tf.constant_initializer(0)

    def dense(inputs, n_in, n_out, layer_name):
        # Every policy layer shares the same weight/bias initializers.
        return fully_connected(inputs, n_in, n_out, weight_init, bias_init,
                               layer_name)

    # tensorflow model of the policy
    self.obs = tf.placeholder(tf.float32, [None, self.observation_size])
    self.debug = tf.constant([2, 2])

    # NOTE(review): the extent of this scope was reconstructed from
    # flattened source -- confirm against the original layout.
    with tf.variable_scope("policy-a"):
        hidden = tf.nn.relu(
            dense(self.obs, self.observation_size, self.hidden_size,
                  "policy_h1"))
        hidden = tf.nn.relu(
            dense(hidden, self.hidden_size, self.hidden_size, "policy_h2"))
        mean = dense(hidden, self.hidden_size, self.action_size, "policy_h3")
        initial_logstd = .01 * np.random.randn(1, self.action_size)
        action_dist_logstd_param = tf.Variable(
            initial_logstd.astype(np.float32), name="policy_logstd")

    self.action_dist_mu = mean
    # Repeat the single log-std row once per row of the mean tensor.
    n_rows = tf.shape(self.action_dist_mu)[0]
    self.action_dist_logstd = tf.tile(action_dist_logstd_param,
                                      tf.stack((n_rows, 1)))

    config = tf.ConfigProto(device_count={'GPU': 0})
    self.session = tf.Session(config=config)
    self.session.run(tf.global_variables_initializer())
    var_list = tf.trainable_variables()
    self.set_policy = SetPolicyWeights(self.session, var_list)

    while True:
        # get a task, or wait until it gets one
        next_task = self.task_q.get(block=True)

        if next_task == 1:
            # the task is an actor request to collect experience
            path = self.rollout()
            self.task_q.task_done()
            self.result_q.put(path)
            continue

        if next_task == 2:
            print("kill message")
            if self.monitor:
                self.env.monitor.close()
            self.task_q.task_done()
            return

        # the task is to set parameters of the actor policy
        self.set_policy(next_task)
        # super hacky method to make sure when we fill the queue with set
        # parameter tasks, an actor doesn't finish updating before the
        # other actors can accept their own tasks.
        time.sleep(0.1)
        self.task_q.task_done()
def create_net(self, shape):
    """Build the value-function network and its training op.

    Creates the input placeholder ``self.x`` (``[None, shape]``) and target
    placeholder ``self.y`` (``[None]``), a three-layer fully connected
    network under the ``"VF"`` variable scope, the flattened prediction
    ``self.net`` and an Adam training op ``self.train`` that minimizes
    ``tf.nn.l2_loss`` of the prediction error. Finally runs the global
    variable initializer in ``self.session``.

    Args:
        shape: width of the input features (second dimension of ``self.x``).
    """
    hidden_size = 64
    print(shape)

    self.x = tf.placeholder(tf.float32, shape=[None, shape], name="x")
    self.y = tf.placeholder(tf.float32, shape=[None], name="y")

    weight_init = tf.random_uniform_initializer(-0.05, 0.05)
    bias_init = tf.constant_initializer(0)

    def dense(inputs, n_in, n_out, layer_name):
        # All layers share the same initializers.
        return utils.fully_connected(inputs, n_in, n_out, weight_init,
                                     bias_init, layer_name)

    # NOTE(review): the extent of this scope was reconstructed from
    # flattened source -- confirm against the original layout.
    with tf.variable_scope("VF"):
        hidden = tf.nn.relu(dense(self.x, shape, hidden_size, "h1"))
        hidden = tf.nn.relu(dense(hidden, hidden_size, hidden_size, "h2"))
        output = dense(hidden, hidden_size, 1, "h3")

    # Flatten the single-unit output layer into a 1-D tensor.
    self.net = tf.reshape(output, (-1, ))
    prediction_loss = tf.nn.l2_loss(self.net - self.y)
    self.train = tf.train.AdamOptimizer().minimize(prediction_loss)
    self.session.run(tf.global_variables_initializer())
def build_graph(self):
    """Assemble the encoder / fused-code / decoder graph and its losses.

    Sets ``self.image_latent``, ``self.text_latent``, ``self.fuse_hashcode``,
    ``self.image_tilde``, ``self.text_tilde``, ``self.recon_image_loss`` and
    ``self.recon_text_loss``, then calls ``self._classify_vars()`` and
    ``self._init_summary()``.
    """
    encoder_dims = [128, 512]
    self.image_latent = image_encoder(self.image_ph,
                                      encoder_dims,
                                      self.keep_prob_ph,
                                      training=self.training_ph)
    self.text_latent = text_encoder(self.text_ph,
                                    [128, 512],
                                    self.keep_prob_ph,
                                    training=self.training_ph)

    # Concatenate both latents along axis 1 and project to the code length.
    fused = tf.concat([self.image_latent, self.text_latent], axis=1)
    fused_dense = fully_connected(fused, 512, 'dense_latent')
    code_logits = fully_connected(fused_dense,
                                  self.latent_len,
                                  'coding_layer',
                                  activation=None)

    # atanh
    # NOTE(review): the alpha-dependent term is added to the code itself
    # rather than to a loss -- confirm this is intended.
    scaled_tanh = tf.tanh(self.alpha * code_logits)
    alpha_term = 0.001 * tf.norm((1 / self.alpha))**2
    self.fuse_hashcode = scaled_tanh + alpha_term

    self.image_tilde = image_decoder(self.fuse_hashcode,
                                     [128, self.input_len_img],
                                     self.keep_prob_ph,
                                     training=self.training_ph)
    self.text_tilde = text_decoder(self.fuse_hashcode,
                                   [128, self.input_len_txt],
                                   self.keep_prob_ph,
                                   training=self.training_ph)

    # Mean squared reconstruction error for each modality.
    image_error = self.image_ph - self.image_tilde
    self.recon_image_loss = tf.reduce_mean(tf.square(image_error))
    text_error = self.text_ph - self.text_tilde
    self.recon_text_loss = tf.reduce_mean(tf.square(text_error))

    self._classify_vars()
    self._init_summary()
def inference(self, images):
    """Build the ResNet graph for ``images`` and return the logits tensor.

    Stores the reshaped logits on ``self.logits`` and their softmax on
    ``self.predictions``. Progress and tensor shapes are printed while the
    graph is constructed.

    Note: ``self.fix_blocks`` is decremented once in the ``conv1`` scope and
    once per residual unit, so every call modifies it.

    Args:
        images: input handed to ``utils.input_data`` together with
            ``self.data_format``.

    Returns:
        ``self.logits``, reshaped to ``(-1, self.num_classes)``.
    """
    # Each print takes a single pre-formatted string so the output is the
    # same under the Python 2 print statement and the Python 3 function
    # (the doubled space matches the separator the old statement inserted).
    print('================== Resnet structure =======================')
    print('num_residual_units:  %s' % (self.num_residual_units,))
    print('channels in each block:  %s' % (self.filters,))
    print('stride in each block:  %s' % (self.strides,))
    print('================== constructing network ====================')

    x = utils.input_data(images, self.data_format)
    x = tf.cast(x, self.float_type)
    print('shape input:  %s' % (x.get_shape(),))

    with tf.variable_scope('conv1'):
        # Layers are frozen while the fix_blocks counter is still positive.
        trainable_ = False if self.fix_blocks > 0 else True
        self.fix_blocks -= 1
        x = utils.conv2d_same(x, 64, 7, 2,
                              trainable=trainable_,
                              data_format=self.data_format,
                              initializer=self.initializer,
                              float_type=self.float_type)
        x = utils.batch_norm('BatchNorm', x, trainable_, self.data_format,
                             self.mode,
                             use_gamma=self.bn_use_gamma,
                             use_beta=self.bn_use_beta,
                             bn_epsilon=self.bn_epsilon,
                             bn_ema=self.bn_ema,
                             float_type=self.float_type)
        x = utils.relu(x)
        x = utils.max_pool(x, 3, 2, self.data_format)
    print('shape after pool1:  %s' % (x.get_shape(),))

    for block_index in range(len(self.num_residual_units)):
        for unit_index in range(self.num_residual_units[block_index]):
            with tf.variable_scope('block%d' % (block_index + 1)):
                with tf.variable_scope('unit_%d' % (unit_index + 1)):
                    # Only the last unit of a block uses the block's stride.
                    stride = 1
                    if unit_index == self.num_residual_units[
                            block_index] - 1:
                        stride = self.strides[block_index]

                    trainable_ = False if self.fix_blocks > 0 else True
                    self.fix_blocks -= 1
                    x = utils.bottleneck_residual(
                        x, self.filters[block_index], stride,
                        data_format=self.data_format,
                        initializer=self.initializer,
                        rate=self.rate[block_index],
                        trainable=trainable_,
                        bn_mode=self.mode,
                        bn_use_gamma=self.bn_use_gamma,
                        bn_use_beta=self.bn_use_beta,
                        bn_epsilon=self.bn_epsilon,
                        bn_ema=self.bn_ema,
                        float_type=self.float_type)
        print('shape after block %d:  %s' % (block_index + 1, x.get_shape()))

    with tf.variable_scope('logits'):
        x = utils.global_avg_pool(x, self.data_format)
        self.logits = utils.fully_connected(x, self.num_classes,
                                            trainable=True,
                                            data_format=self.data_format,
                                            initializer=self.initializer,
                                            float_type=self.float_type)
        self.logits = tf.reshape(self.logits, (-1, self.num_classes))
        self.predictions = tf.nn.softmax(self.logits)

    print('================== network constructed ====================')
    return self.logits
pickle.dump(data, pickle_out)

# Save a picture of the plotted data
plot_data(train_data, train_label, 'data', test_data, test_label)

optimizer = keras.optimizers.RMSprop(lr=0.01, decay=0)

# Generate model and train on just the training data.
n_hidden_layers = 5
n_neurons_per_layer = 25
good_model_params = {
    "n_hidden_layers": n_hidden_layers,
    "n_neurons_per_layer": n_neurons_per_layer,
    "n_points": n_points,
    "seed_value": seed_value,
}
model = fully_connected(n_hidden_layers, n_neurons_per_layer)
model.compile(optimizer=optimizer,
              loss=binary_crossentropy,
              metrics=['binary_accuracy'])

# Persist the model parameters next to the checkpoints. The context manager
# closes (and therefore flushes) the file as soon as the dump finishes,
# instead of leaving the handle open.
params_path = os.path.join(directory, "good_model_params")
with open(params_path, "wb") as pickle_out:
    pickle.dump(good_model_params, pickle_out)

# Callbacks: save weights-only checkpoints (file name templated on the
# epoch) and log histograms for TensorBoard.
filepath = os.path.join(directory, "good_model_{epoch}.hdf5")
checkpoint = keras.callbacks.ModelCheckpoint(filepath,
                                             verbose=0,
                                             save_best_only=False,
                                             save_weights_only=True)
tensorboard_callback = keras.callbacks.TensorBoard(histogram_freq=1)
callbacks_list = [checkpoint, tensorboard_callback]
H = X

# Disabled CNN front-end (previously held in a bare string literal):
#
# # Number of filters and layers of the CNN
# n_filters = [3, 2, 1]
# for layer_i, n_filters_i in enumerate(n_filters):
#     H, W = conv2d(H, n_filters_i, k_h=3, k_w=3, d_h=1, d_w=1,
#                   name=str(layer_i))
#     H = tf.nn.relu(H)
#     if layer_i % 2 == 1:
#         H = tf.layers.max_pooling2d(H, pool_size=(2, 2), strides=(1, 1),
#                                     padding='SAME', name=str(layer_i))

# Number of filters and layers of the FCN
layers = [100, 100, 4]
for layer_i, n_output_i in enumerate(layers):
    H, W = fully_connected(H, n_output=n_output_i, name=layer_i)
    # Softmax on the final layer, ReLU on every layer before it.
    H = tf.nn.softmax(H) if layer_i == len(layers) - 1 else tf.nn.relu(H)
Y_predicted = H

# Cost function: per-example loss summed over axis 1, then averaged.
loss = binary_cross_entropy(Y_predicted, Y)
cost = tf.reduce_mean(tf.reduce_sum(loss, 1))

# Measure of accuracy: compare arg-max of prediction and target.
predicted_y = tf.argmax(Y_predicted, 1)
actual_y = tf.argmax(Y, 1)
correct_prediction = tf.equal(predicted_y, actual_y)
def make_model(self):
    """Build the policy graph, its losses and the helper ops around it.

    Creates the session, the input placeholders, a Gaussian policy network
    under the ``"policy"`` variable scope, the surrogate loss / KL / entropy
    tensors (``self.losses``), the flat policy gradient (``self.pg``), the
    KL Hessian-vector style product (``self.fvp``) driven by
    ``self.flat_tangent``, parameter getters/setters (``self.gf``,
    ``self.sff``, ``self.get_policy``) and the value function ``self.vf``.
    """
    self.observation_size = self.observation_space.shape[0]
    self.action_size = np.prod(self.action_space.shape)
    self.hidden_size = 64

    weight_init = tf.random_uniform_initializer(-0.05, 0.05)
    bias_init = tf.constant_initializer(0)

    config = tf.ConfigProto(device_count={'GPU': 0})
    self.session = tf.Session(config=config)

    action_batch_shape = [None, self.action_size]
    self.obs = tf.placeholder(tf.float32, [None, self.observation_size])
    self.action = tf.placeholder(tf.float32, action_batch_shape)
    self.advantage = tf.placeholder(tf.float32, [None])
    self.oldaction_dist_mu = tf.placeholder(tf.float32, action_batch_shape)
    self.oldaction_dist_logstd = tf.placeholder(tf.float32,
                                                action_batch_shape)

    def dense(inputs, n_in, n_out, layer_name):
        # Every policy layer shares the same weight/bias initializers.
        return utils.fully_connected(inputs, n_in, n_out, weight_init,
                                     bias_init, layer_name)

    # NOTE(review): the extent of this scope was reconstructed from
    # flattened source -- confirm against the original layout.
    with tf.variable_scope("policy"):
        hidden = tf.nn.relu(
            dense(self.obs, self.observation_size, self.hidden_size,
                  "policy_h1"))
        hidden = tf.nn.relu(
            dense(hidden, self.hidden_size, self.hidden_size, "policy_h2"))
        mean = dense(hidden, self.hidden_size, self.action_size, "policy_h3")
        initial_logstd = .01 * np.random.randn(1, self.action_size)
        action_dist_logstd_param = tf.Variable(
            initial_logstd.astype(np.float32), name="policy_logstd")

    # means for each action
    self.action_dist_mu = mean
    # log standard deviations for each action: the single parameter row is
    # repeated once per row of the mean tensor
    n_rows = tf.shape(self.action_dist_mu)[0]
    self.action_dist_logstd = tf.tile(action_dist_logstd_param,
                                      tf.stack((n_rows, 1)))

    batch_size = tf.shape(self.obs)[0]

    # log-probabilities of self.action under the new and old distributions
    log_p_n = utils.gauss_log_prob(self.action_dist_mu,
                                   self.action_dist_logstd, self.action)
    log_oldp_n = utils.gauss_log_prob(self.oldaction_dist_mu,
                                      self.oldaction_dist_logstd,
                                      self.action)

    # tf.exp(log_p_n) / tf.exp(log_oldp_n), computed in log space
    ratio = tf.exp(log_p_n - log_oldp_n)

    # importance sampling of surrogate loss (L in paper)
    surr = -tf.reduce_mean(ratio * self.advantage)
    var_list = tf.trainable_variables()

    batch_size_float = tf.cast(batch_size, tf.float32)

    # kl divergence and shannon entropy, both divided by the batch size
    kl = utils.gauss_KL(self.oldaction_dist_mu, self.oldaction_dist_logstd,
                        self.action_dist_mu,
                        self.action_dist_logstd) / batch_size_float
    ent = utils.gauss_ent(self.action_dist_mu,
                          self.action_dist_logstd) / batch_size_float

    self.losses = [surr, kl, ent]

    # policy gradient
    self.pg = utils.flatgrad(surr, var_list)

    # KL divergence w/ itself, with first argument kept constant.
    kl_firstfixed = utils.gauss_selfKL_firstfixed(
        self.action_dist_mu, self.action_dist_logstd) / batch_size_float
    # gradient of KL w/ itself
    kl_grads = tf.gradients(kl_firstfixed, var_list)

    # what vector we're multiplying by
    self.flat_tangent = tf.placeholder(tf.float32, [None])

    # Cut the flat tangent into consecutive pieces, one per variable, each
    # reshaped to that variable's shape.
    shapes = map(utils.var_shape, var_list)
    offset = 0
    tangents = []
    for var_shape in shapes:
        n_elems = np.prod(var_shape)
        piece = self.flat_tangent[offset:(offset + n_elems)]
        tangents.append(tf.reshape(piece, var_shape))
        offset += n_elems

    # gradient of KL w/ itself * tangent
    gvp = [
        tf.reduce_sum(grad * tangent)
        for (grad, tangent) in zip(kl_grads, tangents)
    ]
    # 2nd gradient of KL w/ itself * tangent
    self.fvp = utils.flatgrad(gvp, var_list)

    # the actual parameter values
    self.gf = utils.GetFlat(self.session, var_list)
    # call this to set parameter values
    self.sff = utils.SetFromFlat(self.session, var_list)
    self.session.run(tf.global_variables_initializer())

    # value function
    # self.vf = VF(self.session)
    self.vf = LinearVF()

    self.get_policy = utils.GetPolicyWeights(self.session, var_list)