def __init__(self, board_width, board_height, model_file=None):
    """Build the policy-value network, open a session and initialize it.

    The graph has a three-layer convolutional trunk feeding a policy head
    (log-probabilities over every board cell) and a value head (a tanh
    score for the current state), followed by the loss and the optimizer.

    Args:
        board_width: width of the board in cells.
        board_height: height of the board in cells.
        model_file: optional checkpoint path; when not None it is passed
            to ``self.restore_model`` after the variables are initialized.
    """
    self.board_width = board_width
    self.board_height = board_height
    num_cells = board_height * board_width

    def conv_relu(inputs, filters, kernel_size):
        # All convolutions share padding, data layout and activation.
        return tf.layers.conv2d(inputs=inputs,
                                filters=filters,
                                kernel_size=kernel_size,
                                padding="same",
                                data_format="channels_last",
                                activation=tf.nn.relu)

    # 1. Input: 4 planes per state, fed channels-first and transposed to
    # channels-last for the convolutions.
    self.input_states = tf.placeholder(
        tf.float32, shape=[None, 4, board_height, board_width])
    self.input_state = tf.transpose(self.input_states, [0, 2, 3, 1])

    # 2. Shared convolutional trunk.
    self.conv1 = conv_relu(self.input_state, 32, [3, 3])
    self.conv2 = conv_relu(self.conv1, 64, [3, 3])
    self.conv3 = conv_relu(self.conv2, 128, [3, 3])

    # 3. Policy head: 1x1 convolution, flatten, then a dense layer whose
    # output is the log-probability of a move on each board cell.
    self.action_conv = conv_relu(self.conv3, 4, [1, 1])
    self.action_conv_flat = tf.reshape(self.action_conv,
                                       [-1, 4 * num_cells])
    self.action_fc = tf.layers.dense(inputs=self.action_conv_flat,
                                     units=num_cells,
                                     activation=tf.nn.log_softmax)

    # 4. Value head: 1x1 convolution, flatten, two dense layers; the last
    # one outputs the evaluation score of the current state.
    self.evaluation_conv = conv_relu(self.conv3, 2, [1, 1])
    self.evaluation_conv_flat = tf.reshape(self.evaluation_conv,
                                           [-1, 2 * num_cells])
    self.evaluation_fc1 = tf.layers.dense(
        inputs=self.evaluation_conv_flat, units=64, activation=tf.nn.relu)
    self.evaluation_fc2 = tf.layers.dense(inputs=self.evaluation_fc1,
                                          units=1,
                                          activation=tf.nn.tanh)

    # Loss, part 1: value loss between the per-state outcome labels and
    # the predicted evaluation score.
    self.labels = tf.placeholder(tf.float32, shape=[None, 1])
    self.value_loss = tf.losses.mean_squared_error(self.labels,
                                                   self.evaluation_fc2)

    # Loss, part 2: cross-entropy between the MCTS move probabilities and
    # the predicted log-probabilities.
    self.mcts_probs = tf.placeholder(tf.float32, shape=[None, num_cells])
    weighted_log_probs = tf.multiply(self.mcts_probs, self.action_fc)
    self.policy_loss = tf.negative(
        tf.reduce_mean(tf.reduce_sum(weighted_log_probs, 1)))

    # Loss, part 3: L2 penalty on every trainable variable that is not a
    # bias.
    l2_penalty_beta = 1e-4
    trainable = tf.trainable_variables()
    l2_terms = [
        tf.nn.l2_loss(variable) for variable in trainable
        if 'bias' not in variable.name.lower()
    ]
    l2_penalty = l2_penalty_beta * tf.add_n(l2_terms)

    self.loss = self.value_loss + self.policy_loss + l2_penalty

    # Optimizer; the learning rate is fed at training time.
    self.learning_rate = tf.placeholder(tf.float32)
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(self.loss)

    self.session = tf.Session()

    # Policy entropy, used for monitoring only.
    prob_times_log_prob = tf.exp(self.action_fc) * self.action_fc
    self.entropy = tf.negative(
        tf.reduce_mean(tf.reduce_sum(prob_times_log_prob, 1)))

    self.session.run(tf.global_variables_initializer())

    # Saver for storing and restoring the model.
    self.saver = tf.train.Saver()
    if model_file is not None:
        self.restore_model(model_file)
def __init__(self,
             session,
             player_id,
             info_state_size,
             num_actions,
             loss_str="rpg",
             loss_class=None,
             hidden_layers_sizes=(128, ),
             batch_size=16,
             critic_learning_rate=0.01,
             pi_learning_rate=0.001,
             entropy_cost=0.01,
             num_critic_before_pi=8,
             additional_discount_factor=1.0,
             max_global_gradient_norm=None,
             optimizer_str="sgd"):
    """Initialize the PolicyGradient agent.

    Note that exactly one of `loss_str` and `loss_class` must be truthy.
    Because `loss_str` defaults to "rpg", callers that pass `loss_class`
    must also pass `loss_str=None`.

    Args:
      session: Tensorflow session.
      player_id: int, player identifier. Usually its position in the game.
      info_state_size: int, info_state vector size.
      num_actions: int, number of actions per info state.
      loss_str: string or None. If string, must be one of ["rpg", "qpg",
        "rm", "a2c"] and defined in `_get_loss_class`. If None, a loss
        class must be passed through `loss_class`. Defaults to "rpg".
      loss_class: Class or None. If Class, it must define the policy
        gradient loss. If None a loss class in a string format must be
        passed through `loss_str`. Defaults to None.
      hidden_layers_sizes: iterable, defines the neural network layers.
        Defaults to (128,), which produces a NN:
        [INPUT] -> [128] -> ReLU -> [OUTPUT].
      batch_size: int, batch size to use for Q and Pi learning. Defaults
        to 16.
      critic_learning_rate: float, learning rate used for Critic (Q or V).
        Defaults to 0.01.
      pi_learning_rate: float, learning rate used for Pi. Defaults to
        0.001.
      entropy_cost: float, entropy cost used to multiply the entropy loss.
        Can be set to None to skip entropy computation. Defaults to 0.01.
      num_critic_before_pi: int, number of Critic (Q or V) updates before
        each Pi update. Defaults to 8 (every 8th critic learning step, Pi
        also learns).
      additional_discount_factor: float, additional discount to compute
        returns. Defaults to 1.0, in which case, no extra discount is
        applied.
      max_global_gradient_norm: float or None, maximum global norm of a
        gradient to which the gradient is shrunk if its value is larger.
      optimizer_str: String defining which optimizer to use. Supported
        values are {sgd, adam}.

    Raises:
      ValueError: if `optimizer_str` is neither "adam" nor "sgd".
    """
    assert bool(loss_str) ^ bool(
        loss_class), "Please provide only one option."
    # Snapshot of the constructor arguments; must stay directly after the
    # assertion so that it only contains `self` and the parameters.
    self._kwargs = locals()
    loss_class = loss_class if loss_class else self._get_loss_class(
        loss_str)
    # The A2C loss is the only one that uses a state-value baseline (V)
    # instead of per-action Q-values.
    is_a2c = loss_class.__name__ == "BatchA2CLoss"

    def make_optimizer(learning_rate):
        """Build the optimizer selected by `optimizer_str`."""
        if optimizer_str == "adam":
            return tf.train.AdamOptimizer(learning_rate=learning_rate)
        if optimizer_str == "sgd":
            return tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        raise ValueError("Not implemented, choose from 'adam' and 'sgd'.")

    def minimize_with_clipping(optimizer, loss):
        """Apply gradients, clipped by global norm when configured."""
        grads_and_vars = optimizer.compute_gradients(loss)
        if max_global_gradient_norm is not None:
            grads, variables = zip(*grads_and_vars)
            grads, _ = tf.clip_by_global_norm(grads,
                                              max_global_gradient_norm)
            grads_and_vars = list(zip(grads, variables))
        return optimizer.apply_gradients(grads_and_vars)

    self.player_id = player_id
    self._session = session
    self._num_actions = num_actions
    self._layer_sizes = hidden_layers_sizes
    self._batch_size = batch_size
    self._extra_discount = additional_discount_factor
    self._num_critic_before_pi = num_critic_before_pi

    self._episode_data = []
    self._dataset = collections.defaultdict(list)
    self._prev_time_step = None
    self._prev_action = None

    # Step counters
    self._step_counter = 0
    self._episode_counter = 0
    self._num_learn_steps = 0

    # Keep track of the last training loss achieved in an update step.
    self._last_loss_value = None

    # Placeholders
    self._info_state_ph = tf.placeholder(shape=[None, info_state_size],
                                         dtype=tf.float32,
                                         name="info_state_ph")
    self._action_ph = tf.placeholder(shape=[None],
                                     dtype=tf.int32,
                                     name="action_ph")
    self._return_ph = tf.placeholder(shape=[None],
                                     dtype=tf.float32,
                                     name="return_ph")

    # Network
    # activate final as we plug logit and qvalue heads afterwards.
    self._net_torso = snt.nets.MLP(output_sizes=self._layer_sizes,
                                   activate_final=True)
    torso_out = self._net_torso(self._info_state_ph)
    self._policy_logits_layer = snt.Linear(output_size=self._num_actions,
                                           name="policy_head")
    self.policy_logits_network = snt.Sequential(
        [self._net_torso, self._policy_logits_layer])
    self._policy_logits = self._policy_logits_layer(torso_out)
    self._policy_probs = tf.nn.softmax(self._policy_logits)

    self._savers = []

    # One output for the A2C baseline, one per action for Q-values.
    used_output_size = 1 if is_a2c else self._num_actions
    self._q_values_layer = snt.Linear(output_size=used_output_size,
                                      name="q_values_head")

    if is_a2c:
        # Baseline (V) head and baseline loss.
        self._baseline = tf.squeeze(self._q_values_layer(torso_out),
                                    axis=1)
        self._critic_loss = tf.reduce_mean(
            tf.losses.mean_squared_error(labels=self._return_ph,
                                         predictions=self._baseline))
    else:
        # Q-values head; the Q-loss only looks at the value of the action
        # that was actually taken in each row.
        self._q_values = self._q_values_layer(torso_out)
        action_indices = tf.stack(
            [tf.range(tf.shape(self._q_values)[0]), self._action_ph],
            axis=-1)
        value_predictions = tf.gather_nd(self._q_values, action_indices)
        self._critic_loss = tf.reduce_mean(
            tf.losses.mean_squared_error(labels=self._return_ph,
                                         predictions=value_predictions))

    self._critic_optimizer = make_optimizer(critic_learning_rate)
    self._critic_learn_step = minimize_with_clipping(
        self._critic_optimizer, self._critic_loss)

    # Pi loss
    pg_class = loss_class(entropy_cost=entropy_cost)
    if is_a2c:
        self._pi_loss = pg_class.loss(policy_logits=self._policy_logits,
                                      baseline=self._baseline,
                                      actions=self._action_ph,
                                      returns=self._return_ph)
    else:
        self._pi_loss = pg_class.loss(policy_logits=self._policy_logits,
                                      action_values=self._q_values)

    self._pi_optimizer = make_optimizer(pi_learning_rate)
    self._pi_learn_step = minimize_with_clipping(self._pi_optimizer,
                                                 self._pi_loss)
    self._loss_str = loss_str
    self._initialize()
def predict(self,
            dataset: Dataset,
            loaded_model=False,
            batch_size=64,
            draw=False):
    """Rebuild the model in a fresh graph, restore weights and evaluate.

    The graph is built inside a new ``tf.Graph``, the variables are
    restored from ``self.ckpt_path`` and ``self.evaluate`` is run on
    `dataset`. The session is always closed, even when restoring or
    evaluating raises.

    Args:
        dataset: data handed unchanged to ``self.evaluate``.
        loaded_model: not read by this method.
        batch_size: batch size; stored on ``self.batch_size`` and used for
            every placeholder shape.
        draw: forwarded to ``self.evaluate``.
    """
    # NOTE(review): device '/cpu:1' is kept from the original; confirm
    # whether '/cpu:0' was intended.
    with tf.device('/cpu:1'):
        with tf.Graph().as_default():
            # Per-call state that self.model() is expected to fill.
            self.batch_size = batch_size
            self.mean_locs = []  # expectation of locations
            self.sampled_locs = []  # sampled locations ~N(mean_locs[.], loc_sd)
            self.baselines = []  # baseline, the value prediction
            self.glimpse_images = []  # to show in window

            # Learning-rate schedule.
            self.global_step = tf.Variable(0, trainable=False)
            self.lr = tf.train.exponential_decay(self.initLr,
                                                 self.global_step,
                                                 self.lrDecayFreq,
                                                 self.lrDecayRate,
                                                 staircase=True)

            # Placeholders. `labels` is never read; it is kept so that the
            # sequence of ops created in the graph is unchanged.
            labels = tf.placeholder(
                "float32", shape=[self.batch_size, self.n_classes])
            self.labels_placeholder = tf.placeholder(
                tf.float32, shape=(self.batch_size), name="labels_raw")
            self.onehot_labels_placeholder = tf.placeholder(
                tf.float32,
                shape=(self.batch_size, self.n_classes),
                name="labels_onehot")
            self.inputs_placeholder = tf.placeholder(
                tf.float32,
                shape=(self.batch_size, self.img_size * self.img_size),
                name="images")

            # Model parameters. Naming rule:
            #   1st capital letter: W = weights, B = bias
            #   2nd lowercase letter: the network (e.g. g = glimpse network)
            #   remaining letters: input-output mapping, spelled out in the
            #   variable name argument
            self.Wg_l_h = self.weight_variable(
                (2, self.hl_size), "glimpseNet_wts_location_hidden", True)
            self.Bg_l_h = self.weight_variable(
                (1, self.hl_size), "glimpseNet_bias_location_hidden", True)
            self.Wg_g_h = self.weight_variable(
                (self.totalSensorBandwidth, self.hg_size),
                "glimpseNet_wts_glimpse_hidden", True)
            self.Bg_g_h = self.weight_variable(
                (1, self.hg_size), "glimpseNet_bias_glimpse_hidden", True)
            self.Wg_hg_gf1 = self.weight_variable(
                (self.hg_size, self.g_size),
                "glimpseNet_wts_hiddenGlimpse_glimpseFeature1", True)
            self.Wg_hl_gf1 = self.weight_variable(
                (self.hl_size, self.g_size),
                "glimpseNet_wts_hiddenLocation_glimpseFeature1", True)
            self.Bg_hlhg_gf1 = self.weight_variable(
                (1, self.g_size),
                "glimpseNet_bias_hGlimpse_hLocs_glimpseFeature1", True)
            self.Wc_g_h = self.weight_variable(
                (self.cell_size, self.g_size), "coreNet_wts_glimpse_hidden",
                True)
            self.Bc_g_h = self.weight_variable(
                (1, self.g_size), "coreNet_bias_glimpse_hidden", True)
            self.Wr_h_r = self.weight_variable(
                (self.cell_out_size, self.img_size**2),
                "reconstructionNet_wts_hidden_action", True)
            self.Br_h_r = self.weight_variable(
                (1, self.img_size**2),
                "reconstructionNet_bias_hidden_action", True)
            self.Wb_h_b = self.weight_variable(
                (self.g_size, 1), "baselineNet_wts_hiddenState_baseline",
                True)
            self.Bb_h_b = self.weight_variable(
                (1, 1), "baselineNet_bias_hiddenState_baseline", True)
            self.Wl_h_l = self.weight_variable(
                (self.cell_out_size, 2), "locationNet_wts_hidden_location",
                True)
            self.Bl_h_l = self.weight_variable(
                (1, 2), "locationNet_bias_hidden_location", True)
            self.Wa_h_a = self.weight_variable(
                (self.cell_out_size, self.n_classes),
                "actionNet_wts_hidden_action", True)
            self.Ba_h_a = self.weight_variable(
                (1, self.n_classes), "actionNet_bias_hidden_action", True)

            # Query the model output.
            outputs = self.model()

            # Convert the per-glimpse lists into single tensors laid out as
            # (batch, glimpse, 2).
            self.sampled_locs = tf.concat(axis=0, values=self.sampled_locs)
            self.sampled_locs = tf.reshape(
                self.sampled_locs, (self.nGlimpses, self.batch_size, 2))
            self.sampled_locs = tf.transpose(self.sampled_locs, [1, 0, 2])
            self.mean_locs = tf.concat(axis=0, values=self.mean_locs)
            self.mean_locs = tf.reshape(
                self.mean_locs, (self.nGlimpses, self.batch_size, 2))
            self.mean_locs = tf.transpose(self.mean_locs, [1, 0, 2])
            self.glimpse_images = tf.concat(axis=0,
                                            values=self.glimpse_images)

            # Only the reward and the label tensors are needed here; the
            # remaining outputs of calc_reward are ignored.
            (_cost, reward, predicted_labels, correct_labels, _train_op, _b,
             _avg_b, _rminusb, _lr) = self.calc_reward(outputs)

            saver = tf.train.Saver()

            sess_config = tf.ConfigProto(allow_soft_placement=True,
                                         log_device_placement=False)
            sess_config.gpu_options.allow_growth = True
            sess = tf.Session(config=sess_config)
            try:
                saver.restore(sess, self.ckpt_path)
                self.evaluate(dataset,
                              sess,
                              reward,
                              predicted_labels,
                              correct_labels,
                              self.glimpse_images,
                              draw=draw)
            finally:
                # Release the session even if restore/evaluate fails.
                sess.close()
def main(_):
    """Train batches of MNIST classifiers under different alphas and report.

    Several classifiers are trained in parallel, one per row of the current
    alpha batch. The first batch of alphas (or every batch when
    ``FLAGS.random_alpha`` is set) is sampled from a ball; later batches are
    proposed with a Gaussian-process lower confidence bound fitted on the
    validation metrics observed so far. Finally the metrics of the alpha
    with the lowest validation metric are printed.
    """
    mnist = input_data.read_data_sets('/tmp/data/', one_hot=True, seed=12345)
    random_weight_vector = np.random.uniform(low=0.1,
                                             high=1.9,
                                             size=TRAIN_INPUT_SIZE)

    x = tf.placeholder(tf.float32, shape=(None, INPUT_DIM), name='x')
    y = tf.placeholder(tf.float32, shape=(None, OUTPUT_DIM), name='y')
    weight = tf.placeholder(tf.float32,
                            shape=(None, OUTPUT_DIM),
                            name='weight')
    parallel_alphas = tf.placeholder(tf.float32,
                                     shape=(FLAGS.num_parallel_alphas,
                                            OUTPUT_DIM),
                                     name='parallel_alphas')
    unstack_parallel_alphas = tf.unstack(parallel_alphas, axis=0)

    parallel_logits = []
    parallel_losses = []
    parallel_optimizers = []
    validation_metrics = []
    test_metrics = []
    all_test_metrics = []

    # One classifier (with its own loss and optimizer) per parallel alpha.
    with tf.variable_scope('classifier'):
        for slot in range(FLAGS.num_parallel_alphas):
            logits = classifier(x)
            alpha = tf.reshape(unstack_parallel_alphas[slot],
                               shape=[OUTPUT_DIM, 1])
            optimizer, loss = optimization(logits, y, weight, alpha,
                                           LEARNING_RATE)
            parallel_logits.append(logits)
            parallel_losses.append(loss)
            parallel_optimizers.append(optimizer)

    init = tf.global_variables_initializer()
    classifiers_init = tf.variables_initializer(
        tf.global_variables(scope='classifier'))

    def propose_by_lcb(observed_alphas, observed_metrics):
        """Return ([best alpha], upper metric estimate) from a fresh GP fit."""
        radius = FLAGS.sampling_radius
        kernel = RBF(length_scale=radius,
                     length_scale_bounds=(radius * 1e-3, radius * 1e3)
                     ) * ConstantKernel(1.0, (1e-3, 1e3))
        gp = GaussianProcessRegressor(kernel=kernel, alpha=1e-4).fit(
            observed_alphas, np.log1p(observed_metrics))
        candidates = sample_from_ball((10000, OUTPUT_DIM),
                                      FLAGS.sampling_radius)
        metric_mles, metric_stds = gp.predict(candidates, return_std=True)
        metric_lcbs = np.maximum(np.expm1(metric_mles - 1.0 * metric_stds),
                                 0.0)
        metric_lcbs += np.random.random(
            size=metric_lcbs.shape) * 0.001  # break ties
        winner = np.argmin(metric_lcbs)
        estimate = np.minimum(
            np.expm1(metric_mles[winner] + 1.0 * metric_stds[winner]), 1.0)
        return [candidates[winner]], estimate

    with tf.Session() as sess:
        sess.run(init)

        def logits_on(split):
            """Evaluate every parallel classifier on one data split."""
            return sess.run(parallel_logits,
                            feed_dict={
                                x: split.images,
                                y: split.labels,
                            })

        # GetCandidatesAlpha (Algorithm 2 in paper)
        sample_alphas = np.zeros(shape=(0, OUTPUT_DIM))
        for alpha_batch_index in range(FLAGS.num_alpha_batches):
            sess.run(classifiers_init)

            if FLAGS.uniform_weights:
                alpha_batch = np.zeros(shape=(FLAGS.num_parallel_alphas,
                                              OUTPUT_DIM))
            elif FLAGS.random_alpha or alpha_batch_index < 1:
                alpha_batch = sample_from_ball(
                    size=(FLAGS.num_parallel_alphas, OUTPUT_DIM),
                    sampling_radius=FLAGS.sampling_radius)
                sample_alphas = np.concatenate([sample_alphas, alpha_batch])
            else:
                # Use LCB to generate candidates. Each proposal is added to
                # the observations with its estimated metric before the next
                # one is drawn.
                alpha_batch = np.zeros(shape=(0, OUTPUT_DIM))
                sample_metrics = validation_metrics[:]
                for _ in range(FLAGS.num_parallel_alphas):
                    best_alpha, best_alpha_metric_estimate = propose_by_lcb(
                        sample_alphas, sample_metrics)
                    alpha_batch = np.concatenate([alpha_batch, best_alpha])
                    sample_alphas = np.concatenate(
                        [sample_alphas, best_alpha])
                    sample_metrics.append(best_alpha_metric_estimate)

            # Training classifiers
            for step in range(TRAINING_STEPS):
                first = step * BATCH_SIZE % TRAIN_INPUT_SIZE
                batch_index = range(first, first + BATCH_SIZE)
                batch_x, batch_y = mnist.train.next_batch(BATCH_SIZE,
                                                          shuffle=False)
                batch_weight = [[random_weight_vector[i]] * OUTPUT_DIM
                                for i in batch_index]
                sess.run([parallel_optimizers, parallel_losses],
                         feed_dict={
                             x: batch_x,
                             y: batch_y,
                             weight: batch_weight,
                             parallel_alphas: alpha_batch,
                         })

            # Score every classifier of this batch on validation and test.
            validation_labels = mnist.validation.labels
            validation_metrics.extend(
                metric(validation_labels, batch_logits, all_digits=False)
                for batch_logits in logits_on(mnist.validation))

            parallel_test_logits = logits_on(mnist.test)
            test_labels = mnist.test.labels
            test_metrics.extend(
                metric(test_labels, batch_logits, all_digits=False)
                for batch_logits in parallel_test_logits)
            all_test_metrics.extend(
                metric(test_labels, batch_logits, all_digits=True)
                for batch_logits in parallel_test_logits)

        best_observed_index = np.argmin(validation_metrics)
        print('[metric] validation={}'.format(
            validation_metrics[best_observed_index]))
        print('[metric] test={}'.format(test_metrics[best_observed_index]))
        for i in range(10):
            print('[all test metrics] {}={}'.format(
                i, all_test_metrics[best_observed_index][i]))
def main():
    """Build a 3D convolutional denoising autoencoder, train it, restore it.

    Reads the module-level settings ``input_shape``, ``stride``, ``padding``,
    ``learning_rate``, ``logs_path``, ``config``, ``n_epochs``, ``n_batches``,
    ``noise_factor`` and ``ws_path`` and the input pipelines
    ``input_pipeline_tr`` / ``input_pipeline_val``. The last fetched volume
    and the last cost are published through the globals ``nvol`` and
    ``batch_cost``.

    Each epoch trains on ``n_batches`` noisy batches, saves a checkpoint and
    (except after the final epoch) reports the cost on one validation batch
    without updating the weights. After training, the most recently saved
    checkpoint is restored into a fresh session.
    """
    global nvol, batch_cost

    # ---- graph definition -------------------------------------------------
    inputs_ = tf.placeholder(tf.float32, input_shape, name='inputs')
    targets_ = tf.placeholder(tf.float32, input_shape, name='targets')

    # Encoder.
    conv1 = tf.keras.layers.Conv3D(filters=16,
                                   kernel_size=(3, 3, 3),
                                   strides=stride,
                                   padding=padding,
                                   activation=tf.nn.relu)(inputs_)
    maxpool1 = tf.keras.layers.MaxPool3D(pool_size=(2, 2, 2),
                                         strides=(3, 2, 2),
                                         padding=padding)(conv1)
    conv2 = tf.keras.layers.Conv3D(filters=32,
                                   kernel_size=(3, 3, 3),
                                   strides=stride,
                                   padding=padding,
                                   activation=tf.nn.relu)(maxpool1)
    maxpool2 = tf.keras.layers.MaxPool3D(pool_size=(2, 2, 2),
                                         strides=(3, 3, 2),
                                         padding=padding)(conv2)
    conv3 = tf.keras.layers.Conv3D(filters=96,
                                   kernel_size=(2, 2, 2),
                                   strides=stride,
                                   padding=padding,
                                   activation=tf.nn.relu)(maxpool2)
    maxpool3 = tf.keras.layers.MaxPool3D(pool_size=(2, 2, 2),
                                         strides=(1, 1, 2),
                                         padding=padding)(conv3)

    # Decoder: each resize factor mirrors the stride of the matching pool.
    unpool1 = K.resize_volumes(maxpool3, 1, 1, 2, "channels_last")
    deconv1 = tf.keras.layers.Conv3DTranspose(filters=96,
                                              kernel_size=(2, 2, 2),
                                              strides=stride,
                                              padding=padding,
                                              activation=tf.nn.relu)(unpool1)
    unpool2 = K.resize_volumes(deconv1, 3, 3, 2, "channels_last")
    deconv2 = tf.keras.layers.Conv3DTranspose(filters=32,
                                              kernel_size=(3, 3, 3),
                                              strides=stride,
                                              padding=padding,
                                              activation=tf.nn.relu)(unpool2)
    unpool3 = K.resize_volumes(deconv2, 3, 2, 2, "channels_last")
    deconv3 = tf.keras.layers.Conv3DTranspose(filters=16,
                                              kernel_size=(3, 3, 3),
                                              strides=stride,
                                              padding=padding,
                                              activation=tf.nn.relu)(unpool3)
    output = tf.keras.layers.Dense(units=1, activation=None)(deconv3)

    # Relative reconstruction error: Frobenius norm of the residual over the
    # first and last axes, divided by the same norm of the target.
    loss = tf.divide(
        tf.norm(tf.subtract(targets_, output), ord='fro', axis=[0, -1]),
        tf.norm(targets_, ord='fro', axis=[0, -1]))
    print("loss:", loss)
    cost = tf.reduce_mean(loss, name='loss')
    print("cost:", cost)
    opt = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    print("opt:", opt)

    all_saver = tf.train.Saver(max_to_keep=None)

    # Summary nodes.
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("cost", cost)
    tf.summary.histogram("conv1", conv1)
    tf.summary.histogram("maxpool1", maxpool1)
    tf.summary.histogram("conv2", conv2)
    tf.summary.histogram("maxpool2", maxpool2)
    tf.summary.histogram("conv3", conv3)
    tf.summary.histogram("maxpool3", maxpool3)
    tf.summary.histogram("unpool3", unpool3)
    tf.summary.histogram("deconv3", deconv3)
    tf.summary.histogram("unpool2", unpool2)
    tf.summary.histogram("deconv2", deconv2)
    tf.summary.histogram("unpool1", unpool1)
    tf.summary.histogram("deconv1", deconv1)

    # The merged summary op is built but never run below; only the graph is
    # written to the log directory.
    summary_op = tf.summary.merge_all(key='summaries')
    writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

    # ---- session and input pipeline --------------------------------------
    init_op = tf.global_variables_initializer()
    sess = tf.Session(config=config)
    sess.run(init_op)

    fetch_op_tr = input_pipeline_tr()
    fetch_op_val = input_pipeline_val()

    # Coordinator and queue runners feed batches from the input pipeline.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # ---- training ----------------------------------------------------------
    counter = 0
    last_checkpoint = None
    try:
        while not coord.should_stop():
            epoch_start = time.time()
            print('\nEpoch\t' + str(counter + 1) + '/' + str(n_epochs))

            for i in range(n_batches):
                vol = sess.run(fetch_op_tr)
                nvol = np.asarray(vol)
                # Denoising objective: corrupt the input, keep the target
                # clean.
                noisy_nvol = nvol + noise_factor * np.random.randn(
                    *nvol.shape)
                batch_cost, _ = sess.run([cost, opt],
                                         feed_dict={
                                             inputs_: noisy_nvol,
                                             targets_: nvol
                                         })
                if i % 1000 == 0:
                    print("batch_cost", batch_cost)
                print('\r' + str(((i + 1) * 100) / n_batches) + '%')
                sys.stdout.flush()

            counter = counter + 1
            print("Epoch: {}/{}...".format(counter, n_epochs),
                  "Training loss: {:.4f}".format(batch_cost))
            # Elapsed wall-clock time of this epoch, in seconds.
            print("time cost: {}".format(time.time() - epoch_start))

            # Saver.save returns the actual path prefix, which includes the
            # "-<global_step>" suffix; remember it for the final restore.
            last_checkpoint = all_saver.save(sess,
                                             ws_path + "model.ckpt",
                                             global_step=counter)
            print('Weights saved')

            if counter >= n_epochs:
                break

            # Validation: fetch only the cost so that validation data never
            # updates the weights.
            vol = sess.run(fetch_op_val)
            nvol = np.asarray(vol)
            batch_cost = sess.run(cost,
                                  feed_dict={
                                      inputs_: nvol,
                                      targets_: nvol
                                  })
            print('Validation error' + str(batch_cost))
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()
        coord.join(threads)
        sess.close()

    # ---- restore the last saved weights ------------------------------------
    if last_checkpoint is not None:
        with tf.Session(config=config) as sess:
            all_saver.restore(sess, last_checkpoint)
            print("Model restored.")
def placeholder(dim=None, name=None):
    """Return a float32 placeholder shaped ``combined_shape(None, dim)``.

    Args:
        dim: forwarded to ``combined_shape`` as its second argument.
        name: optional name of the placeholder op.
    """
    shape = combined_shape(None, dim)
    return tf.placeholder(dtype=tf.float32, shape=shape, name=name)
return np.array([1, 0, 0, 0]) # training dataset preparing DIGIT_COUNT = 10 # x = 101~1024的數字 train_x = np.array( [encore_binary(i, DIGIT_COUNT) for i in range(101, 2**DIGIT_COUNT)]) # y = one hot過的 fizz buzz train_y = np.array([one_hot_fizz_buss(i) for i in range(101, 2**DIGIT_COUNT)]) # TensorFlow parameter preparing HIDDEN_UNIT_COUNT = 100 # input is n x digit_count matrix with float value # output is n x 4 matrix with float value X = tf.placeholder('float', [None, DIGIT_COUNT]) Y = tf.placeholder('float', [None, 4]) # initial weight randomly def init_weights(shape): return tf.Variable(tf.random_normal(shape, stddev=0.01)) # 初始化 hidden layer的weight w_h = init_weights([DIGIT_COUNT, HIDDEN_UNIT_COUNT]) # 初始化 output的weight w_o = init_weights([HIDDEN_UNIT_COUNT, 4]) # define model
labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 return labels_one_hot X_train, Y_train = np.array(train.drop(['Y_1_month'], axis=1)), dense_to_one_hot(train['Y_1_month']) X_test, Y_test = np.array(test.drop(['Y_1_month'], axis=1)), dense_to_one_hot(test['Y_1_month']) # hyperparameters learning_rate = 0.01 num_epochs = 30 batch_size = 1000 input_size = 55 hidden1_size = 100 output_size = 2 display_step = 1 x = tf.placeholder(tf.float32, shape = [None,input_size]) y = tf.placeholder(tf.float32, shape = [None, output_size]) def build_ANN(x): #Layer1 W1 = tf.Variable(tf.random_normal(shape = [input_size, hidden1_size])) b1 = tf.Variable(tf.random_normal(shape = [hidden1_size])) H1_output = tf.nn.relu(tf.matmul(x,W1)+b1) #Layer 2 W_output = tf.Variable(tf.random_normal(shape = [hidden1_size, output_size])) b_output = tf.Variable(tf.random_normal(shape = [output_size])) logits = tf.matmul(H1_output,W_output)+b_output return logits predicted_value = build_ANN(x) loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predicted_value, labels=y))
def build_sub_graph(self, length=15, reuse=False):
    """Build the loss sub-graph for sequences of a fixed length.

    A sequence alternates entity ids (even positions) and relation ids (odd
    positions). Every position but the last is embedded, batch-normalized
    and fed through an RNN; the outputs at relation positions additionally
    receive a residual connection from the preceding entity embedding. Each
    output is then scored against the next element of the sequence with
    ``self.sampled_loss``.

    Args:
        length: number of ids per sequence.
        reuse: whether the batch-norm and RNN variables already exist and
            should be reused instead of created.

    Returns:
        A ``(losses, seq)`` tuple: the summed loss divided by the batch size,
        and the int32 placeholder of shape ``[batch_size, length]`` that
        must be fed with the sequences.
    """
    options = self._options
    hidden_size = options.hidden_size
    batch_size = options.batch_size
    num_steps = length - 1  # the last element is only ever a target

    seq = tf.placeholder(tf.int32, [batch_size, length],
                         name='seq' + str(length))

    e_em, r_em = self._entity_embedding, self._relation_embedding

    # Look entities and relations up separately, then interleave them again
    # in sequence order.
    ent = seq[:, :-1:2]
    rel = seq[:, 1::2]
    ent_em = tf.nn.embedding_lookup(e_em, ent)
    rel_em = tf.nn.embedding_lookup(r_em, rel)

    em_seq = [(ent_em if i % 2 == 0 else rel_em)[:, i // 2]
              for i in range(num_steps)]

    # Batch-normalize each position separately. The variables are created
    # by the first position of the first sub-graph and reused everywhere
    # else.
    with tf.variable_scope('input_bn'):
        bn_em_seq = [
            tf.reshape(
                self.bn(em_seq[i], reuse=True if reuse else i != 0),
                [-1, 1, hidden_size]) for i in range(num_steps)
        ]

    bn_em_seq = tf.concat(bn_em_seq, axis=1)
    ent_bn_em = bn_em_seq[:, ::2]

    with tf.variable_scope('rnn', reuse=reuse):
        cell = self.lstm_cell(True, options.keep_prob, options.num_layers)
        outputs, state = tf.nn.dynamic_rnn(cell, bn_em_seq, dtype=tf.float32)

    rel_outputs = outputs[:, 1::2, :]
    outputs = [outputs[:, i, :] for i in range(num_steps)]
    ent_outputs = outputs[::2]

    # RSN residual: bias-free linear projections of the RNN output at each
    # relation position and of the entity embedding that precedes it.
    # NOTE(review): Keras layers create fresh weights on every call of this
    # method, regardless of `reuse` - confirm whether these projections are
    # meant to be shared between sub-graphs.
    rel_projection = tf.keras.layers.Dense(hidden_size,
                                           use_bias=False,
                                           activation=None)
    ent_projection = tf.keras.layers.Dense(hidden_size,
                                           use_bias=False,
                                           activation=None)
    res_rel_outputs = rel_projection(rel_outputs) + ent_projection(ent_bn_em)

    # Interleave entity outputs and residual relation outputs again.
    res_rel_outputs = [
        res_rel_outputs[:, i, :] for i in range(num_steps // 2)
    ]
    outputs = [(ent_outputs if i % 2 == 0 else res_rel_outputs)[i // 2]
               for i in range(num_steps)]

    # Output batch norm, with the same variable-creation rule as above.
    with tf.variable_scope('output_bn'):
        bn_outputs = [
            tf.reshape(
                self.bn(outputs[i], reuse=True if reuse else i != 0),
                [-1, 1, hidden_size]) for i in range(num_steps)
        ]

    def cal_loss(bn_outputs, seq):
        """Stack the sampled loss of every position along axis 1."""
        losses = []

        # Random 0/1 weight per batch row, shuffled again inside the graph.
        masks = np.random.choice([0., 1.0], size=batch_size, p=[0.5, 0.5])
        weight = tf.random_shuffle(tf.cast(masks, tf.float32))
        for i, output in enumerate(bn_outputs):
            if i % 2 == 0:
                # Entity position: the next element is a relation.
                losses.append(
                    self.sampled_loss(output,
                                      seq[:, i + 1],
                                      self._rel_w,
                                      self._rel_b,
                                      weight=weight,
                                      is_entity=i))
            else:
                # Relation position: the next element is an entity.
                losses.append(
                    self.sampled_loss(output,
                                      seq[:, i + 1],
                                      self._ent_w,
                                      self._ent_b,
                                      weight=weight,
                                      is_entity=i))
        losses = tf.stack(losses, axis=1)
        return losses

    seq_loss = cal_loss(bn_outputs, seq)

    losses = tf.reduce_sum(seq_loss) / batch_size

    return losses, seq
hiddenSize = 100
maxMemory = 500
batchSize = 50
epoch = 100
epsilonStart = 1
epsilonDiscount = 0.999
epsilonMinimumValue = 0.1
discount = 0.9
learningRate = 0.2
winReward = 1
#------------------------------------------------------------

#------------------------------------------------------------
# Hypothesis (model) definition
#------------------------------------------------------------


def _fan_in_weights(fan_in, fan_out):
    """Weight variable from a truncated normal with stddev 1/sqrt(fan_in)."""
    initial = tf.truncated_normal([fan_in, fan_out],
                                  stddev=1.0 / math.sqrt(float(fan_in)))
    return tf.Variable(initial)


def _small_bias(size):
    """Bias variable from a truncated normal with stddev 0.01."""
    return tf.Variable(tf.truncated_normal([size], stddev=0.01))


X = tf.placeholder(tf.float32, [None, nbStates])

# First ReLU layer: nbStates -> hiddenSize.
W1 = _fan_in_weights(nbStates, hiddenSize)
b1 = _small_bias(hiddenSize)
input_layer = tf.nn.relu(tf.matmul(X, W1) + b1)

# Second ReLU layer: hiddenSize -> hiddenSize.
W2 = _fan_in_weights(hiddenSize, hiddenSize)
b2 = _small_bias(hiddenSize)
hidden_layer = tf.nn.relu(tf.matmul(input_layer, W2) + b2)

# Weights of the next layer: hiddenSize -> nbActions.
W3 = _fan_in_weights(hiddenSize, nbActions)
x, W, strides=[1, 1, 1, 1], padding='SAME') # x为输入,W为参数矩阵,扫描跨度strides=[1,y_step,x_step,1] # 扫描方式 SAME 有两个取值'SAME'和'VALID',对应一个填充,一个不填充 # ------------ 二维池化(channel=1黑白) ------------ def max_pool_2x2(x): return tf.nn.max_pool2d(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME') # ksize # ------------ 初始化输入输出结构 ------------ # 初始化x -- 输入的数据的大小 xs = tf.placeholder(tf.float32, [None, 784]) # None是数据的个数(不规定) # 手写数字的图片大小为28*28 # 设置实际值 -- 输入的数据 ys = tf.placeholder(tf.float32, [None, 10]) # 输出为1*10 哪一个元素置1 就是哪个数字 x_data = tf.reshape( xs, [-1, 28, 28, 1 ]) # -1表示样本数m(根据每轮训练的输入大小batch_size=100),28*28表示图片大小,1表示channel #-------------- 定义网络结构 -------------- #-------------- 卷积1层 ----------------- # output = 28*28*16 W_conv1 = weight_variable([5, 5, 1, 16 ]) # 定义1层权重(卷积核) 5*5*1*16的矩阵 1表示核的channel,16表示核的个数 b_conv1 = bias_variable([16]) # 定义conv1的bias矩阵 h_conv1 = tf.nn.relu(conv2d(x_data, W_conv1) + b_conv1) # 先线性化(卷积) 再激活(非线性化) #-------------- 池化1层 -----------------
def get_influence_on_test_loss(self,
                               test_indices,
                               train_idx,
                               approx_type='cg',
                               approx_params=None,
                               force_refresh=True,
                               test_description=None,
                               loss_type='normal_loss',
                               X=None,
                               Y=None):
    """Predict how a single test loss changes per training point.

    Computes (or loads from an ``.npz`` cache in ``self.train_dir``) the
    inverse Hessian-vector product for the test gradient, then takes its dot
    product with the gradient of each training point.

    Args:
        test_indices: Sequence holding exactly one test index.
        train_idx: If ``None``, the phantom points ``X``/``Y`` are scored;
            otherwise the points in ``self.train_indices_of_test_case`` are
            scored and ``X``/``Y`` must not be given.
        approx_type: Forwarded to ``self.get_inverse_hvp`` and used in the
            cache file name.
        approx_params: Forwarded to ``self.get_inverse_hvp``.
        force_refresh: When falsy and the cache file exists, the cached
            inverse HVP is loaded instead of being recomputed.
        test_description: Cache file name suffix; defaults to
            ``test_indices``.
        loss_type: Forwarded to ``self.get_r_grad_loss`` and used in the
            cache file name.
        X: Phantom inputs (indexed as ``X[counter, :]``); required when
            ``train_idx`` is ``None``.
        Y: Phantom labels, same length as ``X``.

    Returns:
        1-D ``numpy`` array with one predicted loss difference per scored
        training point.

    Raises:
        ValueError: If the ``train_idx`` / ``X`` / ``Y`` combination is
            inconsistent.
    """
    # If train_idx is None then use X and Y (phantom points).
    # test_idx must stay consistent between models because mini-batching
    # permutes the dataset order.
    if train_idx is None:
        if (X is None) or (Y is None):
            raise ValueError(
                'X and Y must be specified if using phantom points.')
        if X.shape[0] != len(Y):
            raise ValueError('X and Y must have the same length.')
    else:
        if (X is not None) or (Y is not None):
            raise ValueError(
                'X and Y cannot be specified if train_idx is specified.')

    assert len(test_indices) == 1

    # Build the test-specific view of the model (parameters and gradient ops).
    self.test_index = test_indices[0]
    self.train_indices_of_test_case = self.get_train_indices_of_test_case(
        test_indices)
    self.params_test = self.get_test_params(test_index=test_indices)
    self.vec_to_list_test = self.get_vec_to_list_fn_test()

    self.grad_total_loss_op_test = self.get_test_grad(
        self.grad_total_loss_op)
    self.grad_loss_no_reg_op_test = self.get_test_grad(
        self.grad_loss_no_reg_op)
    self.grad_loss_r_test = self.get_test_grad(self.grad_loss_r)

    self.v_placeholder_test = [
        tf.placeholder(tf.float32, shape=a.get_shape())
        for a in self.params_test
    ]
    self.hessian_vector_test = self.hessian_vector_product_test(
        self.total_loss, self.params, self.v_placeholder_test)

    test_grad_loss_r = self.get_r_grad_loss(test_indices,
                                            loss_type=loss_type)
    print('Norm of test gradient: %s' %
          np.linalg.norm(np.concatenate(test_grad_loss_r)))

    if test_description is None:
        test_description = test_indices

    approx_filename = os.path.join(
        self.train_dir, '%s-%s-%s-test-%s.npz' %
        (self.model_name, approx_type, loss_type, test_description))

    # Start the timer before branching so that duration_1 is defined on the
    # cache-hit path as well (it is needed for the total-time report below).
    start_time = time.time()
    if os.path.exists(approx_filename) and not force_refresh:
        inverse_hvp = list(np.load(approx_filename)['inverse_hvp'])
        print('Loaded inverse HVP from %s' % approx_filename)
    else:
        inverse_hvp = self.get_inverse_hvp(test_grad_loss_r, approx_type,
                                           approx_params)
        np.savez(approx_filename, inverse_hvp=inverse_hvp)
        print('Saved inverse HVP to %s' % approx_filename)
    duration_1 = time.time() - start_time
    print('Inverse HVP took %s sec' % duration_1)

    start_time = time.time()
    # The flattened inverse HVP is the same for every training point.
    inverse_hvp_flat = np.concatenate(inverse_hvp)

    if train_idx is None:
        num_to_remove = len(Y)
        predicted_loss_diffs = np.zeros([num_to_remove])
        for counter in np.arange(num_to_remove):
            single_train_feed_dict = self.fill_feed_dict_manual(
                X[counter, :], [Y[counter]])
            train_grad_loss_val = self.sess.run(
                self.grad_total_loss_op, feed_dict=single_train_feed_dict)
            predicted_loss_diffs[counter] = np.dot(
                inverse_hvp_flat,
                np.concatenate(train_grad_loss_val)) / self.num_train_examples
    else:
        num_to_remove = len(self.train_indices_of_test_case)
        predicted_loss_diffs = np.zeros([num_to_remove])
        # Here the normaliser is the number of training points associated
        # with the test case, not the full training-set size.
        normaliser = self.train_indices_of_test_case.shape[0]
        for counter, idx_to_remove in enumerate(
                self.train_indices_of_test_case):
            single_train_feed_dict = self.fill_feed_dict_with_one_ex(
                self.data_sets.train, idx_to_remove)
            train_grad_loss_val = self.sess.run(
                self.grad_total_loss_op_test,
                feed_dict=single_train_feed_dict)
            predicted_loss_diffs[counter] = np.dot(
                inverse_hvp_flat,
                np.concatenate(train_grad_loss_val)) / normaliser

    duration_2 = time.time() - start_time
    print('Multiplying by %s train examples took %s sec' %
          (num_to_remove, duration_2))
    print("Total time is %s sec" % (duration_1 + duration_2))

    return predicted_loss_diffs
import matplotlib.pyplot as plt

tf.disable_eager_execution()

# Training data: y = sin(x) sampled on [0, 10) with a step of 0.1.
x_train = np.arange(0, 10, 0.1)
y_train = np.sin(x_train)
# Show the target curve before the graph is built.
plt.plot(x_train, y_train)
plt.show()

num_inputs = 1
num_outputs = 1
hidden_layers = 3
hidden_units = 16
learning_rate = 0.01
batch_size = 1024

# Graph inputs: one scalar feature and one scalar target per row.
inputs = tf.placeholder(tf.float32, shape=(None, 1))
targets = tf.placeholder(tf.float32, shape=(None, 1))

# Input projection: num_inputs -> hidden_units.
# NOTE(review): `shape=(hidden_units)` is a parenthesised int, not a
# one-element tuple; the same holds for every bias shape below.
w = tf.get_variable("weight-1", shape=(num_inputs, hidden_units))
b = tf.get_variable("bias-1", shape=(hidden_units))
output = tf.matmul(inputs, w) + b
# Hidden stack: hidden_layers layers of hidden_units -> hidden_units.
# NOTE(review): every layer is `matmul + bias` with no activation in between,
# so the visible graph is a composition of affine maps -- confirm whether a
# non-linearity is intended.
for i in range(hidden_layers):
    w = tf.get_variable(f"weight{i}", shape=(hidden_units, hidden_units))
    b = tf.get_variable(f"bias{i}", shape=(hidden_units))
    output = tf.matmul(output, w) + b
# Output projection: hidden_units -> num_outputs.
w = tf.get_variable("weight-fin", shape=(hidden_units, num_outputs))
b = tf.get_variable("bias-fin", shape=(num_outputs))
output = tf.matmul(output, w) + b
def __init__(self, network_name, state_size, output_shape, session, k_step=1, settings=None, worker_only=False):
    """Build the network graph, its training ops and initialise its variables.

    All positional and keyword arguments are forwarded unchanged to
    ``network.__init__``. In addition:

    * ``k_step`` fixes the second dimension (``k_step + 1``) of the reward
      and done placeholders.
    * ``worker_only`` decides, together with
      ``self.settings["workers_computes_advantages"]``, whether the full
      network is built.

    NOTE(review): the nesting under ``with self.scope`` was reconstructed
    from a flattened source; confirm which statements belong inside it.
    """
    network.__init__(self, network_name, state_size, output_shape, session, k_step=k_step, settings=settings, worker_only=worker_only)
    #Build network!
    with self.scope as scope:
        # One placeholder per entry of state_size_vec / state_size_vis; the
        # leading dimension of each declared shape is replaced by None.
        self.vector_inputs = [tf.placeholder(tf.float32, (None,)+s[1:], name='vector_input{}'.format(i)) for i,s in enumerate(self.state_size_vec)]
        self.visual_inputs = [tf.placeholder(tf.float32, (None,)+s[1:], name='visual_input{}'.format(i)) for i,s in enumerate(self.state_size_vis)]
        # Scalar boolean fed to the network as its `training` argument.
        self.training_tf = tf.placeholder(tf.bool, shape=())
        self.main_net = self.network_type(
            network_name,
            self.output_shape,
            self.settings,
            full_network=(not worker_only or self.settings["workers_computes_advantages"]),
            training=self.training_tf,
            kbd_activation=N.action_softmax,
            raw_outputs=True,
        )
        # Calling the network on the inputs yields the value and policy outputs.
        self.v_tf, self.pi_tf = self.main_net(self.vector_inputs, self.visual_inputs)
        # if not self.worker_only: #For trainers
        # Placeholders for the training data.
        self.rewards_tf = tf.placeholder(tf.float32, (None, k_step+1, 1), name='reward')
        self.dones_tf = tf.placeholder(tf.int32, (None, k_step+1, 1), name='done')
        self.actions_training_tf = tf.placeholder(tf.uint8, (None, 2), name='action')
        self.pieces_training_tf = tf.placeholder(tf.uint8, (None, 1), name='piece')
        self.probabilities_old_tf = tf.placeholder(tf.float32, (None, 1), name='probabilities')
        self.target_value_tf, self.advantages_tf = self.create_targets(self.v_tf)
        #params
        # Scalar hyperparameters are fed at run time rather than baked into
        # the graph; the dict is handed to create_training_ops below.
        self.params = {
            'ppo_epsilon' : tf.placeholder(tf.float32, shape=(), name='ppo_epsilon'),
            'clipping_parameter' : tf.placeholder(tf.float32, shape=(), name='clipping_parameter'),
            'value_loss' : tf.placeholder(tf.float32, shape=(), name='c_value_loss'),
            'policy_loss' : tf.placeholder(tf.float32, shape=(), name='c_policy_loss'),
            'entropy_loss' : tf.placeholder(tf.float32, shape=(), name='c_entropy_loss'),
            'entropy_floor_loss' : tf.placeholder(tf.float32, shape=(), name='c_entropy_floor_loss'),
            'rescaled_entropy' : tf.placeholder(tf.float32, shape=(), name='c_rescaled_entropy'),
            'lr' : tf.placeholder(tf.float32, shape=(), name='lr'),
        }
        self.training_ops = self.create_training_ops(
            self.pi_tf,
            self.v_tf,
            self.target_value_tf,
            self.advantages_tf,
            self.actions_training_tf,
            self.pieces_training_tf,
            self.probabilities_old_tf,
            self.params,
        )
        # Collect every global variable created under this scope; they are
        # used for the weight-setting ops and for the initializer.
        self.variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope.name)
        self.main_net_assign_list = self.create_weight_setting_ops(self.variables)
        self.init_ops = tf.variables_initializer(self.variables)
        #Run init-op
        self.session.run(self.init_ops)
def serving_input_receiver_fn():
    """Return the ``ServingInputReceiver`` used when exporting the estimator.

    A single float32 placeholder of shape ``[None, 28, 28]`` is registered
    under the key ``'features'``. The same dict is passed both as the features
    and as the receiver tensors.
    """
    features_ph = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28])
    receiver = {'features': features_ph}
    return tf.estimator.export.ServingInputReceiver(receiver, receiver)
def __init__(
    self,
    images: "tf.Tensor",
    model: Optional["FasterRCNNMetaArch"] = None,
    filename: Optional[str] = None,
    url: Optional[str] = None,
    sess: Optional["Session"] = None,
    is_training: bool = False,
    clip_values: Optional["CLIP_VALUES_TYPE"] = None,
    channels_first: bool = False,
    preprocessing_defences: Union["Preprocessor", List["Preprocessor"], None] = None,
    postprocessing_defences: Union["Postprocessor", List["Postprocessor"], None] = None,
    preprocessing: "PREPROCESSING_TYPE" = (0.0, 1.0),
    attack_losses: Tuple[str, ...] = (
        "Loss/RPNLoss/localization_loss",
        "Loss/RPNLoss/objectness_loss",
        "Loss/BoxClassifierLoss/localization_loss",
        "Loss/BoxClassifierLoss/classification_loss",
    ),
):
    """
    Create a TensorFlowFasterRCNN instance.

    :param images: Input samples of shape (nb_samples, height, width, nb_channels). One set of groundtruth
                   placeholders is created per sample, so the first dimension is read from `images.shape`.
    :param model: A TensorFlow Faster-RCNN model. When given, `filename` and `url` are ignored. The outputs
                  obtained from the model are a tuple of (predictions, losses, detections):

                    - predictions: a dictionary holding "raw" prediction tensors.
                    - losses: a dictionary mapping loss keys (`Loss/RPNLoss/localization_loss`,
                      `Loss/RPNLoss/objectness_loss`, `Loss/BoxClassifierLoss/localization_loss`,
                      `Loss/BoxClassifierLoss/classification_loss`) to scalar tensors representing the
                      corresponding loss values.
                    - detections: a dictionary containing final detection results.
    :param filename: Filename of the detection model without filename extension. Only used when `model` is None;
                     if either `filename` or `url` is missing, both fall back to the built-in default model.
    :param url: URL to download the archive of the detection model, including the filename extension.
    :param sess: Computation session. A new session is created (with a warning) when None.
    :param is_training: A boolean indicating whether the training version of the computation graph should be
                        constructed.
    :param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and
                        maximum values allowed for input image features. Only `(0, 1)` is accepted.
    :param channels_first: Set channels first or last.
    :param preprocessing_defences: Preprocessing defence(s); must resolve to None, not supported by this estimator.
    :param postprocessing_defences: Postprocessing defence(s); must resolve to None, not supported by this
                                    estimator.
    :param preprocessing: Tuple of the form `(subtractor, divider)` of floats or `np.ndarray` of values to be
                          used for data preprocessing. The first value will be subtracted from the input. The
                          input will then be divided by the second one.
    :param attack_losses: Tuple of loss keys to use; the default lists the four `Loss/...` keys above.
    :raises ValueError: If `clip_values` is not `(0, 1)` or if any defence is configured.
    """
    import tensorflow.compat.v1 as tf  # lgtm [py/repeated-import]

    # Let the parent class store the generic estimator configuration.
    super().__init__(
        model=model,
        clip_values=clip_values,
        channels_first=channels_first,
        preprocessing_defences=preprocessing_defences,
        postprocessing_defences=postprocessing_defences,
        preprocessing=preprocessing,
    )

    # Inputs must be normalised to the unit range.
    if self.clip_values is not None:
        clip_error = "This classifier requires normalized input images with clip_vales=(0, 1)."
        if not np.all(self.clip_values[0] == 0):
            raise ValueError(clip_error)
        if not np.all(self.clip_values[1] == 1):  # pragma: no cover
            raise ValueError(clip_error)

    # Defences are not supported by this estimator.
    if self.preprocessing_defences is not None:
        raise ValueError("This estimator does not support `preprocessing_defences`.")
    if self.postprocessing_defences is not None:
        raise ValueError("This estimator does not support `postprocessing_defences`.")

    # One groundtruth placeholder triple (boxes, classes, weights) per image.
    nb_images = images.shape[0]
    self._groundtruth_boxes_list: List["tf.Tensor"] = [
        tf.placeholder(dtype=tf.float32, shape=(None, 4), name=f"groundtruth_boxes_{idx}")
        for idx in range(nb_images)
    ]
    self._groundtruth_classes_list: List["tf.Tensor"] = [
        tf.placeholder(dtype=tf.int32, shape=(None,), name=f"groundtruth_classes_{idx}")
        for idx in range(nb_images)
    ]
    self._groundtruth_weights_list: List["tf.Tensor"] = [
        tf.placeholder(dtype=tf.float32, shape=(None,), name=f"groundtruth_weights_{idx}")
        for idx in range(nb_images)
    ]

    # Decide where the detection model comes from: a caller-supplied object,
    # or an archive identified by filename + url (with a built-in default).
    if model is not None:
        source_kwargs = {"filename": None, "url": None, "obj_detection_model": model}
    else:
        if filename is None or url is None:
            filename = "faster_rcnn_inception_v2_coco_2017_11_08"
            url = (
                "http://download.tensorflow.org/models/object_detection/"
                "faster_rcnn_inception_v2_coco_2017_11_08.tar.gz"
            )
        source_kwargs = {"filename": filename, "url": url, "obj_detection_model": None}

    self._model, self._predictions, self._losses, self._detections = self._load_model(
        images=images,
        is_training=is_training,
        groundtruth_boxes_list=self._groundtruth_boxes_list,
        groundtruth_classes_list=self._groundtruth_classes_list,
        groundtruth_weights_list=self._groundtruth_weights_list,
        **source_kwargs,
    )

    # Remember the construction arguments needed later on.
    self._input_shape = images.shape.as_list()[1:]
    self.is_training: bool = is_training
    self.images: Optional["tf.Tensor"] = images
    self.attack_losses: Tuple[str, ...] = attack_losses

    # Use the caller's session if there is one, otherwise open a new one.
    if sess is not None:  # pragma: no cover
        self._sess = sess
    else:
        logger.warning("A session cannot be None, create a new session.")
        self._sess = tf.Session()

    # Initialise global variables first, then local ones.
    self._sess.run(tf.global_variables_initializer())
    self._sess.run(tf.local_variables_initializer())
def run_differentially_private_federated_averaging(loss,
                                                   train_op,
                                                   eval_correct,
                                                   data,
                                                   data_placeholder,
                                                   label_placeholder,
                                                   privacy_agent=None,
                                                   b=10,
                                                   e=4,
                                                   record_privacy=True,
                                                   m=0,
                                                   sigma=0,
                                                   eps=8,
                                                   save_dir=None,
                                                   log_dir=None,
                                                   max_comm_rounds=3000,
                                                   gm=True,
                                                   saver_func=create_save_dir,
                                                   save_params=False):
    """
    Simulate a federated learning setting with differential privacy tracking.

    All trainable tensorflow variables in the default graph are detected and a decentralized learning process is
    simulated in which these variables are learned through clients that only have access to their own data set.

    This function must therefore be run inside a Graph as follows:
    --------------------------------------------------------------------------------------------------------------------

    with tf.Graph().as_default():

        train_op, eval_correct, loss, data_placeholder, labels_placeholder = Some_function_that_builds_TF_graph()

        Accuracy_accountant, Delta_accountant, model = \\
            run_differentially_private_federated_averaging(loss, train_op, eval_correct, DATA, data_placeholder,
                                                           labels_placeholder)
    --------------------------------------------------------------------------------------------------------------------

    The graph that train_op, loss and eval_op belong to should have a global_step variable.

    :param loss: TENSORFLOW node that computes the current loss
    :param train_op: TENSORFLOW Training_op
    :param eval_correct: TENSORFLOW node that evaluates the number of correct predictions
    :param data: A class instance. The attributes read by this function are:
        .sorted_x_train : The training data; converted with np.asarray and indexed by data-point index.
        .sorted_y_train : The training labels; indices correspond to .sorted_x_train.
        .client_set     : A nested list or numpy array. len(data.client_set) is the total number of clients.
                          For any j, data.client_set[j] is a list (or array) holding the indices of the data
                          points that client j owns.
        .x_vali         : The validation data.
        .y_vali         : The validation labels.
    :param data_placeholder: The placeholder from the tensorflow graph that is used to feed the model with data
    :param label_placeholder: The placeholder from the tensorflow graph that is used to feed the model with labels
    :param privacy_agent: A class instance that has callables .get_m(r), .get_Sigma(r) and .get_bound(), where r is
        the communication round. A default PrivAgent is created when this is falsy.
    :param b: Batch size. NOTE(review): the value is passed to np.split as the number of sections, i.e. each
        client's indices are cut into FLAGS.b equal parts -- confirm this is the intended meaning.
    :param e: Epochs to run on each client
    :param record_privacy: Whether to record the privacy or not
    :param m: If specified, a privacyAgent is not used, instead the parameter is kept constant
    :param sigma: If specified, a privacyAgent is not used, instead the parameter is kept constant
    :param eps: The epsilon for epsilon-delta privacy
    :param save_dir: Directory to store the process
    :param log_dir: Directory to store the graph
    :param max_comm_rounds: The maximum number of allowed communication rounds
    :param gm: Whether to use a Gaussian Mechanism or not.
    :param saver_func: A function that specifies where and how to save progress: Note that the usual tensorflow
        tracking will not work
    :param save_params: Save all weights throughout training.

    :return: (accuracy_accountant, delta_accountant, model) when the privacy budget is exhausted; three empty lists
        when the loop ends for any other reason.

    NOTE(review): the block nesting below was reconstructed from a flattened source. In particular, confirm the
    extent of the `if not (FLAGS.loaded and r == 0)` body and the position of the early `return`.
    """

    # If no privacy agent was specified, the default privacy agent is used.
    if not privacy_agent:
        privacy_agent = PrivAgent(N=len(data.client_set),
                                  Name='default_agent',
                                  comm_round=max_comm_rounds)

    # A Flags instance is created that will fuse all specified parameters and default those that are not specified.
    FLAGS = Flag(len(data.client_set), b, e, record_privacy, m, sigma, eps,
                 save_dir, log_dir, max_comm_rounds, gm, privacy_agent)

    # Check whether the specified parameters make sense.
    FLAGS = check_validaity_of_FLAGS(FLAGS)

    # At this point, FLAGS.save_dir specifies both; where we save progress and where we assume the data is stored
    save_dir = saver_func(FLAGS)

    # This function will retrieve the variable associated to the global step and create nodes that serve to
    # increase and reset it to a certain value.
    increase_global_step, set_global_step = global_step_creator()

    # - model_placeholder : a dictionary in which there is a placeholder stored for every trainable variable defined
    #                       in the tensorflow graph. Each placeholder corresponds to one trainable variable and has
    #                       the same shape as that variable (the dtype is always tf.float32). In addition, the
    #                       placeholder has the same name as the Variable, but a '_placeholder:0' added to it. The
    #                       keys of the dictionary correspond to the name of the respective placeholder
    model_placeholder = dict(
        zip([Vname_to_FeedPname(var) for var in tf.trainable_variables()], [
            tf.placeholder(
                name=Vname_to_Pname(var), shape=var.shape, dtype=tf.float32)
            for var in tf.trainable_variables()
        ]))

    # - assignments : a list of nodes. when run, all trainable variables are set to the value specified through
    #                 the placeholders in 'model_placeholder'.
    assignments = [
        tf.assign(var, model_placeholder[Vname_to_FeedPname(var)])
        for var in tf.trainable_variables()
    ]

    # load_from_directory_or_initialize checks whether there is a model at 'save_dir' corresponding to the one we
    # are building. If so, training is resumed, if not, it returns:  - model = []
    #                                                                - accuracy_accountant = []
    #                                                                - delta_accountant = []
    #                                                                - real_round = 0
    # And initializes a Differential_Privacy_Accountant as acc
    # NOTE(review): delta_accountant[-1] is read at the start of the first round, so delta_accountant is
    # presumably non-empty in practice -- verify against load_from_directory_or_initialize.
    model, accuracy_accountant, delta_accountant, acc, real_round, FLAGS, computed_deltas = \
        load_from_directory_or_initialize(save_dir, FLAGS)

    # - m : amount of clients participating in a round
    # - sigma : variable for the Gaussian Mechanism.
    # Both will only be used if no Privacy_Agent is deployed.
    m = int(FLAGS.m)
    sigma = float(FLAGS.sigma)

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    # If there was no loadable model, we initialize a model:
    # - model : dictionary having as keys the names of the placeholders associated to each variable. It will serve
    #           as a feed_dict to assign values to the placeholders which are used to set the variables to
    #           specific values.
    if not model:
        model = dict(
            zip([Vname_to_FeedPname(var) for var in tf.trainable_variables()],
                [sess.run(var) for var in tf.trainable_variables()]))
        model['global_step_placeholder:0'] = 0

        real_round = 0

        weights_accountant = []

    # If a model is loaded, and we are not relearning it (relearning means that we once already finished such a model
    # and we are learning it again to average the outcomes), we have to get the privacy accountant up to date. This
    # means, that we have to iterate the privacy accountant over all the m, sigmas that correspond to already completed
    # communication
    if not FLAGS.relearn and real_round > 0:
        bring_Accountant_up_to_date(acc, sess, real_round, privacy_agent,
                                    FLAGS)

    # This is where the actual communication rounds start:
    data_set_asarray = np.asarray(data.sorted_x_train)
    label_set_asarray = np.asarray(data.sorted_y_train)

    for r in range(FLAGS.max_comm_rounds):

        # First, we check whether we are loading a model, if so, we have to skip the first allocation, as it took place
        # already.
        if not (FLAGS.loaded and r == 0):
            # Setting the trainable Variables in the graph to the values stored in feed_dict 'model'
            sess.run(assignments, feed_dict=model)

            # create a feed-dict holding the validation set.
            feed_dict = {
                str(data_placeholder.name): np.asarray(data.x_vali),
                str(label_placeholder.name): np.asarray(data.y_vali)
            }

            # compute the loss on the validation set.
            global_loss = sess.run(loss, feed_dict=feed_dict)
            count = sess.run(eval_correct, feed_dict=feed_dict)
            accuracy = float(count) / float(len(data.y_vali))
            accuracy_accountant.append(accuracy)

            print_loss_and_accuracy(global_loss, accuracy)

        # Stop (and save) once the last delta exceeds the agent's bound or is NaN; otherwise just save progress.
        if delta_accountant[-1] > privacy_agent.get_bound() or math.isnan(
                delta_accountant[-1]):
            print('The last step exhausted the privacy budget!!!')
            if not math.isnan(delta_accountant[-1]):
                # The `try: None / finally:` wrapper has an empty try body, so save_progress simply runs.
                try:
                    None
                finally:
                    save_progress(save_dir, model,
                                  delta_accountant + [float('nan')],
                                  accuracy_accountant + [float('nan')],
                                  privacy_agent, FLAGS)
                return accuracy_accountant, delta_accountant, model
        else:
            try:
                None
            finally:
                save_progress(save_dir, model, delta_accountant,
                              accuracy_accountant, privacy_agent, FLAGS)

        ############################################################################################################
        # Start of a new communication round

        real_round = real_round + 1
        if real_round >= FLAGS.max_comm_rounds:
            print('Max communication rounds meet. Stop.')
            break

        print_new_comm_round(real_round)

        if FLAGS.priv_agent:
            m = int(privacy_agent.get_m(int(real_round)))
            sigma = privacy_agent.get_Sigma(int(real_round))

        print('Clients participating: ' + str(m))

        # Randomly choose a total of m (out of n) client-indices that participate in this round
        # randomly permute a range-list of length n: [1,2,3...n] --> [5,2,7..3]
        perm = np.random.permutation(FLAGS.n)

        # Use the first m entries of the permuted list to decide which clients (and their sets) will participate in
        # this round. participating_clients is therefore a nested list of length m. participating_clients[i] should be
        # a list of integers that specify which data points are held by client i. Note that this nested list is a
        # mapping only; the actual data is read from data_set_asarray / label_set_asarray.
        s = perm[0:m].tolist()
        participating_clients = [data.client_set[k] for k in s]

        # For each client c (out of the m chosen ones):
        for c in range(m):

            # Assign the global model and set the global step. This is obsolete when the first client trains,
            # but as soon as the next client trains, all progress allocated before, has to be discarded and the
            # trainable variables reset to the values specified in 'model'
            sess.run(assignments + [set_global_step], feed_dict=model)

            # allocate a list, holding data indices associated to client c and split into batches.
            data_ind = np.split(np.asarray(participating_clients[c]), FLAGS.b,
                                0)

            # e = Epoch
            # NOTE(review): this loop variable shadows the `e` parameter; the parameter was already copied into
            # FLAGS above, so FLAGS.e is unaffected.
            for e in range(int(FLAGS.e)):
                for step in range(len(data_ind)):
                    # increase the global_step count (it's used for the learning rate.)
                    real_step = sess.run(increase_global_step)
                    # batch_ind holds the indices of the current batch
                    batch_ind = data_ind[step]

                    # Fill a feed dictionary with the actual set of data and labels using the data and labels associated
                    # to the indices stored in batch_ind:
                    feed_dict = {
                        str(data_placeholder.name):
                        data_set_asarray[[int(j) for j in batch_ind]],
                        str(label_placeholder.name):
                        label_set_asarray[[int(j) for j in batch_ind]]
                    }

                    # Run one optimization step.
                    _ = sess.run([train_op], feed_dict=feed_dict)

            if c == 0:

                # If we just trained the first client in a comm_round, we override the old weights_accountant (or,
                # if this was the first comm_round, we allocate a new one). The Weights_accountant keeps track of
                # all client updates throughout a communication round.
                weights_accountant = WeightsAccountant(sess, model, sigma,
                                                       real_round)
            else:
                # Allocate the client update, if this is not the first client in a communication round
                weights_accountant.allocate(sess)

        # End of a communication round
        ############################################################################################################

        print('Communication round %s completed' % str(real_round))

        # Compute a new model according to the updates and the Gaussian mechanism specifications from FLAGS
        # Also, if computed_deltas is an empty list, compute delta; the probability of Epsilon-Differential Privacy
        # being broken by allocating the model. If computed_deltas is passed, instead of computing delta, the
        # pre-computed value is used.
        model, delta = weights_accountant.Update_via_GaussianMechanism(
            sess, acc, FLAGS, computed_deltas)

        # append delta to a list.
        delta_accountant.append(delta)

        # Set the global_step to the current step of the last client, such that the next clients can feed it into
        # the learning rate.
        # NOTE(review): real_step is only bound inside the innermost training loop above.
        model['global_step_placeholder:0'] = real_step

        # PRINT the progress and stage of affairs.
        print(' - Epsilon-Delta Privacy:' + str([FLAGS.eps, delta]))

        if save_params:
            weights_accountant.save_params(save_dir)

    return [], [], []
import numpy as np
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

# Load the CSV as float32: every column but the last is a feature, the last
# column is the label (kept 2-D by indexing with [-1]).
xy = np.loadtxt('data-04-zoo.csv', delimiter=',', dtype=np.float32)
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

nb_classes = 7

# 16 input features per row; labels arrive as a single int32 column.
# NOTE(review): y_data is loaded as float32 while Y is declared int32 --
# presumably the feed converts it; verify against the training loop.
X = tf.placeholder(tf.float32, shape=[None, 16])
Y = tf.placeholder(tf.int32, shape=[None, 1])
# One-hot encode the labels, then reshape to [-1, nb_classes] so the result
# lines up with the logits.
Y_one_hot = tf.one_hot(Y, nb_classes)
Y_one_hot = tf.reshape(Y_one_hot, [-1, nb_classes])

# Single linear layer: logits = X @ W + b.
W = tf.Variable(tf.random_normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')
logits = tf.matmul(X, W) + b
hypothesis = tf.nn.softmax(logits)

# Per-row cross entropy is computed from the raw logits (not from
# `hypothesis`), then averaged.
cost_i = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                 labels=Y_one_hot)
cost = tf.reduce_mean(cost_i)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)

# Accuracy: fraction of rows whose arg-max prediction equals the arg-max of
# the one-hot label.
prediction = tf.argmax(hypothesis, 1)
correct_prediction = tf.equal(prediction, tf.argmax(Y_one_hot, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
def function_train(image, full_x, full_y, full_sigma, intensities):
    """Train one model per Monte Carlo replica and keep each replica's best checkpoint.

    ``N_rep`` pseudo-data replicas are generated by adding Gaussian noise of
    width ``full_sigma`` to ``full_y``. For every replica a fresh output
    directory (``Models``, ``Models0``, ``Models1``, ...) is chosen, the model
    is trained with RMSProp on a train/validation split, a checkpoint is
    written every ``display_step`` epochs, and the checkpoint with the lowest
    validation cost is restored and saved again under ``Best_models``.

    Args:
        image: Object providing ``deltaE`` (prediction grid) and ``l`` (its
            length).
        full_x: Input array; only column 0 is used while ``n_input == 1``.
        full_y: Target values, one per row of ``full_x``.
        full_sigma: Per-point uncertainties, used both as the noise width and
            as the weights in the cost.
        intensities: Intensity values used to build the two-column prediction
            grid when ``n_input == 2``.

    Returns:
        None. Results are written to disk as checkpoints.
    """
    n_input = 1
    tf.reset_default_graph()
    tf.disable_eager_execution()
    now = datetime.now()

    # NOTE: with n_input == 1 the two-column placeholder below is replaced
    # right away. It is still created so that the graph's auto-generated op
    # names stay exactly as they were.
    x = tf.placeholder("float", [None, 2], name="x")
    y = tf.placeholder("float", [None, 1], name="y")
    sigma = tf.placeholder("float", [None, 1], name="sigma")
    if n_input == 1:
        x = tf.placeholder("float", [None, 1], name="x")

    predictions = make_model(x, 1)

    # Monte Carlo pseudo data: one noisy copy of full_y per replica column.
    N_rep = 10
    N_full = len(full_y)
    full_y_reps = np.zeros(shape=(N_full, N_rep))
    for rep_idx in range(N_rep):
        full_rep = np.random.normal(0, full_sigma)
        full_y_reps[:, rep_idx] = (full_y + full_rep).reshape(N_full)
    print('MC pseudo data has been created for ', N_rep, ' replicas')

    # test_size for train_test_split: this fraction of points is held out.
    ratio_test = 0.8

    # Inputs on which the restored best model is evaluated.
    if n_input == 1:
        N_pred = image.l
        predict_x = image.deltaE.reshape(N_pred, 1)
        full_x = full_x[:, 0]
    else:
        predict_x = np.empty((0, 2))
        for intensity in intensities:
            predict_x = np.concatenate(
                (predict_x,
                 np.vstack((image.deltaE, np.ones(image.l) * intensity)).T))
        N_pred = image.l * len(intensities)
        predict_x = predict_x.reshape(N_pred, 2)

    cost = tf.reduce_mean(tf.square((y - predictions) / sigma),
                          name="cost_function")
    eta = 6.0e-3
    optimizer = tf.train.RMSPropOptimizer(learning_rate=eta,
                                          decay=0.9,
                                          momentum=0.0,
                                          epsilon=1e-10).minimize(cost)
    saver = tf.train.Saver(max_to_keep=1000)

    training_epochs = 20000
    display_step = 1000
    d_string = now.strftime("%d.%m.%Y")

    for rep_idx in range(N_rep):
        # Pick the first output directory that does not exist yet. The suffix
        # counter is kept separate from the replica index: sharing one
        # variable made the replica column depend on how many 'Models*'
        # directories were already on disk.
        map_name = 'Models'
        dir_suffix = 0
        while os.path.exists(map_name):
            map_name = 'Models' + str(dir_suffix)
            dir_suffix += 1

        rep_y = full_y_reps[:, rep_idx].reshape(N_full, 1)
        train_x, test_x, train_y, test_y, train_sigma, test_sigma = \
            train_test_split(full_x, rep_y, full_sigma, test_size=ratio_test)

        N_train = len(train_y)
        N_test = len(test_y)
        n_columns = 2 if n_input == 2 else 1
        train_x = train_x.reshape(N_train, n_columns)
        test_x = test_x.reshape(N_test, n_columns)
        train_y, test_y = train_y.reshape(N_train, 1), test_y.reshape(N_test, 1)
        train_sigma = train_sigma.reshape(N_train, 1)
        test_sigma = test_sigma.reshape(N_test, 1)

        ### Train and validate
        array_train = []
        array_test = []

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            for epoch in range(training_epochs):
                _, train_cost = sess.run([optimizer, cost],
                                         feed_dict={
                                             x: train_x,
                                             y: train_y,
                                             sigma: train_sigma
                                         })
                test_cost = cost.eval({
                    x: test_x,
                    y: test_y,
                    sigma: test_sigma
                })

                # Record the costs and write a checkpoint every display_step
                # epochs; the best checkpoint is selected from these.
                if epoch % display_step == 0:
                    print("Epoch:", '%04d' % (epoch + 1), "| Training cost=",
                          "{:.9f}".format(train_cost), "| Validation cost=",
                          "{:.9f}".format(test_cost))
                    array_train.append(train_cost)
                    array_test.append(test_cost)
                    path_to_data = map_name + '/All_models/'
                    Path(path_to_data).mkdir(parents=True, exist_ok=True)
                    saver.save(sess,
                               path_to_data + 'my-model.ckpt',
                               global_step=epoch,
                               write_meta_graph=False)

            # Checkpoints are named after their epoch, so the index of the
            # lowest recorded validation cost maps back to an epoch number.
            best_iteration = np.argmin(array_test)
            best_epoch = best_iteration * display_step
            best_model = map_name + '/All_models/my-model.ckpt-%(s)s' % {
                's': best_epoch
            }

            print("Optimization %(i)s Finished! Best model after epoch %(s)s" % {
                'i': rep_idx,
                's': best_epoch
            })

            saver.restore(sess, best_model)
            path_to_data = map_name + '/Best_models/%(s)s/' % {'s': d_string}
            Path(path_to_data).mkdir(parents=True, exist_ok=True)
            saver.save(sess, path_to_data + 'best_model_%(i)s' % {'i': rep_idx})

            predictions_values = sess.run(predictions,
                                          feed_dict={
                                              x: train_x,
                                              y: train_y
                                          })
            extrapolation = sess.run(predictions, feed_dict={x: predict_x})

        nownow = datetime.now()
        print("time elapsed", nownow - now)

        path_to_data = 'Data/Results/%(date)s/' % {"date": d_string}
        Path(path_to_data).mkdir(parents=True, exist_ok=True)
def train(cfg: DictConfig, work_dir: Optional[str] = None, **session_kwargs):
    """Runs the training process for the provided config.

    Seeds the Python, NumPy and TensorFlow RNGs from `cfg.run.seed`, builds
    the meta-learner inside a TF session, and runs `cfg.train.max_steps`
    meta-learning iterations. Losses are logged and written as summaries
    every `cfg.train.log_interval` steps (and on the last step); checkpoints
    are saved every `cfg.train.save_interval` steps (and on the last step).

    Parameters
    ----------
    cfg : DictConfig
        The experiment configuration.

    work_dir : str, optional
        Working directory used for saving checkpoints, logs, etc.
        If None, it is set to `os.getcwd()`.

    **session_kwargs : kwargs
        Keyword arguments for configuring TF session
    """
    # Set working dir.
    if work_dir is None:
        work_dir = os.getcwd()

    # Set random seeds.
    random.seed(cfg.run.seed)
    np.random.seed(cfg.run.seed)
    tf.set_random_seed(cfg.run.seed)

    # Setup the session.
    with utils.session(**session_kwargs) as sess:
        # Build and initialize.
        meta_learner = utils.build_and_initialize(cfg,
                                                  mode=common.ModeKeys.TRAIN)

        # Setup logging and saving: one summary writer per training task.
        writers = [
            tf.summary.FileWriter(logdir=os.path.join(work_dir, task.log_dir))
            for task in cfg[common.ModeKeys.TRAIN].tasks
        ]
        try:
            # The scalar summaries are fed through placeholders because the
            # logged values are computed on the Python side.
            label_budget_ph = tf.placeholder(tf.int32, shape=())
            loss_ph = tf.placeholder(tf.float32, shape=())
            tf.summary.scalar("label_budget", label_budget_ph)
            tf.summary.scalar("loss", loss_ph)
            merged = tf.summary.merge_all()

            # Setup checkpoint.
            checkpoint = tf.train.Checkpoint(
                model_state=meta_learner.model.trainable_parameters,
                optimizer=meta_learner.optimizer,
            )
            saver = tf.train.CheckpointManager(checkpoint,
                                               directory=work_dir,
                                               max_to_keep=5)

            # Do meta-learning iterations.
            logger.info("Training...")
            for i in range(cfg.train.max_steps):
                # Do multiple steps if the optimizer is multi-step.
                if cfg.train.optimizer.n is not None:
                    losses = [
                        train_step(meta_learner, sess=sess)
                        for _ in range(cfg.train.optimizer.n)
                    ]
                    # Average each task's loss over the inner steps.
                    losses = list(map(np.mean, zip(*losses)))
                else:
                    losses = train_step(meta_learner, sess=sess)

                # Log metrics.
                # TODO: create a utility function for logging.
                if (i % cfg.train.log_interval == 0
                        or i + 1 == cfg.train.max_steps):
                    log = f"step: {i}"
                    for loss, td in zip(losses, meta_learner.task_dists):
                        log += f"\n{td.name}:"
                        if td.num_requested_labels:
                            log += (f"\n* requested labels: "
                                    f"{td.num_requested_labels}")
                        log += f"\n* loss: {loss:.6f}"
                    logger.info(log)
                    for loss, td, writer in zip(losses,
                                                meta_learner.task_dists,
                                                writers):
                        feed_dict = {
                            loss_ph: loss,
                            label_budget_ph: td.num_requested_labels,
                        }
                        summary = sess.run(merged, feed_dict=feed_dict)
                        writer.add_summary(summary, i)
                        writer.flush()

                # Save model.
                if (i % cfg.train.save_interval == 0
                        or i + 1 == cfg.train.max_steps):
                    saver.save(checkpoint_number=i)

                # Update task distribution (if necessary).
                # TODO: make this more flexible.
                if (cfg.train.budget_interval is not None
                        and i % cfg.train.budget_interval == 0):
                    for td, task in zip(meta_learner.task_dists,
                                        cfg.train.tasks):
                        td.expand(
                            num_labeled_points=(task.labels_per_step * i),
                            sess=sess)
                    if cfg.train.do_reinit:
                        sess.run(tf.global_variables_initializer())
        finally:
            # Release the event-file handles even when training is
            # interrupted; previously the writers were never closed.
            for writer in writers:
                writer.close()
# Training hyperparameters.
learning_Rate = 0.01
training_epochs = 2000
display_step = 200

# Training dataset.
# Note: train_X is treated as house sizes and train_y as house prices.
train_X = np.asarray([
    3.3, 4.4, 5.5, 6.71, 6.93, 4.168, 9.779, 6.182, 7.59, 2.167, 7.042,
    10.791, 5.313, 7.997, 5.654, 9.27, 3.1
])
train_y = np.asarray([
    1.7, 2.76, 2.09, 3.19, 1.694, 1.573, 3.366, 2.596, 2.53, 1.221, 2.827,
    3.465, 1.65, 2.904, 2.42, 2.94, 1.3
])
n_samples = len(train_X)

# Test dataset.
test_X = np.asarray([6.83, 4.668, 8.9, 7.91, 5.7, 8.7, 3.1, 2.1])
test_y = np.asarray([1.84, 2.273, 3.2, 2.831, 2.92, 3.24, 1.35, 1.03])

# Placeholders for the predictor variable (X) and the target variable (y).
X = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)

# Model weight and bias, each initialised from one standard-normal draw.
W = tf.Variable(np.random.randn(), name="weight")
b = tf.Variable(np.random.randn(), name="bias")

# Linear regression model.
# Formula: y = w*X + b
linear_model = W * X + b

# Error: sum of squared residuals divided by twice the sample count.
cost = tf.reduce_sum(tf.square(linear_model - y)) / (2 * n_samples)

# Optimisation with gradient descent.
optimizer = tf.train.GradientDescentOptimizer(learning_Rate).minimize(cost)
def infer(train_dir,
          height,
          width,
          nch,
          d_i,
          d_o,
          G_dim,
          named_id_to_fps=None,
          id_name_tsv_fp=None):
    """Build the inference graph and export it to ``<train_dir>/infer``.

    The graph exposes sampling ops for ``zi``/``zo``, the generator output
    for a batch of latents (``G_z``) and for the full ``zi`` x ``zo`` grid
    (``G_z_grid``), their uint8 encodings, and a single tiled preview image
    (``G_z_grid_prev``). The graph is written as ``infer.pbtxt`` and
    ``infer.meta`` and the default graph is reset afterwards.

    Args:
        train_dir: Directory under which the ``infer`` folder is created.
        height: Image height used when reshaping the generated grid.
        width: Image width used when reshaping the generated grid.
        nch: Channel count passed to the generator and used in reshapes.
        d_i: Size of the second dimension of ``zi``.
        d_o: Size of the second dimension of ``zo``.
        G_dim: Forwarded to ``DCGANGenerator64x64`` as ``dim``.
        named_id_to_fps: Optional mapping from a named id to a group of
            strings (presumably file paths; each group is joined with
            ','). When given, id-sampling ops are added to the graph.
        id_name_tsv_fp: Optional path to a tab-separated file; the first
            line is skipped and the second column of every other line is
            used as an alternative name.
    """
    infer_dir = os.path.join(train_dir, 'infer')
    if not os.path.isdir(infer_dir):
        os.makedirs(infer_dir)

    # Placeholders for the sampling stage.
    samp_zi_n = tf.placeholder(tf.int32, [], name='samp_zi_n')
    samp_zo_n = tf.placeholder(tf.int32, [], name='samp_zo_n')

    # Sample IDs or fps for comparison.
    if named_id_to_fps is not None:
        # Number of identities, exported as a named constant, and the sampler.
        nids = len(named_id_to_fps)
        tf.constant(nids, dtype=tf.int32, name='nids')
        samp_id = tf.random_uniform([samp_zi_n],
                                    0,
                                    nids,
                                    dtype=tf.int32,
                                    name='samp_id')

        # Named ids in sorted order, with each id's group joined by commas.
        sorted_ids = sorted(named_id_to_fps)
        joined_fps = [','.join(named_id_to_fps[key]) for key in sorted_ids]
        named_ids = tf.constant(sorted_ids,
                                dtype=tf.string,
                                name='meta_all_named_ids')
        fps = tf.constant(joined_fps, dtype=tf.string, name='meta_all_fps')

        # Alternative names (such as real names with spaces; not convenient
        # for file paths).
        if id_name_tsv_fp is not None:
            names = []
            with open(id_name_tsv_fp, 'r') as f:
                for row in f.readlines()[1:]:
                    names.append(row.split('\t')[1].strip())
            # NOTE: this rebinds `named_ids`, so 'samp_named_ids' below
            # gathers from the alternative names when a TSV is supplied.
            named_ids = tf.constant(names,
                                    dtype=tf.string,
                                    name='meta_all_names')

        samp_named_id = tf.gather(named_ids, samp_id, name='samp_named_ids')
        samp_fp_group = tf.gather(fps, samp_id, name='samp_group_fps')
        if id_name_tsv_fp is not None:
            samp_name = tf.gather(names, samp_id, name='samp_names')

    # Sample zi/zo uniformly from [-1, 1).
    samp_zi = tf.random_uniform([samp_zi_n, d_i],
                                -1.0,
                                1.0,
                                dtype=tf.float32,
                                name='samp_zi')
    samp_zo = tf.random_uniform([samp_zo_n, d_o],
                                -1.0,
                                1.0,
                                dtype=tf.float32,
                                name='samp_zo')

    # Latent inputs.
    zi = tf.placeholder(tf.float32, [None, d_i], name='zi')
    zo = tf.placeholder(tf.float32, [None, d_o], name='zo')

    # Latent representation.
    z = tf.concat([zi, zo], axis=1, name='z')

    # Pair every zi row with every zo row.
    zi_n = tf.shape(zi)[0]
    zo_n = tf.shape(zo)[0]
    zi_grid = tf.tile(tf.expand_dims(zi, axis=1), [1, zo_n, 1])
    zo_grid = tf.tile(tf.expand_dims(zo, axis=0), [zi_n, 1, 1])
    z_grid = tf.concat([zi_grid, zo_grid], axis=2, name='z_grid')

    # Execute generator.
    with tf.variable_scope('G'):
        G_z = DCGANGenerator64x64(z, nch, dim=G_dim)
    G_z = tf.identity(G_z, name='G_z')

    # Execute generator on the flattened grid, reusing the same variables.
    z_grid = tf.reshape(z_grid, [zi_n * zo_n, d_i + d_o])
    with tf.variable_scope('G', reuse=True):
        G_z_grid = DCGANGenerator64x64(z_grid, nch, dim=G_dim)
    G_z_grid = tf.reshape(G_z_grid, [zi_n, zo_n, height, width, nch],
                          name='G_z_grid')

    # Encode to uint8.
    G_z_uint8 = encode_png_observation(G_z, name='G_z_uint8')
    G_z_grid_uint8 = encode_png_observation(G_z_grid, name='G_z_grid_uint8')

    # Flatten grid of images to one large image (row shares zi, column
    # shares zo).
    grid_zo_n = tf.shape(G_z_grid_uint8)[1]
    preview = tf.transpose(G_z_grid_uint8, [1, 0, 2, 3, 4])
    preview = tf.reshape(preview, [grid_zo_n, zi_n * height, width, nch])
    preview = tf.transpose(preview, [1, 0, 2, 3])
    G_z_grid_prev = tf.reshape(preview,
                               [zi_n * height, grid_zo_n * width, nch],
                               name='G_z_grid_prev')

    # Saver restricted to the generator variables plus the global step.
    G_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='G')
    global_step = tf.train.get_or_create_global_step()
    saver = tf.train.Saver(G_vars + [global_step])

    # Export graph.
    tf.train.write_graph(tf.get_default_graph(), infer_dir, 'infer.pbtxt')

    # Export MetaGraph.
    infer_metagraph_fp = os.path.join(infer_dir, 'infer.meta')
    tf.train.export_meta_graph(filename=infer_metagraph_fp,
                               clear_devices=True,
                               saver_def=saver.as_saver_def())

    # Reset graph (in case training afterwards).
    tf.reset_default_graph()
def build_model(env, cfg):
    """Assemble the PPO graph and return its placeholders, tensors and ops.

    Creates the observation/action/advantage placeholders, runs the
    actor-critic built by ``core.actor_critic_fn(cfg)``, defines the clipped
    policy loss and the (optionally clipped / L2-regularised) value loss,
    and builds the training ops. Depending on
    ``cfg.architecture.unified_policy_value`` either a single ``train`` op
    or separate ``train_pi`` / ``train_v`` ops are created; the unused ones
    stay ``None``. Train ops with their gradients and variables, the sync op
    and the summary op are also registered in graph collections.

    Args:
        env: Object exposing ``observation_space`` and ``action_space``
            mappings.
        cfg: Configuration exposing ``architecture`` and ``hyperparameters``.

    Returns:
        dict mapping string names to the graph's placeholders, outputs,
        losses, diagnostics and ops.
    """
    obs_space = env.observation_space
    act_space = env.action_space

    # Inputs to computation graph - observations
    with tf.name_scope("Inputs"):
        with tf.name_scope("Observation"):
            (
                x_non_spatial_ph,
                x_spatial_ph,
                x_mask_spell_ph,
                x_mask_spatial_ph,
                x_if_spawn_spell_ph,
            ) = utils.placeholders_from_spaces(
                obs_space["non_spatial"],
                obs_space["spatial"],
                obs_space["mask_spell"],
                obs_space["mask_spatial"],
                obs_space["if_spawn_spell"],
                names=[
                    "NonSpatial",
                    "Spatial",
                    "MaskSpell",
                    "MaskSpatial",
                    "IfSpawnSpell",
                ],
            )
            x_state_in_ph = tf.placeholder(
                dtype=tf.float32,
                shape=cfg.architecture.empty_rnn_state.shape,
                name="StateIn",
            )
            x_batch_size_ph = tf.placeholder(dtype=tf.int64,
                                             shape=(),
                                             name="BatchSize")
            x_rnn_mask_ph = utils.placeholder(None, name="RnnMask")

        with tf.name_scope("Action"):
            # Inputs to computation graph - actions
            a_spell_ph, a_spatial_ph = utils.placeholders_from_spaces(
                act_space["spell"],
                act_space["spatial"],
                names=["Spell", "Spatial"],
            )

        adv_ph, ret_ph, logp_old_ph, v_old_ph = utils.placeholders(
            None, None, None, None,
            names=["Advantage", "Return", "Logp", "Value"])

    actor_critic = core.actor_critic_fn(cfg)

    # Share information about action space with policy architecture
    # state_out is None, if Architecture.USE_RNN == false
    observations = {
        "spatial": x_spatial_ph,
        "non_spatial": x_non_spatial_ph,
        "mask_spell": x_mask_spell_ph,
        "mask_spatial": x_mask_spatial_ph,
        "if_spawn_spell": x_if_spawn_spell_ph,
        "state_in": x_state_in_ph,
        "batch_size": x_batch_size_ph,
        "rnn_mask": x_rnn_mask_ph,
    }
    actions = {"spell": a_spell_ph, "spatial": a_spatial_ph}
    pi, logp, logp_pi, v, state_out = actor_critic(observations, actions,
                                                   env.action_space, cfg)

    hyperp = cfg.hyperparameters
    unified = cfg.architecture.unified_policy_value
    if hyperp.clip_vf_output:
        v = tf.clip_by_value(v, -1.0, 1.0)

    # PPO objectives
    with tf.name_scope("Objectives"):
        # policy block
        ratio = tf.exp(logp - logp_old_ph)  # pi(a|s) / pi_old(a|s)
        min_adv = tf.where(
            adv_ph > 0,
            (1 + hyperp.clip_ratio) * adv_ph,
            (1 - hyperp.clip_ratio) * adv_ph,
        )
        pi_loss = -tf.reduce_mean(tf.minimum(ratio * adv_ph, min_adv),
                                  name="LossPi")

        # value block
        if hyperp.value_clipping_enabled:
            # value function clipping: keep the larger of the clipped and
            # unclipped squared errors
            v_clipped = v_old_ph + tf.clip_by_value(
                v - v_old_ph, -hyperp.clip_range_vf, hyperp.clip_range_vf)
            unclipped_err = tf.square(v - ret_ph)
            clipped_err = tf.square(v_clipped - ret_ph)
            v_loss = tf.reduce_mean(tf.maximum(unclipped_err, clipped_err),
                                    name="LossV")
        else:
            v_loss = tf.reduce_mean((ret_ph - v)**2, name="LossV")

        # value function regularization (L2 over non-bias variables)
        if hyperp.vf_reg_enabled and not unified:
            params_v = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                         scope="ValueFunctionMain")
            v_loss_l2 = tf.add_n([
                tf.nn.l2_loss(var) for var in params_v
                if "bias" not in var.name
            ])
            v_loss = v_loss + hyperp.vf_reg * v_loss_l2

        # todo michalw: entropy bonus, value function clipping?
        loss = hyperp.pi_loss_coef * pi_loss + hyperp.vf_loss_coef * v_loss

    # Useful to watch during learning
    with tf.name_scope("Info"):
        # a sample estimate for KL-divergence, easy to compute
        approx_kl = tf.reduce_mean(logp_old_ph - logp, name="KL")
        # a sample estimate for entropy, also easy to compute
        approx_ent = tf.reduce_mean(-logp, name="Entropy")
        too_high = ratio > (1 + hyperp.clip_ratio)
        too_low = ratio < (1 - hyperp.clip_ratio)
        clipped = tf.logical_or(too_high, too_low)
        clipfrac = tf.reduce_mean(tf.cast(clipped, tf.float32),
                                  name="ClipFrac")

    def _clipped_update(opt, objective, variables):
        """Return an op applying globally norm-clipped gradients."""
        raw_grads, _vars = zip(*opt.compute_gradients(objective, variables))
        clipped_grads, _grad_norm = tf.clip_by_global_norm(
            raw_grads, hyperp.max_grad_norm)
        return opt.apply_gradients(list(zip(clipped_grads, variables)))

    def _register(collection, train_op, opt):
        """Store a train op plus the optimizer's grads and vars."""
        tf.add_to_collection(collection, train_op)
        for grad, var in opt.grads_and_vars:
            tf.add_to_collection(collection, grad)
            tf.add_to_collection(collection, var)

    with tf.name_scope("Optimizers"):
        pi_optimizer = MpiAdamOptimizer(learning_rate=hyperp.pi_lr)
        v_optimizer = MpiAdamOptimizer(learning_rate=hyperp.vf_lr)
        optimizer = MpiAdamOptimizer(learning_rate=hyperp.lr)

        train = train_pi = train_v = None
        if hyperp.grad_clipping_enabled and unified:
            # gradient clipping enabled, unified PV
            params = tf.trainable_variables()
            train = _clipped_update(optimizer, loss, params)
        elif hyperp.grad_clipping_enabled:
            # gradient clipping enabled, separate PV
            params_pi = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          scope="PolicyMain")
            train_pi = _clipped_update(pi_optimizer, pi_loss, params_pi)

            params_v = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                         scope="ValueFunctionMain")
            train_v = _clipped_update(v_optimizer, v_loss, params_v)
        elif unified:
            # no gradient clipping, unified PV
            train = optimizer.minimize(loss)
        else:
            # no gradient clipping, separate PV
            train_pi = pi_optimizer.minimize(pi_loss)
            train_v = v_optimizer.minimize(v_loss)

    if unified:
        _register("train", train, optimizer)
    else:
        _register("train_pi", train_pi, pi_optimizer)
        _register("train_v", train_v, v_optimizer)

    sync_op = sync_all_params()
    tf.add_to_collection("sync_op", sync_op)

    # Fall back to a no-op so callers can always run the summary op.
    summary_op = tf.summary.merge_all()
    if summary_op is None:
        summary_op = tf.no_op()
    tf.add_to_collection("summary_op", summary_op)

    return {
        "x_non_spatial_ph": x_non_spatial_ph,
        "x_spatial_ph": x_spatial_ph,
        "x_mask_spell_ph": x_mask_spell_ph,
        "x_mask_spatial_ph": x_mask_spatial_ph,
        "x_if_spawn_spell_ph": x_if_spawn_spell_ph,
        "x_state_in_ph": x_state_in_ph,
        "x_rnn_mask_ph": x_rnn_mask_ph,
        "x_batch_size_ph": x_batch_size_ph,
        "a_spell_ph": a_spell_ph,
        "a_spatial_ph": a_spatial_ph,
        "adv_ph": adv_ph,
        "v_old_ph": v_old_ph,
        "ret_ph": ret_ph,
        "logp_old_ph": logp_old_ph,
        "pi_spell": pi["spell"],
        "pi_spell_argmax": pi["spell_argmax"],
        "pi_spatial": pi["spatial"],
        "pi_spatial_argmax": pi["spatial_argmax"],
        "v": v,
        "state_out": state_out,
        "pi_loss": pi_loss,
        "v_loss": v_loss,
        "loss": loss,
        "approx_ent": approx_ent,
        "approx_kl": approx_kl,
        "clipfrac": clipfrac,
        "logp_pi": logp_pi,
        "train_pi": train_pi,
        "train_v": train_v,
        "train": train,
        "sync_op": sync_op,
        "summary_op": summary_op,
    }
def init_placeholders(self):
    """Create the model's input placeholders under the "input_layer" scope.

    Every placeholder is stored as an attribute on ``self``. All dimensions
    are left unspecified (``None``) except the second dimension of
    ``timelast_list``, which is fixed to ``self.position_count``.
    """
    # Shapes shared by several placeholders. The leading dimension is
    # presumably the batch size ("[B]" in the original notes).
    vector = [None]
    matrix = [None, None]
    cube = [None, None, None]

    with tf.variable_scope("input_layer"):
        # [B] user id
        self.user_id = tf.placeholder(tf.int32, vector, name="user")
        # [B] item list (user history)
        self.item_list = tf.placeholder(tf.int32, matrix, name="item_seq")
        # category list
        self.category_list = tf.placeholder(tf.int32,
                                            matrix,
                                            name='category_list')
        # time list
        self.time_list = tf.placeholder(tf.float32, matrix, name='time_list')
        # time_last list (the interval between the current item and its
        # last item)
        self.timelast_list = tf.placeholder(tf.float32,
                                            [None, self.position_count],
                                            name='timelast_list')
        # time_now list (the interval between the current item and the
        # target item)
        self.timenow_list = tf.placeholder(tf.float32,
                                           matrix,
                                           name='timenow_list')
        # position list
        self.position_list = tf.placeholder(tf.int32,
                                            matrix,
                                            name='position_list')

        # target item id
        self.target_item_id = tf.placeholder(tf.int32,
                                             vector,
                                             name='target_item_id')
        # target item category
        self.target_item_category = tf.placeholder(
            tf.int32, vector, name='target_item_category')
        # target item time
        self.target_item_time = tf.placeholder(tf.float32,
                                               vector,
                                               name='target_item_time')

        # length of item list
        self.seq_length = tf.placeholder(tf.int32, vector, name="seq_length")

        # re-consumption inputs: one value per example and one per position
        self.is_reconsume = tf.placeholder(tf.float32,
                                           vector,
                                           name="is_reconsume")
        self.reconsume_list = tf.placeholder(tf.float32,
                                             matrix,
                                             name='reconsume_list')

        # adjacency inputs
        self.adj_masks = tf.placeholder(dtype=tf.float32,
                                        shape=cube,
                                        name='adj_masks')
        self.eid_adj = tf.placeholder(dtype=tf.float32,
                                      shape=cube,
                                      name='eid_adj')
        self.adj_avg_time = tf.placeholder(dtype=tf.float32,
                                           shape=matrix,
                                           name='adj_avg_time')
def _build_net(self):
    """Build the evaluation network, its loss/train op, and the target net.

    Both networks share one architecture: a ReLU hidden layer followed by
    either a plain Q head or, when ``self.dueling`` is set, separate value
    and advantage heads combined into Q. The two networks keep their
    variables in the 'eval_net_params' and 'target_net_params' collections
    respectively.
    """

    def build_layers(s, c_names, n_l1, w_initializer, b_initializer):
        """Return the Q-value tensor computed from state tensor ``s``."""

        def affine(inputs, w_name, b_name, n_in, n_out):
            # Fully connected layer whose variables live in the scope that
            # is current at call time.
            weights = tf.get_variable(w_name, [n_in, n_out],
                                      initializer=w_initializer,
                                      collections=c_names)
            bias = tf.get_variable(b_name, [1, n_out],
                                   initializer=b_initializer,
                                   collections=c_names)
            return tf.matmul(inputs, weights) + bias

        with tf.variable_scope('l1'):
            l1 = tf.nn.relu(affine(s, 'w1', 'b1', self.n_features, n_l1))

        if not self.dueling:
            with tf.variable_scope('Q'):
                return affine(l1, 'w2', 'b2', n_l1, self.n_actions)

        # Dueling DQN
        with tf.variable_scope('Value'):
            self.V = affine(l1, 'w2', 'b2', n_l1, 1)

        with tf.variable_scope('Advantage'):
            self.A = affine(l1, 'w2', 'b2', n_l1, self.n_actions)

        with tf.variable_scope('Q'):
            # Q = V(s) + A(s,a), with the advantage centred on its mean
            # over actions.
            mean_advantage = tf.reduce_mean(self.A, axis=1, keep_dims=True)
            return self.V + (self.A - mean_advantage)

    # ------------------ build evaluate_net ------------------
    self.s = tf.placeholder(tf.float32, [None, self.n_features],
                            name='s')  # input
    self.q_target = tf.placeholder(tf.float32, [None, self.n_actions],
                                   name='Q_target')  # for calculating loss
    with tf.variable_scope('eval_net'):
        # config of layers
        c_names = ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
        n_l1 = 20
        w_initializer = tf.random_normal_initializer(0., 0.3)
        b_initializer = tf.constant_initializer(0.1)

        self.q_eval = build_layers(self.s, c_names, n_l1, w_initializer,
                                   b_initializer)

    with tf.variable_scope('loss'):
        squared_error = tf.squared_difference(self.q_target, self.q_eval)
        self.loss = tf.reduce_mean(squared_error)
    with tf.variable_scope('train'):
        rms_prop = tf.train.RMSPropOptimizer(self.lr)
        self._train_op = rms_prop.minimize(self.loss)

    # ------------------ build target_net ------------------
    self.s_ = tf.placeholder(tf.float32, [None, self.n_features],
                             name='s_')  # input
    with tf.variable_scope('target_net'):
        # Same layer sizes and initializers, separate variable collection.
        c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]

        self.q_next = build_layers(self.s_, c_names, n_l1, w_initializer,
                                   b_initializer)
def main(_):
    """Roll a video-prediction model forward and write the frames out.

    Builds hparams from the command-line flags, restores the latest
    checkpoint in ``FLAGS.output_dir``, seeds the frame (and, for problems
    with actions, action/reward) queues from one dataset example, then
    predicts ``FLAGS.num_steps`` frames auto-regressively. Every frame is
    written through a ``WholeVideoWriter`` to ``FLAGS.output_gif``.

    Raises:
        ValueError: If no checkpoint is found in ``FLAGS.output_dir``.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    trainer_lib.set_random_seed(FLAGS.random_seed)
    usr_dir.import_usr_dir(FLAGS.t2t_usr_dir)

    # Create hparams
    hparams = trainer_lib.create_hparams(FLAGS.hparams_set,
                                         FLAGS.hparams,
                                         data_dir=os.path.expanduser(
                                             FLAGS.data_dir),
                                         problem_name=FLAGS.problem)
    hparams.force_full_predict = True
    hparams.scheduled_sampling_k = -1

    # Params
    num_agents = 1  # TODO(mbz): fix the code for more agents
    num_steps = FLAGS.num_steps
    if hasattr(hparams.problem, "num_actions"):
        num_actions = hparams.problem.num_actions
    else:
        num_actions = None
    frame_shape = hparams.problem.frame_shape
    resized_frame = hparams.preprocess_resize_frames is not None
    if resized_frame:
        # Build a new list: `+=` on the hparams value would append the
        # channel count to `hparams.preprocess_resize_frames` itself.
        frame_shape = (list(hparams.preprocess_resize_frames) +
                       [hparams.problem.num_channels])

    dataset = registry.problem(FLAGS.problem).dataset(
        tf_estimator.ModeKeys.TRAIN,
        shuffle_files=True,
        data_dir=os.path.expanduser(FLAGS.data_dir),
        hparams=hparams)

    dataset = dataset.batch(num_agents, drop_remainder=True)
    data = dataset.make_one_shot_iterator().get_next()

    # Setup input placeholders
    input_size = [num_agents, hparams.video_num_input_frames]
    if num_actions is None:
        placeholders = {
            "inputs": tf.placeholder(tf.float32, input_size + frame_shape)
        }
    else:
        placeholders = {
            "inputs": tf.placeholder(tf.float32, input_size + frame_shape),
            "input_action": tf.placeholder(tf.int64, input_size + [1]),
            "input_reward": tf.placeholder(tf.int64, input_size + [1]),
            "reset_internal_states": tf.placeholder(tf.float32, []),
        }

    # Create model.
    model_cls = registry.model(FLAGS.model)
    model = model_cls(hparams, tf_estimator.ModeKeys.PREDICT)
    prediction_ops = model.infer(placeholders)

    # Bounded FIFO queues holding the most recent input window.
    states_q = Queue(maxsize=hparams.video_num_input_frames)
    actions_q = Queue(maxsize=hparams.video_num_input_frames)
    rewards_q = Queue(maxsize=hparams.video_num_input_frames)
    if num_actions is not None:
        all_qs = [states_q, actions_q, rewards_q]
    else:
        all_qs = [states_q]

    writer = common_video.WholeVideoWriter(fps=FLAGS.fps,
                                           output_path=FLAGS.output_gif)

    saver = tf.train.Saver(tf.trainable_variables())
    with tf.train.SingularMonitoredSession() as sess:
        # Load latest checkpoint
        ckpt_state = tf.train.get_checkpoint_state(FLAGS.output_dir)
        if ckpt_state is None:
            # Previously this surfaced as an AttributeError on None.
            raise ValueError("No checkpoint found in {}.".format(
                FLAGS.output_dir))
        ckpt = ckpt_state.model_checkpoint_path
        saver.restore(sess.raw_session(), ckpt)

        # get init frames from the dataset
        data_np = sess.run(data)

        frames = np.split(data_np["inputs"], hparams.video_num_input_frames,
                          1)
        for frame in frames:
            frame = np.squeeze(frame, 1)
            states_q.put(frame)
            writer.write(frame[0].astype(np.uint8))

        if num_actions is not None:
            actions = np.split(data_np["input_action"],
                               hparams.video_num_input_frames, 1)
            for action in actions:
                actions_q.put(np.squeeze(action, 1))

            rewards = np.split(data_np["input_reward"],
                               hparams.video_num_input_frames, 1)
            for reward in rewards:
                rewards_q.put(np.squeeze(reward, 1))

        for step in range(num_steps):
            print(">>>>>>> ", step)

            if num_actions is not None:
                # NOTE(review): randint's upper bound is exclusive, so the
                # last action index is never sampled - confirm intended.
                random_actions = np.random.randint(num_actions - 1)
                random_actions = np.expand_dims(random_actions, 0)
                random_actions = np.tile(random_actions, (num_agents, 1))

                # Shape inputs and targets
                inputs, input_action, input_reward = (np.stack(list(q.queue),
                                                               axis=1)
                                                      for q in all_qs)
            else:
                assert len(all_qs) == 1
                q = all_qs[0]
                elems = list(q.queue)
                # Need to adjust shapes sometimes.
                for i, e in enumerate(elems):
                    if len(e.shape) < 4:
                        elems[i] = np.expand_dims(e, axis=0)
                inputs = np.stack(elems, axis=1)

            # Predict next frames
            if num_actions is None:
                feed = {placeholders["inputs"]: inputs}
            else:
                feed = {
                    placeholders["inputs"]: inputs,
                    placeholders["input_action"]: input_action,
                    placeholders["input_reward"]: input_reward,
                    placeholders["reset_internal_states"]: float(step == 0),
                }

            predictions = sess.run(prediction_ops, feed_dict=feed)

            if num_actions is None:
                predicted_states = predictions[:, 0]
            else:
                predicted_states = predictions["targets"][:, 0]
                predicted_reward = predictions["target_reward"][:, 0]

            # Update queues
            if num_actions is None:
                # Must be a real 1-tuple: the parenthesised bare array made
                # zip() iterate over the batch axis and enqueue only the
                # first agent's frame without its batch dimension.
                new_data = (predicted_states,)
            else:
                new_data = (predicted_states, random_actions,
                            predicted_reward)
            for q, d in zip(all_qs, new_data):
                q.get()
                q.put(d.copy())

            writer.write(np.round(predicted_states[0]).astype(np.uint8))

        writer.finish_to_disk()
def train(self, train_dataset: Dataset, val_dataset: Dataset):
    """Build the model graph and train it on ``train_dataset``.

    A fresh graph is created with the learning-rate schedule, the input
    placeholders and all network parameters. The model is then trained for
    ``self.max_epochs`` epochs. Progress is printed every 50 steps. Each
    time the step counter reaches a multiple of the number of full batches
    per epoch, a checkpoint is written to ``self.ckpt_path`` and
    ``self.evaluate`` is run on ``val_dataset``.

    Args:
        train_dataset: Training data; must provide ``num_examples``,
            ``batch_count()``, ``next_batch(idx)`` and ``on_epoch_end()``.
        val_dataset: Validation data passed to ``self.evaluate``; its
            ``on_epoch_end()`` is also called after every epoch.
    """
    with tf.device('/cpu:1'):
        with tf.Graph().as_default():

            # set the learning rate
            self.global_step = tf.Variable(0, trainable=False)
            self.lr = tf.train.exponential_decay(self.initLr,
                                                 self.global_step,
                                                 self.lrDecayFreq,
                                                 self.lrDecayRate,
                                                 staircase=True)

            # preallocate x, y, baseline
            # `labels` is not referenced below; it is kept so the set of
            # graph nodes stays exactly as before.
            labels = tf.placeholder(
                "float32", shape=[self.batch_size, self.n_classes])
            self.labels_placeholder = tf.placeholder(
                tf.float32, shape=(self.batch_size), name="labels_raw")
            self.onehot_labels_placeholder = tf.placeholder(
                tf.float32,
                shape=(self.batch_size, self.n_classes),
                name="labels_onehot")
            self.inputs_placeholder = tf.placeholder(
                tf.float32,
                shape=(self.batch_size, self.img_size * self.img_size),
                name="images")

            # declare the model parameters, here is the naming rule:
            # the 1st capital letter: weights or bias (W = weights, B = bias)
            # the 2nd lowercase letter: the network (e.g.: g = glimpse network)
            # the 3rd and 4th letter(s): input-output mapping, which is
            # clearly written in the variable name argument
            self.Wg_l_h = self.weight_variable(
                (2, self.hl_size), "glimpseNet_wts_location_hidden", True)
            self.Bg_l_h = self.weight_variable(
                (1, self.hl_size), "glimpseNet_bias_location_hidden", True)

            self.Wg_g_h = self.weight_variable(
                (self.totalSensorBandwidth, self.hg_size),
                "glimpseNet_wts_glimpse_hidden", True)
            self.Bg_g_h = self.weight_variable(
                (1, self.hg_size), "glimpseNet_bias_glimpse_hidden", True)

            self.Wg_hg_gf1 = self.weight_variable(
                (self.hg_size, self.g_size),
                "glimpseNet_wts_hiddenGlimpse_glimpseFeature1", True)
            self.Wg_hl_gf1 = self.weight_variable(
                (self.hl_size, self.g_size),
                "glimpseNet_wts_hiddenLocation_glimpseFeature1", True)
            self.Bg_hlhg_gf1 = self.weight_variable(
                (1, self.g_size),
                "glimpseNet_bias_hGlimpse_hLocs_glimpseFeature1", True)

            self.Wc_g_h = self.weight_variable(
                (self.cell_size, self.g_size), "coreNet_wts_glimpse_hidden",
                True)
            self.Bc_g_h = self.weight_variable(
                (1, self.g_size), "coreNet_bias_glimpse_hidden", True)

            self.Wr_h_r = self.weight_variable(
                (self.cell_out_size, self.img_size**2),
                "reconstructionNet_wts_hidden_action", True)
            self.Br_h_r = self.weight_variable(
                (1, self.img_size**2),
                "reconstructionNet_bias_hidden_action", True)

            self.Wb_h_b = self.weight_variable(
                (self.g_size, 1), "baselineNet_wts_hiddenState_baseline",
                True)
            self.Bb_h_b = self.weight_variable(
                (1, 1), "baselineNet_bias_hiddenState_baseline", True)

            self.Wl_h_l = self.weight_variable(
                (self.cell_out_size, 2), "locationNet_wts_hidden_location",
                True)
            self.Bl_h_l = self.weight_variable(
                (1, 2), "locationNet_bias_hidden_location", True)

            self.Wa_h_a = self.weight_variable(
                (self.cell_out_size, self.n_classes),
                "actionNet_wts_hidden_action", True)
            self.Ba_h_a = self.weight_variable(
                (1, self.n_classes), "actionNet_bias_hidden_action", True)

            # query the model output
            outputs = self.model()

            # convert list of tensors to one big tensor, then reorder to
            # (batch, glimpse, 2)
            self.sampled_locs = tf.concat(axis=0, values=self.sampled_locs)
            self.sampled_locs = tf.reshape(
                self.sampled_locs, (self.nGlimpses, self.batch_size, 2))
            self.sampled_locs = tf.transpose(self.sampled_locs, [1, 0, 2])
            self.mean_locs = tf.concat(axis=0, values=self.mean_locs)
            self.mean_locs = tf.reshape(
                self.mean_locs, (self.nGlimpses, self.batch_size, 2))
            self.mean_locs = tf.transpose(self.mean_locs, [1, 0, 2])
            self.glimpse_images = tf.concat(axis=0,
                                            values=self.glimpse_images)

            # compute the reward
            cost, reward, predicted_labels, correct_labels, train_op, b, avg_b, rminusb, lr = \
                self.calc_reward(outputs)

            ####################################### START RUNNING THE MODEL #######################################

            sess_config = tf.ConfigProto(allow_soft_placement=True,
                                         log_device_placement=False)
            sess_config.gpu_options.allow_growth = True
            sess = tf.Session(config=sess_config)
            # try/finally so the session is released even if a training
            # step or the evaluation raises.
            try:
                saver = tf.train.Saver()

                init = tf.global_variables_initializer()
                sess.run(init)

                # iterations per epoch except last batch
                iterations_per_epoch = (train_dataset.num_examples //
                                        self.batch_size)
                print("iterations_per_epoch: " + str(iterations_per_epoch))

                # Global step counter (named `step` so the builtin `iter`
                # is not shadowed).
                step = 0
                # training
                for epoch in range(0, self.max_epochs):
                    for batch_idx in range(0, train_dataset.batch_count()):

                        start_time = time.time()

                        # get the next batch of examples
                        nextX, nextY = train_dataset.next_batch(batch_idx)
                        if self.translateMnist:
                            nextX, nextX_coord = self.convertTranslated(
                                nextX, self.ORIG_IMG_SIZE, self.img_size)

                        feed_dict = {
                            self.inputs_placeholder:
                            nextX,
                            self.labels_placeholder:
                            nextY,
                            self.onehot_labels_placeholder:
                            self.dense_to_one_hot(
                                nextY, num_classes=self.n_classes)
                        }

                        fetches = [
                            train_op, cost, reward, predicted_labels,
                            correct_labels, self.glimpse_images, avg_b,
                            rminusb, self.mean_locs, self.sampled_locs,
                            self.lr
                        ]
                        # feed them to the model
                        results = sess.run(fetches, feed_dict=feed_dict)

                        _, cost_fetched, reward_fetched, prediction_labels_fetched, correct_labels_fetched, \
                        glimpse_images_fetched, avg_b_fetched, rminusb_fetched, mean_locs_fetched, sampled_locs_fetched, lr_fetched = results

                        duration = time.time() - start_time

                        if step % 50 == 0:
                            print((
                                'Step %d: cost = %.5f reward = %.5f (%.3f sec) b = %.5f R-b = %.5f, LR = %.5f'
                                % (step, cost_fetched, reward_fetched,
                                   duration, avg_b_fetched, rminusb_fetched,
                                   lr_fetched)))

                        step += 1

                        # Checkpoint and validate once per full epoch's
                        # worth of steps.
                        if step % iterations_per_epoch == 0:
                            print("EPOCH: " + str(epoch))
                            saver.save(sess, self.ckpt_path)
                            self.evaluate(val_dataset,
                                          sess,
                                          reward,
                                          predicted_labels,
                                          correct_labels,
                                          glimpse_images=self.glimpse_images)

                    train_dataset.on_epoch_end()
                    val_dataset.on_epoch_end()
            finally:
                sess.close()
def _gaussian_noise_scale(self):
    """Return the std-dev of the Gaussian noise added to teacher labels.

    Computes ``sqrt(2 * ln(1.25 * 10**delta)) / epsilon`` from
    ``self.delta`` and ``self.epsilon``. ``self.delta`` is used as a
    base-10 exponent here (presumably the privacy parameter is
    ``10**-delta`` - confirm against where ``delta`` is set).
    """
    # `**` is exponentiation; the earlier `10 ^ delta` was a bitwise XOR
    # (e.g. 10 ^ 5 == 15) and produced a far too small noise scale.
    return np.sqrt(2 * np.log(1.25 * (10**self.delta))) / self.epsilon


def fit(self, data):
    """Fit the generative model of the training data distribution.

    One-hot encodes ``data``, min-max scales the features, builds the
    conditional generator/discriminator graph with a gradient penalty, and
    runs ``NITER`` iterations. Each iteration performs ``NUM_TEACHERS``
    discriminator updates on noisy real/fake labels followed by one
    generator update. On completion ``self.trained`` is set to ``True``.

    :param data: DataFrame: Training set
    """
    X_train, Y_train, cols_to_reverse = self._one_hot(data)
    self.columns_to_reverse = cols_to_reverse

    self.no, self.X_dim = X_train.shape
    self.z_dim = int(self.X_dim / 4)
    self.h_dim = int(self.X_dim)

    # Feature matrix
    self.X = tf.placeholder(tf.float32, shape=[None, self.X_dim])
    # Target variable
    self.Y = tf.placeholder(tf.float32, shape=[None, C_DIM])
    # Latent space
    self.Z = tf.placeholder(tf.float32, shape=[None, self.z_dim])
    # Conditional variable (noisy real/fake indicator)
    self.M = tf.placeholder(tf.float32, shape=[None, C_DIM])
    self.Y_train = Y_train

    # Scale of the Gaussian noise that perturbs the real/fake labels.
    lamda = self._gaussian_noise_scale()

    # Data Preprocessing: shift to zero minimum, then scale by the maximum.
    X_train = np.asarray(X_train)
    self.Min_Val = np.min(X_train, 0)
    X_train = X_train - self.Min_Val
    self.Max_Val = np.max(X_train, 0)
    X_train = X_train / (self.Max_Val + 1e-8)
    self.dim = len(X_train[:, 0])

    # Generator
    self.G_sample = self._generator(self.Z, self.Y)

    # Discriminator
    D_real = self._discriminator(self.X, self.Y)
    D_fake = self._discriminator(self.G_sample, self.Y)
    D_entire = tf.concat(axis=0, values=[D_real, D_fake])

    # Replacement of Clipping algorithm to Penalty term
    # 1. Line 6 in Algorithm 1
    eps = tf.random_uniform([MB_SIZE, 1], minval=0., maxval=1.)
    X_inter = eps * self.X + (1. - eps) * self.G_sample

    # 2. Line 7 in Algorithm 1
    grad = tf.gradients(self._discriminator(X_inter, self.Y),
                        [X_inter, self.Y])[0]
    grad_norm = tf.sqrt(tf.reduce_sum((grad)**2 + 1e-8, axis=1))
    grad_pen = LAM * tf.reduce_mean((grad_norm - 1)**2)

    # Loss function
    D_loss = tf.reduce_mean((1 - self.M) * D_entire) - tf.reduce_mean(
        self.M * D_entire) + grad_pen
    G_loss = -tf.reduce_mean(D_fake)

    # Solver
    D_solver = (tf.train.AdamOptimizer(
        learning_rate=LR, beta1=0.5).minimize(D_loss, var_list=self.theta_D))
    G_solver = (tf.train.AdamOptimizer(
        learning_rate=LR, beta1=0.5).minimize(G_loss, var_list=self.theta_G))

    # Start session
    self.sess = tf.Session()
    self.sess.run(tf.global_variables_initializer())

    # Training iterations
    for _ in tqdm(range(NITER)):
        for _ in range(NUM_TEACHERS):
            # Teacher training
            Z_mb = self._sample_Z(MB_SIZE, self.z_dim)

            # Teacher 1
            X_idx = self._sample_X(self.no, MB_SIZE)
            X_mb = X_train[X_idx, :]

            Y_mb = np.reshape(Y_train[X_idx], [MB_SIZE, 1])

            # Real samples are labelled 1 and generated ones 0; Gaussian
            # noise is added and the result is thresholded back to {0, 1}.
            M_real = np.ones([MB_SIZE])
            M_fake = np.zeros([MB_SIZE])

            M_entire = np.concatenate((M_real, M_fake), 0)

            Normal_Add = np.random.normal(loc=0.0,
                                          scale=lamda,
                                          size=MB_SIZE * 2)

            M_entire = M_entire + Normal_Add
            M_entire = (M_entire > 0.5)
            M_mb = np.reshape(M_entire.astype(float), (2 * MB_SIZE, 1))

            _, D_loss_curr = self.sess.run([D_solver, D_loss],
                                           feed_dict={
                                               self.X: X_mb,
                                               self.Z: Z_mb,
                                               self.M: M_mb,
                                               self.Y: Y_mb
                                           })

        # Generator Training
        Z_mb = self._sample_Z(MB_SIZE, self.z_dim)
        X_idx = self._sample_X(self.no, MB_SIZE)

        Y_mb = np.reshape(Y_train[X_idx], [MB_SIZE, 1])

        _, G_loss_curr = self.sess.run([G_solver, G_loss],
                                       feed_dict={
                                           self.Z: Z_mb,
                                           self.Y: Y_mb
                                       })

    self.trained = True
def main(unused_argv):
    """Run a detection SavedModel over a set of images and write an HTML report.

    Steps:
      1. Read the ``id:name`` label map from ``FLAGS.label_map_file``.
      2. Load the SavedModel in ``FLAGS.saved_model_dir`` and look up the
         output tensors of its ``serving_default`` signature.
      3. For every file matching ``FLAGS.image_file_pattern`` run the model,
         scale the boxes to pixel coordinates and draw the detections.
      4. Embed the annotated images as base64 JPEGs in ``FLAGS.output_html``.

    Args:
      unused_argv: ignored positional command-line arguments.

    Raises:
      ValueError: if the label map format is not ``csv``, a label map row is
        not in ``id:name`` form, or ``use_normalized_coordinates`` is False
        and the SavedModel has no ``image_info`` output.
    """
    del unused_argv

    # Load the label map.
    print(' - Loading the label map...')
    label_map_dict = {}
    if FLAGS.label_map_format == 'csv':
        with tf.gfile.Open(FLAGS.label_map_file, 'r') as csv_file:
            reader = csv.reader(csv_file, delimiter=':')
            for row in reader:
                if len(row) != 2:
                    raise ValueError(
                        'Each row of the csv label map file must be in '
                        '`id:name` format.')
                id_index = int(row[0])
                name = row[1]
                label_map_dict[id_index] = {
                    'id': id_index,
                    'name': name,
                }
    else:
        raise ValueError('Unsupported label map format: {}.'.format(
            FLAGS.label_map_format))

    image_with_detections_list = []
    with tf.Session(graph=tf.Graph()) as sess:
        print(' - Loading saved model...')
        meta_graph_def = tf.saved_model.load(
            sess, [tf.saved_model.tag_constants.SERVING],
            FLAGS.saved_model_dir)
        signature = meta_graph_def.signature_def['serving_default']
        inputs = dict(signature.inputs)
        outputs = dict(signature.outputs)
        # Debug output: shows the input keys of the serving signature.
        print(inputs)

        # NOTE(review): this is a brand-new placeholder, not one of the
        # tensors listed in `inputs`, so the value fed below does not appear
        # to reach the loaded model -- confirm which signature input should
        # be fed here (and whether it expects a batch dimension).
        image_node = tf.placeholder(shape=(), dtype=tf.string)

        output_nodes = {
            'num_detections': outputs['num_detections'].name,
            'detection_boxes': outputs['detection_boxes'].name,
            'detection_classes': outputs['detection_classes'].name,
            'detection_attributes': outputs['detection_attributes'].name,
            'detection_scores': outputs['detection_scores'].name,
        }
        if 'detection_masks' in outputs:
            output_nodes['detection_masks'] = outputs['detection_masks'].name
        if not FLAGS.use_normalized_coordinates:
            if 'image_info' not in outputs:
                raise ValueError(
                    'If `use_normalized_coordinates` = False, `image_info`'
                    ' node must be included in the SavedModel.')
            output_nodes['image_info'] = outputs['image_info'].name

        image_files = tf.gfile.Glob(FLAGS.image_file_pattern)
        for i, image_file in enumerate(image_files):
            print(' - processing image %d...' % i)

            with tf.gfile.GFile(image_file, 'rb') as f:
                image_bytes = f.read()

            image = Image.open(image_file)
            image = image.convert('RGB')  # needed for images with 4 channels.
            width, height = image.size
            np_image = (np.array(image.getdata()).reshape(
                height, width, 3).astype(np.uint8))

            # Debug output: flattened, normalized pixels. The result is only
            # printed; it is not fed to the model.
            np_image_input = input_utils.normalize_image_np(np_image)
            np_image_input = np.float32(np_image_input.reshape(-1))
            print(np_image_input)

            output_results = sess.run(output_nodes,
                                      feed_dict={image_node: image_bytes})

            num_detections = int(output_results['num_detections'][0])
            np_boxes = output_results['detection_boxes'][0, :num_detections]
            if not FLAGS.use_normalized_coordinates:
                # Divide the boxes by row 1 of image_info (tiled to cover
                # all four box coordinates) before scaling to pixels below.
                np_image_info = output_results['image_info'][0]
                np_boxes = np_boxes / np.tile(np_image_info[1:2, :], (1, 2))
            ymin, xmin, ymax, xmax = np.split(np_boxes, 4, axis=-1)
            ymin = ymin * height
            ymax = ymax * height
            xmin = xmin * width
            xmax = xmax * width
            np_boxes = np.concatenate([ymin, xmin, ymax, xmax], axis=-1)
            np_scores = output_results['detection_scores'][0, :num_detections]
            np_classes = output_results['detection_classes'][
                0, :num_detections]
            np_classes = np_classes.astype(np.int32)

            np_masks = None
            if 'detection_masks' in output_results:
                np_masks = output_results['detection_masks'][
                    0, :num_detections]
                np_masks = mask_utils.paste_instance_masks(
                    np_masks, box_utils.yxyx_to_xywh(np_boxes), height, width)

            image_with_detections = (
                visualization_utils.visualize_boxes_and_labels_on_image_array(
                    np_image,
                    np_boxes,
                    np_classes,
                    np_scores,
                    label_map_dict,
                    instance_masks=np_masks,
                    # Boxes were already converted to pixels above.
                    use_normalized_coordinates=False,
                    max_boxes_to_draw=FLAGS.max_boxes_to_draw,
                    min_score_thresh=FLAGS.min_score_threshold))
            image_with_detections_list.append(image_with_detections)

    print(' - Saving the outputs...')
    image_strs = []
    for annotated in image_with_detections_list:
        formatted_image = Image.fromarray(annotated.astype(np.uint8))
        with io.BytesIO() as stream:
            formatted_image.save(stream, format='JPEG')
            data_uri = base64.b64encode(stream.getvalue()).decode('utf-8')
        # Attributes are separated by whitespace only; the former stray comma
        # after the src value made the tag malformed.
        image_strs.append(
            '<img src="data:image/jpeg;base64,{}" height="800">'.format(
                data_uri))

    html_str = '<html>' + ' '.join(image_strs) + '</html>'
    with tf.gfile.GFile(FLAGS.output_html, 'w') as f:
        f.write(html_str)
def __init__(
    self,
    learning_rate,
    num_layers,
    size,
    size_layer,
    output_size,
    forget_bias=0.1,
    attention_size=10,
):
    """Build a two-stage bidirectional LSTM graph with attention.

    A first bidirectional stack ("encoder") reads ``self.X`` starting from
    the fed ``self.forward_hidden_layer`` / ``self.backward_hidden_layer``
    states. Its attention-pooled outputs, concatenated with part of its
    final state, become the initial states of a second bidirectional stack
    (built under the ``decoder`` variable scope) that reads ``self.X``
    again. The attention-pooled outputs of that second stack feed a dense
    layer (``self.logits``) trained with a mean-squared-error cost.

    :param learning_rate: learning rate given to ``tf.train.AdamOptimizer``.
    :param num_layers: number of LSTM cells stacked in every MultiRNNCell.
    :param size: size of the last axis of the ``self.X`` placeholder.
    :param size_layer: number of units of every LSTM cell.
    :param output_size: width of ``self.Y`` and of the final dense layer.
    :param forget_bias: despite its name, used as ``output_keep_prob`` of
        every ``DropoutWrapper``.
    :param attention_size: width of the attention projections and of the
        attention weight vectors.
    """

    def stacked_lstm():
        # One MultiRNNCell of `num_layers` LSTM cells, concatenated state.
        layers = [
            tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple=False)
            for _ in range(num_layers)
        ]
        return tf.nn.rnn_cell.MultiRNNCell(layers, state_is_tuple=False)

    def with_dropout(cell):
        # Output dropout with keep probability `forget_bias`.
        return tf.nn.rnn_cell.DropoutWrapper(
            cell, output_keep_prob=forget_bias)

    def attend(variable_name, sequence, state):
        # Pool `sequence` over its second axis using tanh scores computed
        # from the sequence (keys) and the tail of `state` (query).
        weights = tf.get_variable(variable_name, [attention_size],
                                  tf.float32)
        query = tf.layers.dense(
            tf.expand_dims(state[:, size_layer:], 1), attention_size)
        keys = tf.layers.dense(sequence, attention_size)
        scores = tf.reduce_sum(weights * tf.tanh(keys + query), [2])
        scores = tf.nn.tanh(scores)
        pooled = tf.matmul(
            tf.transpose(sequence, [0, 2, 1]), tf.expand_dims(scores, 2))
        return tf.squeeze(pooled, 2)

    # ---- first bidirectional stack ------------------------------------
    backward_stack = stacked_lstm()
    forward_stack = stacked_lstm()

    self.X = tf.placeholder(tf.float32, [None, None, size])
    self.Y = tf.placeholder(tf.float32, [None, output_size])

    backward_encoder = with_dropout(backward_stack)
    forward_encoder = with_dropout(forward_stack)

    # Flat (non-tuple) state width used for both fed initial states.
    state_width = num_layers * 2 * size_layer
    self.backward_hidden_layer = tf.placeholder(
        tf.float32, shape=(None, state_width))
    self.forward_hidden_layer = tf.placeholder(
        tf.float32, shape=(None, state_width))

    encoder_outputs, encoder_state = tf.nn.bidirectional_dynamic_rnn(
        forward_encoder,
        backward_encoder,
        self.X,
        initial_state_fw=self.forward_hidden_layer,
        initial_state_bw=self.backward_hidden_layer,
        dtype=tf.float32,
    )

    # Index 0 is the forward direction, index 1 the backward one. Each
    # pooled output is joined with the state tail to form the initial state
    # handed to the second stack.
    decoder_initial = []
    for variable_name, sequence, state in zip(
            ('attention_v1', 'attention_v2'), encoder_outputs,
            encoder_state):
        context = attend(variable_name, sequence, state)
        decoder_initial.append(
            tf.concat([context, state[:, size_layer:]], 1))

    # ---- second bidirectional stack -----------------------------------
    # NOTE(review): the 'decoder' scope is assumed to end right after the
    # RNN call, leaving the attention/dense variables below outside it --
    # confirm against existing checkpoints.
    with tf.variable_scope('decoder', reuse=False):
        self.backward_rnn_cells_dec = stacked_lstm()
        self.forward_rnn_cells_dec = stacked_lstm()
        backward_drop_dec = with_dropout(self.backward_rnn_cells_dec)
        forward_drop_dec = with_dropout(self.forward_rnn_cells_dec)
        decoder_outputs, self.last_state = tf.nn.bidirectional_dynamic_rnn(
            forward_drop_dec,
            backward_drop_dec,
            self.X,
            initial_state_fw=decoder_initial[0],
            initial_state_bw=decoder_initial[1],
            dtype=tf.float32,
        )

    pooled_directions = [
        attend(variable_name, sequence, state)
        for variable_name, sequence, state in zip(
            ('attention_v3', 'attention_v4'), decoder_outputs,
            self.last_state)
    ]

    # ---- regression head and training op ------------------------------
    self.outputs = tf.concat(pooled_directions, 1)
    self.logits = tf.layers.dense(self.outputs, output_size)
    self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(self.cost)