def build_network(self):
    """Build the critic graph: a Q-value estimate for (state, action) pairs.

    Creates, inside a variable scope named ``self.name``:

    * ``self.input``    -- float32 placeholder, shape ``[None, *self.input_dims]``
    * ``self.actions``  -- float32 placeholder, shape ``[None, self.n_actions]``
    * ``self.q_target`` -- float32 placeholder, shape ``[None, 1]``
    * ``self.q``        -- single-unit output of the network
    * ``self.loss``     -- mean squared error between ``self.q_target`` and ``self.q``

    Reads ``self.name``, ``self.input_dims``, ``self.n_actions``,
    ``self.fc1_dims`` and ``self.fc2_dims``.
    """
    with tf.variable_scope(self.name):
        # The leading dimension is None because the batch size is not known
        # when the graph is built.
        self.input = tf.placeholder(
            tf.float32, shape=[None, *self.input_dims], name='inputs')
        # Actions only enter the critic at the second hidden layer.
        self.actions = tf.placeholder(
            tf.float32, shape=[None, self.n_actions], name='actions')
        # With Q-learning we regress towards a target value (the quantity
        # y_i in the paper's pseudo-code); it is a scalar per sample.
        self.q_target = tf.placeholder(
            tf.float32, shape=[None, 1], name='targets')

        f1 = 1 / np.sqrt(self.fc1_dims)
        dense1 = tf.layers.dense(
            self.input,
            units=self.fc1_dims,
            kernel_initializer=tf.random_uniform_initializer(-f1, f1),
            bias_initializer=tf.random_uniform_initializer(-f1, f1))
        # NOTE(review): batch_normalization is called without `training=`,
        # so it uses the library default -- confirm that is intended.
        batch1 = tf.layers.batch_normalization(dense1)
        # Whether to activate before or after batch normalization is
        # debated; here the ReLU comes after, because it might truncate too
        # much of the signal the normalization statistics need.
        layer1_activation = tf.nn.relu(batch1)

        f2 = 1 / np.sqrt(self.fc2_dims)
        dense2 = tf.layers.dense(
            layer1_activation,
            units=self.fc2_dims,
            kernel_initializer=tf.random_uniform_initializer(-f2, f2),
            bias_initializer=tf.random_uniform_initializer(-f2, f2))
        batch2 = tf.layers.batch_normalization(dense2)

        # Unlike the actor network, no activation is applied to batch2
        # here: the action branch is added first, then the sum is activated.
        action_in = tf.layers.dense(
            self.actions, units=self.fc2_dims, activation=tf.nn.relu)
        state_actions = tf.nn.relu(tf.add(batch2, action_in))

        # Output layer: one Q-value per sample, initialised in a narrow
        # range and L2-regularised.
        f3 = 0.003
        self.q = tf.layers.dense(
            state_actions,
            units=1,
            kernel_initializer=tf.random_uniform_initializer(-f3, f3),
            bias_initializer=tf.random_uniform_initializer(-f3, f3),
            kernel_regularizer=tf.keras.regularizers.l2(0.01))

        # self.q is the network output; regress it onto the target.
        self.loss = tf.losses.mean_squared_error(self.q_target, self.q)
def linear(input_, output_dim, stddev=0.02, name=None, with_w=False):
    """Apply an affine map ``input_ @ w + b`` using scoped variables.

    Args:
        input_: tensor whose last static dimension gives the input width.
        output_dim: width of the output.
        stddev: standard deviation of the truncated-normal weight
            initializer.
        name: variable-scope name; falls back to ``"linear"`` when falsy.
        with_w: when true, also return the weight and bias variables.

    Returns:
        The output tensor, or ``(output, w, biases)`` when ``with_w`` is true.
    """
    input_dim = input_.get_shape()[-1]
    with tf.variable_scope(name or "linear"):
        w = tf.get_variable(
            'w', [input_dim, output_dim],
            initializer=tf.truncated_normal_initializer(stddev=stddev))
        biases = tf.get_variable(
            'b', [output_dim], initializer=tf.constant_initializer(0.0))
        output = tf.matmul(input_, w) + biases
        if with_w:
            return output, w, biases
        return output
def build_network(self):
    """Build the actor graph: a deterministic policy mapping states to actions.

    Creates, inside a variable scope named ``self.name``:

    * ``self.input``           -- float32 placeholder, shape
      ``[None, *self.input_dims]``
    * ``self.action_gradient`` -- float32 placeholder, shape
      ``[None, self.n_actions]``
    * ``self.mu``              -- tanh output scaled by ``self.action_bound``

    Reads ``self.name``, ``self.input_dims``, ``self.n_actions``,
    ``self.fc1_dims``, ``self.fc2_dims`` and ``self.action_bound``.
    """
    # Every network gets its own variable scope.
    with tf.variable_scope(self.name):
        # The placeholder is named to ease debugging.
        self.input = tf.placeholder(
            tf.float32, shape=[None, *self.input_dims], name='inputs')
        # Gradient of Q with respect to each action, hence one column per
        # action dimension.
        self.action_gradient = tf.placeholder(
            tf.float32, shape=[None, self.n_actions])

        f1 = 1 / np.sqrt(self.fc1_dims)
        dense1 = tf.layers.dense(
            self.input,
            units=self.fc1_dims,
            kernel_initializer=tf.random_uniform_initializer(-f1, f1),
            bias_initializer=tf.random_uniform_initializer(-f1, f1))
        # NOTE(review): batch_normalization is called without `training=`,
        # so it uses the library default -- confirm that is intended.
        batch1 = tf.layers.batch_normalization(dense1)
        # Whether to activate before or after batch normalization is
        # debated; here the ReLU comes after, because it might truncate too
        # much of the signal the normalization statistics need.
        layer1_activation = tf.nn.relu(batch1)

        f2 = 1 / np.sqrt(self.fc2_dims)
        dense2 = tf.layers.dense(
            layer1_activation,
            units=self.fc2_dims,
            kernel_initializer=tf.random_uniform_initializer(-f2, f2),
            bias_initializer=tf.random_uniform_initializer(-f2, f2))
        batch2 = tf.layers.batch_normalization(dense2)
        layer2_activation = tf.nn.relu(batch2)

        # Output layer: the (deterministic) policy itself, squashed to
        # [-1, 1] by tanh.
        f3 = 0.003
        mu = tf.layers.dense(
            layer2_activation,
            units=self.n_actions,
            activation=tf.nn.tanh,
            kernel_initializer=tf.random_uniform_initializer(-f3, f3),
            bias_initializer=tf.random_uniform_initializer(-f3, f3))
        # The environment may well require actions whose magnitude exceeds
        # 1, so rescale the tanh output by the action bound.
        self.mu = tf.multiply(mu, self.action_bound)
def train_multi_GPUs():
    """Build a multi-GPU training graph with per-tower gradient averaging.

    Loads image/depth batches, resizes them to ``args.INPUT_SIZE``, sets up
    a polynomially decayed learning rate driven by a scalar placeholder and
    a momentum optimizer, then builds one tower per GPU. Each tower computes
    gradients of its L1 loss; the gradients are combined with
    ``average_gradients`` and applied once.

    Everything is configured through the module-level ``args``. The
    function builds graph ops only and returns nothing.
    """
    tf.set_random_seed(args.RANDOM_SEED)

    img_batch, dep_batch = LoadData(
        args.FILELIST, args.BS, args.DATA_DIR, args.IMAGE_SIZE)
    img_batch = tf.image.resize_nearest_neighbor(img_batch, args.INPUT_SIZE)
    dep_batch = tf.image.resize_nearest_neighbor(dep_batch, args.INPUT_SIZE)

    # Learning rate: "poly" schedule, base_lr * (1 - step / STEPS) ** 0.9,
    # with the current step fed through lr_ph.
    lr_ph = tf.placeholder(tf.float32, shape=[])
    base_lr = tf.constant(args.LEARNING_RATE)
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow(1. - lr_ph / args.STEPS, 0.9))

    opt = tf.train.MomentumOptimizer(learning_rate, 0.9)
    # Up to this point the setup is the same as for single-GPU training.

    tower_grads = []
    with tf.variable_scope(tf.get_variable_scope()):
        for i in range(args.NUM_GPUS):
            with tf.device('/gpu:%d' % i):
                # Slice of the batch handled by this GPU.
                # NOTE(review): this assumes LoadData yields at least
                # NUM_GPUS * BS samples per batch, although it is called
                # with args.BS -- confirm against LoadData.
                img_batch_tower = img_batch[i * args.BS:(i + 1) * args.BS, :, :]
                dep_batch_tower = dep_batch[i * args.BS:(i + 1) * args.BS, :, :]

                out = Net()
                loss = L1loss(out, dep_batch_tower)

                # Later towers must share the first tower's weights rather
                # than create new ones.
                # NOTE(review): this only takes effect if Net() creates its
                # variables through tf.get_variable -- confirm.
                tf.get_variable_scope().reuse_variables()

                # Collect the variables only after the network exists;
                # before Net() has run on the first tower there is nothing
                # to differentiate against.
                all_trainable = tf.trainable_variables()

                # if restore:
                #     restore_var = ...

                cur_grads = tf.gradients(loss, all_trainable)
                tower_grads.append(cur_grads)

    grads = average_gradients(tower_grads)
    train_op = opt.apply_gradients(zip(grads, all_trainable))
#The model encoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='encoder_inputs') encoder_inputs_length = tf.placeholder(shape=(None, ), dtype=tf.int32, name='encoder_inputs_length') decoder_targets = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_targets') with tf.variables_scope("embeddings"): embeddings = tf.Variable(word_embeddings_np, name="word_embeds", dtype=tf.float32, trainable=False) encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs) with tf.variables_scope("encoder"): encoder_cell = LSTMCell(encoder_hidden_units) ((encoder_fw_outputs, encoder_bw_outputs), (encoder_fw_final_state, encoder_bw_final_state)) = (tf.nn.bidirectional_dynamic_rnn( cell_fw=encoder_cell, cell_bw=encoder_cell, inputs=encoder_inputs_embedded,
def __init__(self):
    """Set hyper-parameters and build the seq2seq graph and its loss.

    Defines model sizes, a non-trainable learning rate with a decay op, an
    optional sampled-softmax output projection, the encoder/decoder
    placeholders, and finally ``self.outputs``, ``self.states`` and
    ``self.losses`` through the ``tf.nn.seq2seq`` helpers.

    NOTE(review): this method reads ``self.target_vocab_size``,
    ``self.source_vocab_size``, ``self.encoder_length``,
    ``self.decoder_length`` and the bare names ``single_cell`` and ``size``,
    none of which are assigned here -- confirm they are provided elsewhere
    (class attributes / module globals) before relying on this constructor.
    """
    self.vocab_size = 10000             # vocabulary size
    self.memory_size = 300              # RNN unit size
    self.embedding_size = 300           # word-embedding dimension
    self.dialog_length = 30             # max supported dialog length (two speakers alternating)
    self.user_sentence_length = 50      # max length of a user utterance
    self.waiter_sentence_length = 200   # max length of a customer-service utterance
    self.batch_size = 5                 # still need to figure out how to implement batching?
    self.learning_rate = tf.Variable(float(0.5), trainable=False)  # learning rate
    # Op that decays the learning rate by a factor of 0.995.
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * 0.995)
    self.global_step = tf.Variable(0, trainable=False)  # number of training steps so far

    # The decoder must project its output to vocabulary size for softmax.
    output_projection = None
    softmax_loss_function = None  # softmax loss function
    num_samples = 512             # number of samples for sampled softmax

    if num_samples > 0 and num_samples < self.target_vocab_size:
        # When the sampled-softmax sample count is below the vocabulary
        # size, a projection weight matrix has to be supplied. It is used
        # in the decoder only.
        w = tf.get_variable('proj_w', [self.memory_size, self.vocab_size])
        w_t = tf.transpose(w)
        b = tf.get_variable('proj_b', [self.vocab_size])
        output_projection = (w, b)

        def sample_loss(inputs, labels):
            labels = tf.reshape(labels, [-1, 1])  # flatten labels into a column
            local_w_t = tf.cast(w_t, tf.float32)
            local_b = tf.cast(b, tf.float32)
            local_inputs = tf.cast(inputs, tf.float32)
            return tf.nn.sampled_softmax_loss(
                local_w_t, local_b, local_inputs, labels,
                num_samples, self.target_vocab_size)

        softmax_loss_function = sample_loss

    # Cells used by the model.
    encoder_cells = []  # encoder cells, parameters shared among them (currently unused)
    decoder_cells = []  # decoder cells, parameters shared among them (currently unused)
    # RNN cell encoding the dialog context: a single instance, no sharing
    # needed (GRUCell of size memory_size).
    context_cell = tf.nn.rnn_cell.GRUCell(self.memory_size)
    with tf.variable_scope('hred_encoder'):
        for i in range(self.dialog_length):
            # NOTE(review): rebinds context_cell on every iteration and
            # nothing below uses it -- looks like unfinished work.
            context_cell = tf.nn.rnn_cell.GRUCell(self.memory_size)

    # Feed placeholders: one int32 vector per time step.
    self.encoder_inputs = []
    self.decoder_inputs = []
    self.target_weights = []
    for i in xrange(self.encoder_length):
        self.encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name='encoder{0}'.format(i)))
    for i in xrange(self.decoder_length):
        self.decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
        self.target_weights.append(
            tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))
    # Targets are the decoder inputs shifted by one step.
    targets = [self.decoder_inputs[i + 1]
               for i in xrange(len(self.decoder_inputs) - 1)]

    self.outputs, self.states = tf.nn.seq2seq.embedding_rnn_seq2seq(
        self.encoder_inputs,
        self.decoder_inputs,
        single_cell,
        num_encoder_symbols=self.source_vocab_size,
        num_decoder_symbols=self.target_vocab_size,
        embedding_size=size,
        output_projection=output_projection,
        feed_previous=False
    )
    # The last output / weight has no matching target after the shift, so
    # both are trimmed by one.
    self.losses = tf.nn.seq2seq.sequence_loss(
        self.outputs[:-1],
        targets,
        self.target_weights[:-1],
        softmax_loss_function=softmax_loss_function
    )
def __init__(self, sess, data, name, kernel_widths, kernel_filters, batch_size=100, embedding=15, learn_rate=1.0, lstm_dims=[100], highways=0, max_gradient_norm=5.0, resample=False, restore=None): self.sess = sess self.name = name self.loader = CharLoader(data=data, batch_size=batch_size, resample=resample) try: # to restore object if desired if not restore: raise IOError if os.path.isfile("tmp_%s/%s.meta" % (self.name, restore)): print("attempting to restore model from %s" % restore) meta_file = "tmp_%s/%s.meta" % (self.name, restore) else: # get most recent file print("attempting to restore model from latest checkpoint") files = glob.glob("tmp_%s/*.meta" % self.name) if not files: raise IOError files.sort(key=lambda x: -os.path.getmtime(x)) meta_file = files[0] checkpoint_name = meta_file.split('.')[0] self.saver = tf.train.import_meta_graph(meta_file) self.saver.restore(self.sess, checkpoint_name) # restore class variables if os.path.isfile("tmp_%s/loss.pkl" % self.name): with open("tmp_%s/loss.pkl" % self.name, 'r') as f: self.train_loss, self.valid_loss, self.rate = pickle.load(f) else: self.train_loss = list() self.valid_loss = list() graph = tf.get_default_graph() self.input_chars = graph.get_tensor_by_name('%s/char_ids:0' % self.name) self.true_emojis = graph.get_tensor_by_name('%s/emoji_ids:0' % self.name) self.keep_rate = graph.get_tensor_by_name('%s/full/keep_rate:0' % self.name) self.prediction = graph.get_tensor_by_name('%s/prediction:0' % self.name) self.loss = graph.get_tensor_by_name('%s/loss:0' % self.name) self.learn_rate = graph.get_tensor_by_name('%s/learn_rate:0' % self.name) self.global_step = graph.get_tensor_by_name('%s/global_step:0' % self.name) self.trainer = graph.get_operation_by_name('%s/trainer' % self.name) print("restored from %s" % checkpoint_name) except (IOError, tf.errors.NotFoundError) as e: # initialize object as normal if restore: # if failed to restore, reset session print("failed to restore model") #tf.reset_default_graph() 
clear graph print("building model") with tf.variable_scope(self.name): # embed chars self.input_chars = tf.placeholder(tf.int32, [None, self.loader.max_seq_len, self.loader.max_word_len], name='char_ids') char_embeds = tf.get_variable("char_embedding", shape=[self.loader.char_vocab_size, embedding] initializer=tf.random_uniform_initializer(minval=-0.5, maxval=0.5)) # initializers for weights and biases unif_init = tf.random_uniform_initializer(minval=-0.05, maxval=0.05) cnst_init = tf.constant_initializer(0.1) # create convolutions with tf.variable_scope('conv'): cnn_outputs = list() char_indices = tf.split(self.input_chars, self.loader.max_seq_len, 1) for i in xrange(self.loader.max_seq_len): embedded_chars = tf.nn.embedding_lookup(char_embeds, char_indices[i]) temp_output = list() for width, filters in zip(kernel_widths, kernel_filters): kernel = tf.get_variable(name="kernel_%s_%s" % (width, filters), shape=[width, embedding, filters], initializer=unif_init) bias = tf.get_variable(name="kernel_bias_%s_%s" % (width, filters), shape=[filters], initializer=cnst_init) conv = tf.nn.conv1d(embedded_chars, kernel, 1, 'VALID') + bias pool = tf.reduce_max(conv, axis=1) temp_output.append(pool) cnn_outputs.append(tf.concat(temp_output, axis=1)) # initializer and expected cnn output dimension N = sum([width*filters for width, filters in zip(kernel_widths, kernel_filters)]) neg_init = tf.constant_initializer(-1) # create highway network with tf.variables_scope('hwy'): hwy_inputs = cnn_outputs if highways > 0: for i in xrange(highways): hwy_outputs = list() W_T = tf.get_variable(name="transform_%d_weight" % (i+1), shape=[N, N], initializer=unif_init) b_T = tf.get_variable(name="transform_%d_bias" % (i+1), shape=[N], initializer=neg_init) W_H = tf.get_variable(name="carry_%d_weight" % (i+1), shape=[N, N], initializer=unif_init) b_H = tf.get_variable(name="carry_%d_bias" % (i+1), shape=[N], initializer=neg_init) for hwy_input in hwy_inputs: trans_gate = 
tf.sigmoid(tf.matmul(hwy_input, W_T) + b_T) trans_output = trans_gate * (tf.nn.relu(tf.matmul(hwy_input, W_H)) + b_H) carry_output = (1 - trans_gate) * hwy_input hwy_outputs.append(trans_output + carry_output) hwy_inputs = hwy_outputs else: hwy_outputs = hwy_inputs