def build_model(self, reuse=False):
    """Build the policy/value networks, the masked action distribution, and
    separate Adam-based training ops for the value and policy heads.

    Args:
        reuse: if True, reuse the variables inside this model's scope.
    """
    with tf.variable_scope(self.name):
        if reuse:
            tf.get_variable_scope().reuse_variables()
            assert tf.get_variable_scope().reuse
        policy_state_holder, logits, Value_state_holder, Value = define_networks()
        self.policy_state_holder = policy_state_holder
        self.value_state_holder = Value_state_holder
        self.Value = Value
        # 0/1 mask over the 61 discrete actions; invalid actions get prob 0.
        self.valid_action_mask = tf.placeholder(tf.float32, [None, 61],
                                                name='valid_action_mask')
        # Subtract the per-row max before exp(): a mathematical no-op for the
        # normalized probabilities below, but it prevents float overflow
        # (the original exp(logits) could yield inf/NaN for large logits).
        stable_logits = logits - tf.reduce_max(logits, axis=-1, keepdims=True)
        exp_action_logits = tf.math.exp(stable_logits)
        exp_valid_action_logits = tf.multiply(exp_action_logits,
                                              self.valid_action_mask)
        self.action_probs = exp_valid_action_logits / tf.expand_dims(
            tf.reduce_sum(exp_valid_action_logits, axis=-1), 1)
        self.Value_target = tf.placeholder(tf.float32, [None, 1],
                                           name='Value_target')
        # 0.5 instead of 1 / 2: under Python 2 integer division 1 / 2 == 0,
        # which silently collapses the loss to zero.
        self.Value_loss = 0.5 * tf.reduce_mean(
            tf.square(self.Value - self.Value_target))
        self.entry_diffs = tf.square(self.Value - self.Value_target)
        Value_counter_dis = tf.Variable(trainable=False, initial_value=0,
                                        dtype=tf.int32)
        self.Value_lr = tf.train.exponential_decay(
            self.initial_lr, Value_counter_dis, 300000, 0.96, staircase=True)
        self.Value_opt = layers.optimize_loss(
            loss=self.Value_loss, learning_rate=self.Value_lr,
            optimizer=tf.train.AdamOptimizer, clip_gradients=100.,
            global_step=Value_counter_dis)
        self.Policy_target = tf.placeholder(tf.float32, [None, 61],
                                            name='Policy_target')
        self.Policy_loss = 0.5 * tf.reduce_mean(
            tf.square(self.action_probs - self.Policy_target))
        Policy_counter_dis = tf.Variable(trainable=False, initial_value=0,
                                         dtype=tf.int32)
        self.Policy_lr = tf.train.exponential_decay(
            self.initial_lr, Policy_counter_dis, 300000, 0.96, staircase=True)
        self.Policy_opt = layers.optimize_loss(
            loss=self.Policy_loss, learning_rate=self.Policy_lr,
            optimizer=tf.train.AdamOptimizer, clip_gradients=100.,
            global_step=Policy_counter_dis)
def _get_optimizers_GAN(scores_g_z, scores_x, learning_rate):
    """Return (discriminator_train_op, generator_train_op) for a vanilla GAN.

    scores_g_z are discriminator outputs on generated samples, scores_x on
    real samples; both losses follow the original (saturating) objective.
    """
    d_loss = -(tf.reduce_mean(tf.log(1 - scores_g_z) + tf.log(scores_x)))
    g_loss = tf.reduce_mean(tf.log(1 - scores_g_z))
    # Separate step counters so each network tracks its own update count.
    step_d = tf.Variable(initial_value=0, trainable=False, name='global_step_d')
    step_g = tf.Variable(initial_value=0, trainable=False, name='global_step_g')
    # Each optimizer only touches its own network's trainable variables.
    d_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                               scope='discriminator')
    g_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                               scope='generator')
    optm_d = ly.optimize_loss(d_loss, step_d, learning_rate,
                              tf.train.RMSPropOptimizer, variables=d_vars,
                              name='optm_d', summaries=OPTIMIZER_SUMMARIES)
    optm_g = ly.optimize_loss(g_loss, step_g, learning_rate,
                              tf.train.RMSPropOptimizer, variables=g_vars,
                              name='optm_g', summaries=OPTIMIZER_SUMMARIES)
    return optm_d, optm_g
def main(input):
    # NOTE(review): this function reads as scratch/reference notes rather than
    # runnable code — a catalogue of candidate TF ops, mostly commented out.
    # Every LIVE call below is made with no arguments and would raise
    # TypeError if this function were ever executed; confirm intent.
    # layer_1 = slim.conv2d(input, 10, [5, 5], 2, scope='layer_1')  # default activation is nn.relu
    # layer_1_bn = slim.batch_norm()
    # tf.metrics.mean()
    # tf.identity()
    # with slim.arg_scope()
    tf.nn.conv2d()
    # tf.squeeze()
    # slim.utils.convert_collection_to_dict()
    # tf.add()
    # tf.multiply()
    # tf.subtract()
    # tf.squeeze()
    # tf.square()
    # tf.map_fn()
    # tf.expand_dims()
    # tf.placeholder()
    # tf.get_default_graph()
    # tf.device()
    # tf.split()
    tf.ones_like()
    tf.losses.add_loss()
    tf_contrib_layers.optimize_loss()
    tf.reduce_mean()
    tf.nn.sigmoid_cross_entropy_with_logits()
def gan_model(feature, unused_target):
    """Build a full GAN training graph: generator, discriminator, losses,
    and grouped train ops.

    Returns:
        (generated features, combined D+G loss, grouped train op).
    """
    # Noise matched to the input's shape and dtype, uniform in [-1, 1).
    z = tf.random_uniform(tf.shape(feature), -1, 1, dtype=feature.dtype)
    z.set_shape(feature.get_shape())
    feature_generated = generator(z, 10)
    discr_true = discriminator(feature, 10)
    discr_generated = discriminator(feature_generated, 10, reuse=True)
    # Classic (saturating) GAN losses. NOTE(review): tf.log has no epsilon
    # guard — a discriminator output of exactly 0 or 1 yields inf/NaN.
    loss_discr = tf.reduce_mean(-tf.log(discr_true) -
                                tf.log(1 - discr_generated))
    loss_generator = tf.reduce_mean(-tf.log(discr_generated))
    # Split trainable variables by scope-name prefix.
    variables = tf.trainable_variables()
    generator_params = [v for v in variables if v.name.startswith('Generator/')]
    discriminator_params = [
        v for v in variables if v.name.startswith('Discriminator/')
    ]
    gc = tf.contrib.framework.get_global_step()
    learning_rate = tf.train.exponential_decay(
        0.005, gc, 150, 0.95, staircase=True)
    # NOTE(review): both train ops share the same global step `gc`, so it
    # advances twice per joint training step — confirm that is intended.
    with tf.variable_scope('Discriminator'):
        discriminator_train_op = layers.optimize_loss(
            loss_discr, gc, variables=discriminator_params,
            learning_rate=learning_rate, optimizer='Adam', summaries=[])
    with tf.variable_scope('Generator'):
        generator_train_op = layers.optimize_loss(
            loss_generator, gc, variables=generator_params,
            learning_rate=learning_rate, optimizer='Adam', summaries=[])
    return (feature_generated, loss_discr + loss_generator,
            tf.group(discriminator_train_op, generator_train_op))
def __init__(self, sess, model_fn, input_size, num_action, game,
             restore=False, discount=0.99, lr=1e-4, vf_coef=0.25,
             ent_coef=1e-3, clip_grads=1., agenttype="vpg"):
    """Policy-gradient agent: builds the policy/value model, its losses,
    separate Adam train ops for the policy and value heads, and the
    checkpoint/summary plumbing.

    Args:
        sess: TensorFlow session to run on.
        model_fn: builds ((policy, value), inputs); called with
            (input_size, num_action) for Pong-v0, with (num_action) otherwise.
        input_size: observation size (only used for Pong-v0).
        num_action: number of discrete actions.
        game: gym environment id; also names the checkpoint/log directories.
        restore: if True, restore the latest checkpoint from weights/<game>.
        discount: reward discount factor (stored for later use).
        lr: Adam learning rate.
        vf_coef, ent_coef: value / entropy loss coefficients (stored).
        clip_grads: see NOTE below — currently unused by the live train ops.
        agenttype: loss-variant selector consumed by _loss_func.
    """
    self.sess, self.discount = sess, discount
    self.vf_coef, self.ent_coef = vf_coef, ent_coef
    self.game = game
    self.global_step_tensor = tf.Variable(0, trainable=False,
                                          name='global_step')
    self.agenttype = agenttype
    if game == "Pong-v0":
        (self.policy, self.value), self.inputs = model_fn(input_size, num_action)
        #print(sample(self.policy))
        self.action = sample(self.policy)
    else:
        (self.policy, self.value), self.inputs = model_fn(num_action)
        self.action = sample(self.policy)
    loss_fn, loss_val, self.loss_inputs = self._loss_func()
    self.step = tf.Variable(0, trainable=False)
    #opt = tf.train.RMSPropOptimizer(learning_rate=lr, decay=0.99, epsilon=1e-5)
    opt = tf.train.AdamOptimizer(learning_rate=lr, epsilon=1e-5)
    # NOTE(review): the commented-out variants below clipped gradients with
    # clip_grads; the live ops do not, so the clip_grads argument is
    # currently dead. Both ops also share global_step_tensor, so it advances
    # twice per combined update — confirm intended.
    #self.train_op = layers.optimize_loss(loss=loss_fn, optimizer=opt, learning_rate=None, global_step= self.global_step_tensor, clip_gradients=clip_grads)
    #self.train_op_val = layers.optimize_loss(loss=loss_val, optimizer=opt, learning_rate=None, global_step= self.global_step_tensor, clip_gradients=clip_grads)
    self.train_op = layers.optimize_loss(
        loss=loss_fn, optimizer=opt, learning_rate=None,
        global_step=self.global_step_tensor)
    self.train_op_val = layers.optimize_loss(
        loss=loss_val, optimizer=opt, learning_rate=None,
        global_step=self.global_step_tensor)
    self.sess.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver()
    if restore:
        self.saver.restore(
            self.sess, tf.train.latest_checkpoint('weights/' + self.game))
    self.summary_op = tf.summary.merge_all()
    self.summary_writer = tf.summary.FileWriter('logs/' + self.game,
                                                graph=None)
    self.summary_writer.add_session_log(
        tf.SessionLog(status=tf.SessionLog.START), sess.run(self.step))
def build_graph(self):
    """Build WGAN losses and optimizers for the generator and critic.

    Supports two critic-constraint modes: 'gp' (gradient penalty, WGAN-GP)
    and 'regular' (weight clipping).

    Returns:
        (opt_g, opt_c, real_data): generator/critic train ops and the
        real-image placeholder.
    """
    noise_dist = tf.contrib.distributions.Normal(0., 1.)
    # input noise z
    z = noise_dist.sample((self.batch_size, self.z_dim))
    # create generator and discriminator/critic
    if not self.is_mlp:
        generator = self.generator_conv
        critic = self.critic_conv
    else:
        generator = self.generator_mlp
        critic = self.critic_mlp
    # Fixed: validate the mode up front so a typo fails before any of the
    # expensive graph construction below (original raised only at the end).
    if self.mode not in ['gp', 'regular']:
        raise NotImplementedError('Only two modes')
    with tf.variable_scope('generator'):
        train = generator(z)
    real_data = tf.placeholder(
        dtype=tf.float32,
        shape=(self.batch_size, self.s, self.s, self.channel))
    true_logit = critic(real_data)
    fake_logit = critic(train, reuse=True)
    c_loss = tf.reduce_mean(fake_logit - true_logit)
    # Gradient penalty (WGAN-GP) as an alternative to weight clipping.
    # Fixed: string comparisons used `is`, which relies on CPython string
    # interning and is not guaranteed — use ==/not in instead.
    if self.mode == 'gp':
        alpha_dist = tf.contrib.distributions.Uniform(low=0., high=1.)
        # Fixed: `batch_size` was an unbound name here (NameError at run
        # time); use self.batch_size as everywhere else in this method.
        alpha = alpha_dist.sample((self.batch_size, 1, 1, 1))
        interpolated = real_data + alpha * (train - real_data)
        inte_logit = critic(interpolated, reuse=True)
        gradients = tf.gradients(inte_logit, [interpolated])[0]
        grad_l2 = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3]))
        gradient_penalty = tf.reduce_mean((grad_l2 - 1) ** 2)
        gp_loss_sum = tf.summary.scalar("gp_loss", gradient_penalty)
        grad = tf.summary.scalar("grad_norm", tf.nn.l2_loss(gradients))
        # NOTE(review): `lam` is not defined in this method — presumably a
        # module-level penalty coefficient; confirm it exists, otherwise
        # this line raises NameError in gp mode.
        c_loss += lam * gradient_penalty
    g_loss = tf.reduce_mean(-fake_logit)
    g_loss_sum = tf.summary.scalar("g_loss", g_loss)
    c_loss_sum = tf.summary.scalar("c_loss", c_loss)
    img_sum = tf.summary.image("img", train, max_outputs=10)
    theta_g = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope='generator')
    theta_c = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope='critic')
    counter_g = tf.Variable(trainable=False, initial_value=0, dtype=tf.int32)
    opt_g = ly.optimize_loss(
        loss=g_loss,
        learning_rate=self.learning_rate_ger,
        optimizer=partial(tf.train.AdamOptimizer, beta1=0.5, beta2=0.9)
        if self.is_adam is True else tf.train.RMSPropOptimizer,
        variables=theta_g,
        global_step=counter_g,
        summaries=['gradient_norm'])
    counter_c = tf.Variable(trainable=False, initial_value=0, dtype=tf.int32)
    opt_c = ly.optimize_loss(
        loss=c_loss,
        learning_rate=self.learning_rate_dis,
        optimizer=partial(tf.train.AdamOptimizer, beta1=0.5, beta2=0.9)
        if self.is_adam is True else tf.train.RMSPropOptimizer,
        variables=theta_c,
        global_step=counter_c,
        summaries=['gradient_norm'])
    # Weight clipping for the original WGAN: enforce an (approximate)
    # Lipschitz constraint on the critic.
    if self.mode == 'regular':
        clipped_var_c = [
            tf.assign(var,
                      tf.clip_by_value(var, self.clamp_lower, self.clamp_upper))
            for var in theta_c
        ]
        # merge the clip operations on critic variables
        with tf.control_dependencies([opt_c]):
            opt_c = tf.tuple(clipped_var_c)
    return opt_g, opt_c, real_data
def build_train_MSR_face_graph(real_img, batch_size=64, latent_dims=1024, lr_g=5e-5, lr_c=5e-5, clamp_lower=-0.01, clamp_upper=0.01): z = tf.random_normal([batch_size, latent_dims]) # real_img=tf.placeholder(tf.float32,[batch_size,28,28,1]) # face generator with tf.variable_scope('generator'): generate_img = generator(z, [4, 4, 128], [64, 64, 3], tf.tanh, L.xavier_initializer(uniform=False)) fake_logit = critic(generate_img, 32, 4, L.xavier_initializer(uniform=False)) true_logit = critic(real_img, 32, 4, L.xavier_initializer(uniform=False), True) tf.summary.image('img', generate_img, max_outputs=10) theta_g = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator') theta_c = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='critic') c_loss = tf.reduce_mean(fake_logit - true_logit) g_loss = tf.reduce_mean(-fake_logit) tf.summary.scalar('c_loss', c_loss) counter_g = tf.Variable(trainable=False, initial_value=0, dtype=tf.int32) counter_c = tf.Variable(trainable=False, initial_value=0, dtype=tf.int32) opt_g = L.optimize_loss(loss=g_loss, learning_rate=lr_g, optimizer=tf.train.RMSPropOptimizer, variables=theta_g, global_step=counter_g, summaries=['gradient_norm']) opt_c = L.optimize_loss(loss=c_loss, learning_rate=lr_c, optimizer=tf.train.RMSPropOptimizer, variables=theta_c, global_step=counter_c, summaries=['gradient_norm']) clipped_var_c = [ tf.assign(var, tf.clip_by_value(var, clamp_lower, clamp_upper)) for var in theta_c ] # merge the clip operations on critic variables with tf.control_dependencies([opt_c]): opt_c = tf.tuple(clipped_var_c) return opt_g, opt_c, c_loss
def build(self):
    """Assemble the generator/critic (WGAN-style) graph: content loss plus
    adversarial losses, summaries, RMSProp optimizers, and critic weight
    clipping."""
    self.output = self._generator(self.input, name='gene')
    # Content loss: absolute error against the target, weighted by
    # log1p(output).
    self.content_loss = tf.reduce_mean(tf.multiply(tf.log1p(self.output),
        tf.abs(tf.subtract(self.target, self.output))))
    assert ten_sh(self.output) == ten_sh(self.target)
    # NOTE: tf.concat with the axis as FIRST argument — pre-1.0 TF API order.
    self.eva_op = tf.concat(1,
        (tf.exp(self.input*12.0)-1, tf.exp(self.output*8.0)-1), name='eva_op')
    self.concat_output = tf.exp(tf.concat(1, (self.input, self.output)))
    self.concat_target = tf.exp(tf.concat(1, (self.input, self.target)))
    # Critic scores for generated vs real pairs (weights shared via reuse).
    self.fake_em = self._critic(self.concat_output, name='critic')
    self.true_em = self._critic(self.concat_target, name='critic', reuse=True)
    # Wasserstein losses.
    self.c_loss = tf.reduce_mean(self.fake_em - self.true_em, name='c_loss')
    self.g_loss = tf.reduce_mean(-self.fake_em, name='g_loss')
    ####summary####
    conntent_loss_sum = tf.summary.scalar('content_loss', self.content_loss)
    c_loss_sum = tf.summary.scalar('c_loss', self.c_loss)
    g_loss_sum = tf.summary.scalar('g_loss', self.g_loss)
    img_sum = tf.summary.image('gene_img', self.concat_output, max_outputs=1)
    img_sum = tf.summary.image('tar_img', self.concat_target, max_outputs=1)
    self.summary = tf.summary.merge_all()
    ##############
    theta_g = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='gene')
    theta_c = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope='critic')
    counter_g = tf.Variable(trainable=False, initial_value=0, dtype=tf.int32)
    counter_c = tf.Variable(trainable=False, initial_value=0, dtype=tf.int32)
    self.c_opt = ly.optimize_loss(loss=self.c_loss, learning_rate=self.c_lr,
                                  optimizer=tf.train.RMSPropOptimizer,
                                  variables=theta_c,
                                  global_step=counter_c)
    # NOTE(review): g_opt and content_opt share counter_g, so the counter
    # advances twice when both are run in one step — confirm intended.
    self.g_opt = ly.optimize_loss(self.g_loss, learning_rate=self.g_lr,
                                  optimizer=tf.train.RMSPropOptimizer,
                                  variables=theta_g,
                                  global_step=counter_g)
    self.content_opt = ly.optimize_loss(self.content_loss,
                                        learning_rate=self.g_lr,
                                        optimizer=tf.train.RMSPropOptimizer,
                                        variables=theta_g,
                                        global_step=counter_g)
    # Weight clipping keeps the critic approximately Lipschitz (WGAN).
    clipped_c_var = [tf.assign(var, tf.clip_by_value(var, self.clamp_lower,
                                                     self.clamp_upper))
                     for var in theta_c]
    with tf.control_dependencies([self.c_opt]):
        self.c_opt = tf.tuple(clipped_c_var)
def build_graph(self):
    """Build the GAN graph (WGAN or LSGAN loss) and its train ops.

    Returns:
        (opt_g, opt_c, real_data, train_names, merged_summaries,
         c_loss, g_loss)
    """
    noise_dist = tf.contrib.distributions.Normal(0., 1.)
    z = noise_dist.sample((self.batch_size, int(3 * self.number / 4)))
    # Fixed: tf.Variable(False, "recover") passed the string "recover" as the
    # *trainable* argument (truthy => trainable); it was meant to be the name.
    recover = tf.Variable(False, trainable=False, name="recover")
    generator = self.generator_mlp if self.is_mlp_g else self.generator_conv
    critic = self.critic_mlp if self.is_mlp_c else self.critic_conv
    with tf.variable_scope('generator'):
        train, train_names = generator(z, recover=recover)
    real_data = tf.placeholder(dtype=tf.float32,
                               shape=(self.batch_size, 1, self.number, 1))
    true_logit = critic(real_data)
    fake_logit = critic(train, reuse=True)
    if self.loss == 'WGAN':
        c_loss, g_loss = self.wgan_loss(real_data, train, critic,
                                        true_logit, fake_logit)
    if self.loss == "LSGAN":
        c_loss, g_loss = self.ls_GAN(true_logit, fake_logit)
    print(c_loss, g_loss)
    tf.summary.scalar('c_loss', c_loss)
    tf.summary.scalar('g_loss', g_loss)
    theta_g = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope='generator')
    theta_c = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope='critic')
    # NOTE(review): learning_rate_ger / learning_rate_dis / is_adam are
    # unbound names in this method — presumably module-level config; confirm
    # they exist (sibling variants of this method use self.* attributes).
    counter_g = tf.Variable(trainable=False, initial_value=0, dtype=tf.int32)
    opt_g = ly.optimize_loss(
        loss=g_loss,
        learning_rate=learning_rate_ger,
        optimizer=partial(tf.train.AdamOptimizer, beta1=0.5, beta2=0.9)
        if is_adam is True else tf.train.RMSPropOptimizer,
        variables=theta_g,
        global_step=counter_g,
        summaries=['gradient_norm'])
    counter_c = tf.Variable(trainable=False, initial_value=0, dtype=tf.int32)
    opt_c = ly.optimize_loss(
        loss=c_loss,
        learning_rate=learning_rate_dis,
        optimizer=partial(tf.train.AdamOptimizer, beta1=0.5, beta2=0.9)
        if is_adam is True else tf.train.RMSPropOptimizer,
        variables=theta_c,
        global_step=counter_c,
        summaries=['gradient_norm'])
    # Fixed: these summaries reused the tags 'c_loss'/'g_loss' already taken
    # above (TF would silently uniquify them to c_loss_1/g_loss_1); give the
    # optimize_loss outputs distinct tags instead.
    tf.summary.scalar('c_loss_opt', opt_c)
    tf.summary.scalar('g_loss_opt', opt_g)
    merged = tf.summary.merge_all()
    return opt_g, opt_c, real_data, train_names, merged, c_loss, g_loss
def __init__(self, hidden_size, batch_size, learning_rate):
    """GAN over flattened 28x28 inputs: builds discriminator/generator,
    their losses, and Adam train ops (discriminator trains at
    learning_rate / 10)."""
    self.input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])
    self.is_training = tf.placeholder_with_default(True, [])
    with arg_scope([layers.conv2d, layers.conv2d_transpose],
                   activation_fn=concat_elu,
                   normalizer_fn=layers.batch_norm,
                   normalizer_params={
                       'scale': True,
                       'is_training': self.is_training
                   }):
        with tf.variable_scope("model"):
            D1 = discriminator(self.input_tensor)  # positive examples
            # Count of discriminator variables: every variable created after
            # this point belongs to the generator and is split off by index
            # below.
            D_params_num = len(tf.trainable_variables())
            G = decoder(tf.random_normal([batch_size, hidden_size]))
            self.sampled_tensor = G
        with tf.variable_scope("model", reuse=True):
            D2 = discriminator(G)  # generated examples
    D_loss = self.__get_discrinator_loss(D1, D2)
    G_loss = self.__get_generator_loss(D2)
    params = tf.trainable_variables()
    D_params = params[:D_params_num]
    G_params = params[D_params_num:]
    # Partition update ops (e.g. batch-norm statistics) between the two
    # networks by scope name; 'model_1/' is presumably the uniquified scope
    # where the generator path ran — TODO confirm.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    g_update_ops = [
        op for op in update_ops if op.name.startswith('model_1/')
    ]
    d_update_ops = [op for op in update_ops if op not in g_update_ops]
    # train_discrimator = optimizer.minimize(loss=D_loss, var_list=D_params)
    # train_generator = optimizer.minimize(loss=G_loss, var_list=G_params)
    # NOTE(review): both optimize_loss calls share this global step, so it
    # advances twice per joint training step — confirm intended.
    global_step = tf.contrib.framework.get_or_create_global_step()
    with tf.control_dependencies(d_update_ops):
        self.train_discrimator = layers.optimize_loss(D_loss,
                                                      global_step,
                                                      learning_rate / 10,
                                                      'Adam',
                                                      variables=D_params,
                                                      update_ops=[])
    with tf.control_dependencies(g_update_ops):
        self.train_generator = layers.optimize_loss(G_loss,
                                                    global_step,
                                                    learning_rate,
                                                    'Adam',
                                                    variables=G_params,
                                                    update_ops=[])
    self.sess = tf.Session()
    self.sess.run(tf.global_variables_initializer())
def _build_graph(self):
    """WGAN graph over two selectable model versions.

    Returns:
        (opt_g, opt_c, z, real_data): train ops, noise placeholder, and the
        real-data input tensor.
    """
    z = tf.placeholder(tf.float32, shape=(self.batch_size, self.hidden_size))
    with tf.variable_scope('generator'):
        if self.version == 0:
            self.g_out = model.generator_conv(z, 3)
        if self.version == 1:
            self.g_out = model.generator_conv_v2(z, 3)
    #real_data = tf.placeholder(dtype=tf.float32, shape=(self.batch_size, self.img_size[0], self.img_size[1], 3))
    # Real images come from the input pipeline rather than a placeholder.
    real_data = customDataGeter.input(self.data_directory, self.img_size,
                                      self.batch_size)
    if self.version == 0:
        true_logit = model.critic_conv(real_data, self.batch_size)
        fake_logit = model.critic_conv(self.g_out, self.batch_size,
                                       reuse=True)
    if self.version == 1:
        true_logit = model.critic_conv_v2(real_data, self.batch_size)
        fake_logit = model.critic_conv_v2(self.g_out, self.batch_size,
                                          reuse=True)
    # define the loss (Wasserstein objective)
    self.c_loss = tf.reduce_mean(fake_logit - true_logit)
    self.g_loss = tf.reduce_mean(-fake_logit)
    g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)
    c_loss_sum = tf.summary.scalar("c_loss", self.c_loss)
    # img summary
    img_sum = tf.summary.image("img", self.g_out, max_outputs=10)
    # get the params
    theta_g = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope='generator')
    theta_c = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope='critic')
    counter_g = tf.Variable(trainable=False, initial_value=0, dtype=tf.int32)
    opt_g = ly.optimize_loss(loss=self.g_loss,
                             learning_rate=self.learning_rate,
                             optimizer=tf.train.RMSPropOptimizer,
                             variables=theta_g,
                             global_step=counter_g)
    counter_c = tf.Variable(trainable=False, initial_value=0, dtype=tf.int32)
    opt_c = ly.optimize_loss(loss=self.c_loss,
                             learning_rate=self.learning_rate,
                             optimizer=tf.train.RMSPropOptimizer,
                             variables=theta_c,
                             global_step=counter_c)
    # define the clip op (WGAN weight clipping on the critic)
    clipped_var_c = [
        tf.assign(var, tf.clip_by_value(var, -self.clip_abs, self.clip_abs))
        for var in theta_c
    ]
    # merge the clip operations on critic variables
    with tf.control_dependencies([opt_c]):
        opt_c = tf.tuple(clipped_var_c)
    return opt_g, opt_c, z, real_data
def build_train_graph(model, params):
    """Build the training graph for `model`.

    Merges `params` over the defaults (learning_rate=0.001, clip_norm=5.0),
    creates a global step and an Adam train op with gradient clipping, and
    returns them in a dict alongside the learning-rate placeholder.
    """
    cfg = {
        "learning_rate": 0.001,
        "clip_norm": 5.0,
    }
    cfg.update(params)
    logger.debug("building training graph: %s.", cfg)
    # Learning rate is a feedable placeholder with the configured default.
    lr = tf.placeholder_with_default(cfg["learning_rate"], [],
                                     "learning_rate")
    step = tf.Variable(0, name='global_step', trainable=False)
    train_op = layers.optimize_loss(model["loss"], step, lr, "Adam",
                                    clip_gradients=cfg["clip_norm"])
    return {
        "global_step": step,
        "train_op": train_op,
        "learning_rate": lr,
        "train_args": cfg,
    }
def model_function(features, targets, mode):
    """Two hidden sigmoid layers (20, 10) plus a linear output layer for a
    10-class classifier.

    Returns:
        ({'probs': softmax probabilities, 'labels': predicted class},
         loss, train op).
    """
    # Configure the multi-layer perceptron model
    hlayer1 = layers.fully_connected(inputs=features, num_outputs=20,
                                     activation_fn=tf.sigmoid)
    hlayer2 = layers.fully_connected(inputs=hlayer1, num_outputs=10,
                                     activation_fn=tf.sigmoid)
    # Linear outputs: softmax is applied by the loss and probs below.
    outputs = layers.fully_connected(inputs=hlayer2, num_outputs=10,
                                     activation_fn=None)
    # Calculate loss using softmax cross-entropy (the original comment
    # incorrectly said "mean squared error").
    loss = losses.softmax_cross_entropy(outputs, targets)
    # Create an optimizer for minimizing the loss function
    optimizer = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=0.5,
        optimizer="SGD")
    probs = tf.nn.softmax(outputs)
    # Fixed: tf.arg_max is a deprecated alias — tf.argmax is identical.
    return {'probs': probs, 'labels': tf.argmax(probs, 1)}, loss, optimizer
def conv_model(X, Y_, mode):
    """Three conv layers + ReLU(200) + linear(10) digit classifier.

    Returns ({'predictions', 'classes'}, loss, train op).
    """
    XX = tf.reshape(X, [-1, 28, 28, 1])
    biasInit = tf.constant_initializer(0.1, dtype=tf.float32)
    Y1 = layers.conv2d(XX, num_outputs=6, kernel_size=[6, 6],
                       biases_initializer=biasInit)
    Y2 = layers.conv2d(Y1, num_outputs=12, kernel_size=[5, 5], stride=2,
                       biases_initializer=biasInit)
    Y3 = layers.conv2d(Y2, num_outputs=24, kernel_size=[4, 4], stride=2,
                       biases_initializer=biasInit)
    Y4 = layers.flatten(Y3)
    Y5 = layers.relu(Y4, 200, biases_initializer=biasInit)
    Ylogits = layers.linear(Y5, 10)
    predict = tf.nn.softmax(Ylogits)
    classes = tf.cast(tf.argmax(predict, 1), tf.uint8)
    # NOTE(review): positional args pass Ylogits first — valid only on old TF
    # where the signature was (logits, labels); newer TF requires keyword
    # labels=/logits=. Confirm against the pinned TF version.
    # Loss is scaled by 100 (presumably for readable summary magnitudes).
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(Ylogits,
                                                tf.one_hot(Y_, 10))) * 100
    train_op = layers.optimize_loss(loss, framework.get_global_step(),
                                    0.001, "Adam")
    return {"predictions": predict, "classes": classes}, loss, train_op
def softmax_model(X, Y_, mode):
    """Plain linear + softmax classifier over 10 classes.

    Returns ({'predictions', 'classes'}, loss, train op).
    """
    logits = layers.linear(X, 10)
    probabilities = tf.nn.softmax(logits)
    predicted_classes = tf.cast(tf.argmax(probabilities, 1), tf.uint8)
    # Cross-entropy against one-hot labels, scaled by 100.
    per_example_ce = tf.nn.softmax_cross_entropy_with_logits(
        logits, tf.one_hot(Y_, 10))
    loss = tf.reduce_mean(per_example_ce) * 100
    train_op = layers.optimize_loss(loss, framework.get_global_step(),
                                    0.003, "Adam")
    return ({"predictions": probabilities, "classes": predicted_classes},
            loss, train_op)
def dnn_tanh(features, target): target = tf.one_hot(target, 2, 1.0, 0.0) # Organize continues features. final_features = [ tf.expand_dims(tf.cast(features[var], tf.float32), 1) for var in continues_vars ] # Embed categorical variables into distributed representation. for var in categorical_vars: feature = learn.ops.categorical_variable( features[var + '_ids'], len(categorical_var_encoders[var].classes_), embedding_size=CATEGORICAL_EMBED_SIZE, name=var) final_features.append(feature) # Concatenate all features into one vector. features = tf.concat(1, final_features) # Deep Neural Network logits = layers.stack(features, layers.fully_connected, [10, 20, 10], activation_fn=tf.tanh) prediction, loss = learn.models.logistic_regression(logits, target) train_op = layers.optimize_loss(loss, tf.contrib.framework.get_global_step(), optimizer='SGD', learning_rate=0.05) return tf.argmax(prediction, dimension=1), loss, train_op
def __init__(self, sess, model_fn, config, lr, restore=False, clip_grads=1.):
    """Supervised/imitation agent: builds the policy graph, a cross-entropy
    loss over the per-head action distributions, and an RMSProp train op.

    Args:
        sess: TensorFlow session to run on.
        model_fn: callable(config) -> ((policy, value), inputs).
        config: experiment config; must expose sz, policy_dims(), full_id().
        lr: learning rate.
        restore: if True, restore the latest '<full_id>_imitation' checkpoint.
        clip_grads: gradient-clipping norm passed to optimize_loss.
    """
    self.sess, self.config, self.lr = sess, config, lr
    (self.policy, self.value), self.inputs = model_fn(
        config
    )  # self.inputs = [screen_input, minimap_input] + non_spatial_inputs
    self.actions = [
        tf.placeholder(tf.int32, [None]) for _ in range(len(self.policy))
    ]  # policy is a list; actions has one entry per action function/argument
    #print(self.inputs)
    #print(self.actions)
    with tf.variable_scope('loss'):
        acts = []
        for i, (d, is_spatial) in enumerate(self.config.policy_dims()):
            if is_spatial:
                # Spatial arguments index into the sz x sz screen.
                acts.append(
                    tf.one_hot(self.actions[i], config.sz * config.sz))
            else:
                acts.append(tf.one_hot(self.actions[i], d))
        # acts = self.mask(self.actions[0], acts) # TODO
        # Cross-entropy summed across all policy heads; clip_log presumably
        # guards against log(0) — confirm its implementation.
        ce = sum([
            -tf.reduce_sum(a * clip_log(p), axis=-1)
            for a, p in zip(acts, self.policy)
        ])
        ce_loss = tf.reduce_mean(ce)
        val_loss = 0 * tf.reduce_mean(
            self.value)  # hack to match a2c agent computational graph
        self.loss = ce_loss + val_loss
        tf.summary.scalar('loss', self.loss)
    with tf.variable_scope('train'):
        self.step = tf.Variable(0, trainable=False)
        # opt = tf.train.AdamOptimizer(learning_rate=lr, epsilon=1e-5)
        opt = tf.train.RMSPropOptimizer(learning_rate=lr,
                                        decay=0.99,
                                        epsilon=1e-5)
        self.train_op = layers.optimize_loss(loss=self.loss,
                                             optimizer=opt,
                                             learning_rate=None,
                                             global_step=self.step,
                                             clip_gradients=clip_grads)
    self.sess.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver()
    if restore:
        self.saver.restore(
            self.sess,
            tf.train.latest_checkpoint('weights/' + self.config.full_id() +
                                       '_imitation'))
    self.summary_op = tf.summary.merge_all()
    self.summary_writer = tf.summary.FileWriter('supervised_logs/' +
                                                self.config.full_id(),
                                                graph=None)
def __init__(self, hidden_size, batch_size, learning_rate):
    """VAE over flattened 28x28 inputs: encoder -> (mean, stddev) ->
    reparameterized sample -> decoder, trained with Adam.

    Args:
        hidden_size: latent dimensionality.
        batch_size: batch size used for the prior-sampling branch.
        learning_rate: Adam learning rate.
    """
    self.input_tensor = tf.placeholder(tf.float32, [None, 28 * 28])
    with arg_scope([layers.conv2d, layers.conv2d_transpose],
                   activation_fn=tf.nn.relu,
                   normalizer_fn=layers.batch_norm,
                   normalizer_params={'scale': True}):
        with tf.variable_scope("vae_model") as scope:
            # Encoder emits 2 * hidden_size units: first half is the mean,
            # second half the log-variance.
            encoded = self.encoder(self.input_tensor, hidden_size * 2)
            # Fixed: the original `encoded[:, hidden_size]` selected a SINGLE
            # column (shape [batch]) instead of the first hidden_size
            # columns; the mean must be [batch, hidden_size].
            mean = encoded[:, :hidden_size]
            stddev = tf.sqrt(tf.exp(encoded[:, hidden_size:]))
            epsilon = tf.random_normal([tf.shape(mean)[0], hidden_size])
            # Reparameterization trick: sample = mean + eps * stddev.
            input_sample = mean + epsilon * stddev
            output_tensor = self.decoder(input_sample)
        with tf.variable_scope('vae_model', reuse=True) as scope:
            # Unconditional samples drawn from the prior, via shared weights.
            self.sampled_tensor = self.decoder(
                tf.random_normal([batch_size, hidden_size]))
    vae_loss = self.__get_vae_cost(mean, stddev)
    rec_loss = self.__get_reconstruction_cost(output_tensor,
                                              self.input_tensor)
    loss = vae_loss + rec_loss
    self.train = layers.optimize_loss(
        loss,
        tf.contrib.framework.get_or_create_global_step(),
        learning_rate=learning_rate,
        optimizer='Adam',
        update_ops=[])
    self.sess = tf.Session()
    self.sess.run(tf.global_variables_initializer())
def emb_classifier(x, x_mask, y, dropout, opt, class_penalty):
    """Label-embedding attentive text classifier (CNN encoder variant) with
    one single-logit head per encoder channel and sigmoid CE loss.

    comment notation:
        b: batch size, s: sequence length, e: embedding dim, c: num of class

    NOTE(review): class_penalty is accepted but never used in this variant
    (cf. the maxout variant, which adds a class-embedding penalty term).
    """
    x_emb, W_norm = embedding(x, opt)  # b * s * e
    x_emb = tf.cast(x_emb, tf.float32)
    W_norm = tf.cast(W_norm, tf.float32)
    y_pos = tf.argmax(y, -1)
    y_emb, W_class = embedding_class(y_pos, opt, 'class_emb')  # b * e, c * e
    y_emb = tf.cast(y_emb, tf.float32)
    W_class = tf.cast(W_class, tf.float32)
    W_class_tran = tf.transpose(W_class, [1, 0])  # e * c
    x_emb = tf.expand_dims(x_emb, 3)  # b * s * e * 1
    H_enc = att_emb_ngram_encoder_cnn(x_emb, x_mask, W_class, W_class_tran,
                                      opt)
    # One single-logit classifier per channel of the encoded representation;
    # concatenated back into a [b, c] logit matrix below.
    H_enc_list = tf.unstack(H_enc, axis=-1)
    logits_list = []
    for i, ih in enumerate(H_enc_list):
        logits_list.append(
            discriminator_0layer(ih, opt, dropout,
                                 prefix='classify_{}'.format(i),
                                 num_outputs=1, is_reuse=False))
    logits = tf.concat(logits_list, -1)
    prob = tf.nn.softmax(logits)
    # class_y = tf.constant(name='class_y', shape=[opt.num_class, opt.num_class], dtype=tf.float32, value=np.identity(opt.num_class),)
    correct_prediction = tf.equal(tf.argmax(prob, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # Sigmoid (multi-label style) cross-entropy over the per-class logits,
    # even though accuracy above assumes a single argmax label.
    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits))
    global_step = tf.Variable(0, trainable=False)
    train_op = layers.optimize_loss(loss, global_step=global_step,
                                    optimizer=opt.optimizer,
                                    learning_rate=opt.lr)
    return accuracy, loss, train_op, W_norm, global_step, logits, prob
def _get_train_ops(self, features, targets):
    """Build the model graph and return (train_op, loss).

    Uses the `model_fn` supplied at construction; the learning rate and the
    optimizer may each be given directly or as a callable (the learning rate
    callable receives the global step, the optimizer callable receives the
    resolved learning rate).
    """
    _, loss = self._model_fn(features, targets, ModeKeys.TRAIN)
    # TODO(ipolosukhin): Move this to TensorFlowEstimator when
    # moving out training.
    step = contrib_framework.get_global_step()
    lr = self.learning_rate
    if isinstance(lr, types.FunctionType):
        lr = lr(step)
    opt = self.optimizer
    if isinstance(opt, types.FunctionType):
        opt = opt(lr)
    train_op = layers.optimize_loss(loss,
                                    step,
                                    learning_rate=lr,
                                    optimizer=opt,
                                    clip_gradients=self.clip_gradients)
    # Fold pending update ops (e.g. batch-norm statistics) into the train op.
    train_op = control_flow_ops.group(train_op,
                                      *ops.get_collection('update_ops'))
    return train_op, loss
def _build_optimizer(self):
    """Create the Adam training op over self.loss, with loss and
    learning-rate summaries attached."""
    step = tf.train.get_global_step()
    self.train_op = layers.optimize_loss(
        self.loss,
        step,
        optimizer='Adam',
        learning_rate=Config.train.learning_rate,
        summaries=['loss', 'learning_rate'],
        name="train_op")
def model_function(features, targets, mode):
    """MLP with two ReLU hidden layers (20, 10) and a 10-way linear output.

    Returns ({'probs', 'labels'}, loss, train op).
    """
    # layers.stack builds fully-connected layers of the given sizes; hidden
    # layers are fully connected (TF offers no built-in sparse alternative).
    hidden = layers.stack(features, layers.fully_connected, [20, 10],
                          activation_fn=tf.nn.relu)
    # Output layer is linear: softmax is folded into the loss below.
    logits = layers.fully_connected(inputs=hidden,
                                    num_outputs=10,
                                    activation_fn=None)
    # Cross-entropy with softmax applied internally.
    loss = losses.softmax_cross_entropy(logits, targets)
    train_op = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=0.001,
        optimizer="SGD")
    # Class prediction = perceptron with the highest softmax value.
    probs = tf.nn.softmax(logits)
    predictions = {'probs': probs, 'labels': tf.argmax(probs, 1)}
    return predictions, loss, train_op
def _get_train_ops(self, features, targets):
    """Construct the model graph; return the train Operation and loss Tensor.

    learning_rate and optimizer are resolved from either direct values or
    callables supplied at construction time.
    """
    _, loss = self._model_fn(features, targets, ModeKeys.TRAIN)
    # TODO(ipolosukhin): Move this to TensorFlowEstimator when
    # moving out training.
    global_step = contrib_framework.get_global_step()
    learning_rate = (self.learning_rate(global_step)
                     if isinstance(self.learning_rate, types.FunctionType)
                     else self.learning_rate)
    optimizer = (self.optimizer(learning_rate)
                 if isinstance(self.optimizer, types.FunctionType)
                 else self.optimizer)
    train_op = layers.optimize_loss(
        loss,
        global_step,
        learning_rate=learning_rate,
        optimizer=optimizer,
        clip_gradients=self.clip_gradients)
    # Attach collected update ops so they run with every training step.
    train_op = control_flow_ops.group(
        train_op, *ops.get_collection('update_ops'))
    return train_op, loss
def model_function(features, targets):
    """Single fully-connected sigmoid layer for binary classification.

    Returns ({'labels': raw outputs}, loss, train op); loss is returned
    separately because evaluate() needs it without the optimizer, while
    fit() needs all three.
    """
    # Two output perceptrons, so targets become 2-wide one-hot rows.
    targets = tf.one_hot(targets, 2, 1, 0)
    outputs = layers.fully_connected(inputs=features,
                                     num_outputs=2,
                                     activation_fn=tf.sigmoid)
    # Mean-squared error against the one-hot labels.
    loss = losses.mean_squared_error(outputs, targets)
    # global_step is a wrapper around the step counter, much like Java's
    # 'Integer' wraps 'int'.
    train_op = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=0.001,
        optimizer="SGD")
    return {"labels": outputs}, loss, train_op
def emb_classifier(x, x_mask, y, dropout, opt, class_penalty):
    """Label-embedding attentive text classifier (maxout encoder variant).

    comment notation:
        b: batch size, s: sequence length, e: embedding dim, c: num of class

    Returns:
        accuracy, loss, train_op, word-embedding matrix, global step,
        attention weights beta, class probabilities, predicted labels.
    """
    x_emb, W_norm = embedding(x, opt)  # b * s * e
    x_emb = tf.cast(x_emb, tf.float32)
    W_norm = tf.cast(W_norm, tf.float32)
    y_pos = tf.argmax(y, -1)
    y_emb, W_class = embedding_class(y_pos, opt, 'class_emb')  # b * e, c * e
    y_emb = tf.cast(y_emb, tf.float32)
    W_class = tf.cast(W_class, tf.float32)
    W_class_tran = tf.transpose(W_class, [1, 0])  # e * c
    x_emb = tf.expand_dims(x_emb, 3)  # b * s * e * 1
    H_enc, beta = att_emb_ngram_encoder_maxout(x_emb, x_mask, W_class,
                                               W_class_tran, opt)
    H_enc = tf.squeeze(H_enc)
    # H_enc=tf.cast(H_enc,tf.float32)
    # Main classifier head, plus a weight-shared head over the class
    # embeddings used for the class_penalty regularizer below.
    logits = discriminator_2layer(H_enc, opt, dropout, prefix='classify_',
                                  num_outputs=opt.num_class,
                                  is_reuse=False)  # b * c
    logits_class = discriminator_2layer(W_class, opt, dropout,
                                        prefix='classify_',
                                        num_outputs=opt.num_class,
                                        is_reuse=True)
    prob = tf.nn.softmax(logits)
    # Identity labels: class i's embedding should classify as class i.
    class_y = tf.constant(name='class_y',
                          shape=[opt.num_class, opt.num_class],
                          dtype=tf.float32,
                          value=np.identity(opt.num_class),)
    y_pred = tf.argmax(prob, 1)
    correct_prediction = tf.equal(tf.argmax(prob, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # Main softmax CE plus class_penalty-weighted CE on the class-embedding
    # head.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits)
    ) + class_penalty * tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=class_y,
                                                   logits=logits_class))
    global_step = tf.Variable(0, trainable=False)
    train_op = layers.optimize_loss(loss, global_step=global_step,
                                    optimizer=opt.optimizer,
                                    learning_rate=opt.lr)
    return accuracy, loss, train_op, W_norm, global_step, beta, prob, y_pred
def _build_optimizer(self):
    """Builds the actor-critic loss and returns the clipped RMSProp train op.

    Creates placeholders for sampled actions (function ids plus one argument
    placeholder per argument type) and empirical returns, then combines
    policy-gradient, value, and entropy losses.
    """
    # TensorFlow placeholders and compute advantages
    fns = tf.placeholder(tf.int32, [None], 'fns')
    # NOTE(review): every argument placeholder shares the base name 'args';
    # TF uniquifies them (args, args_1, ...) — confirm feeds use the returned
    # tensors rather than names.
    args = {
        k: tf.placeholder(tf.int32, [None], 'args')
        for k in self.policy[1].keys()
    }
    self.actions = (fns, args)
    self.returns = tf.placeholder(tf.float32, [None], 'returns')
    # stop_gradient: advantages are constants w.r.t. backprop, so the value
    # head is trained only through its own squared-error term below.
    advantages = tf.stop_gradient(self.returns - self.value)
    # Create loss TensorFlow operation using placeholders
    negative_log_policy = self.model.get_neg_log_prob(
        self.actions, self.policy)
    policy_loss = tf.reduce_mean(advantages * negative_log_policy)
    value_loss = self.value_loss_coeff * tf.reduce_mean(
        tf.square(self.value - self.returns))
    entropy_loss = self.entropy_loss_coeff * tf.reduce_mean(
        self.model.get_entropy(self.policy))
    # Entropy is subtracted: maximizing it encourages exploration.
    loss = policy_loss + value_loss - entropy_loss
    optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate,
                                          decay=0.99,
                                          epsilon=1e-5)
    # learning_rate=None because the optimizer instance already carries it.
    return layers.optimize_loss(loss=loss,
                                global_step=tf.train.get_global_step(),
                                learning_rate=None,
                                optimizer=optimizer,
                                clip_gradients=self.max_gradient_norm,
                                name="train_operation")
def model_function(features, targets, mode):
    """Deep fully-connected digit classifier (1000-100-50-20 hidden units)."""
    # Stack of ReLU layers with L1/L2 weight regularization and a seeded
    # Xavier initializer for reproducibility.
    hidden = layers.stack(
        features,
        layers.fully_connected,
        [1000, 100, 50, 20],
        activation_fn=tf.nn.relu,
        weights_regularizer=layers.l1_l2_regularizer(1.0, 2.0),
        weights_initializer=layers.xavier_initializer(uniform=True, seed=100))
    # Hidden layers have to be fully connected for best performance; there is
    # no built-in TensorFlow option for non-fully-connected layers, so custom
    # code would be required for that.
    # Linear outputs: the softmax activation is folded into the loss below.
    outputs = layers.fully_connected(inputs=hidden,
                                     num_outputs=10,
                                     activation_fn=None)
    # Cross-entropy loss with softmax applied internally.
    loss = losses.softmax_cross_entropy(outputs, targets)
    optimizer = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=0.8,
        optimizer="SGD")
    # Predicted digit = perceptron with the highest probability; both the
    # fractional values and the labels are returned.
    probs = tf.nn.softmax(outputs)
    return {'probs': probs, 'labels': tf.argmax(probs, 1)}, loss, optimizer
def _model_fn(features, labels, mode, params):
  """Constructs the model function.

  Args:
    features: Dictionary of input features.
    labels: Tensor of labels if mode is `TRAIN` or `EVAL`, otherwise `None`.
    mode: ModeKey object (`TRAIN` or `EVAL`).
    params: Parameter dictionary passed from the Estimator object.

  Returns:
    An EstimatorSpec object that encapsulates the model and its serving
    configurations.
  """
  del params  # Unused.
  blurred = inference_fn(features['frame_0'], features['frame_1'])
  # Loss is only defined when labels are available (TRAIN/EVAL).
  if mode in [tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL]:
    loss = tf.losses.absolute_difference(labels, blurred)
  else:
    loss = None
  if mode == tf.estimator.ModeKeys.TRAIN:
    # NOTE(review): `hparams` comes from an enclosing scope, not from the
    # (deleted) `params` argument — confirm it is set before graph build.
    optimizer = tf.train.AdamOptimizer(
        learning_rate=hparams.learning_rate)
    train_op = contrib_layers.optimize_loss(
        loss=loss,
        global_step=tf.train.get_global_step(),
        learning_rate=None,
        optimizer=optimizer,
        name='')  # Prevents scope prefix.
  else:
    train_op = None
  if mode == tf.estimator.ModeKeys.EVAL:
    eval_metric_ops = {'PSNR': psnr(labels, blurred)}

    def summary(images, name):
      """As a hack, saves image summaries by adding to `eval_metric_ops`."""
      # Scale [0, 1] floats to uint8 pixels with rounding.
      images = tf.saturate_cast(images * 255 + 0.5, tf.uint8)
      eval_metric_ops[name] = (tf.summary.image(name, images, max_outputs=2),
                               tf.no_op())

    summary(features['frame_0'], 'Frame 0')
    summary(features['frame_1'], 'Frame 1')
    summary(labels, 'Labels')
    summary(blurred, 'Blurred')
    # Signed difference remapped from [-1, 1] into [0, 1] for display.
    diffs = (blurred - labels + 1.0) / 2.0
    summary(diffs, 'Diffs')
  else:
    eval_metric_ops = None
  return tf.estimator.EstimatorSpec(mode=mode,
                                    loss=loss,
                                    train_op=train_op,
                                    eval_metric_ops=eval_metric_ops)
def model_function(features, targets, mode):
    """Single-layer sigmoid digit classifier; targets arrive one-hot encoded."""
    # No one-hot step needed since the targets are already one-hot. Sigmoid
    # also works here although interpretability is harder; the max-valued
    # output identifies the class whether sigmoid or softmax is used.
    outputs = layers.fully_connected(inputs=features,
                                     num_outputs=10,
                                     activation_fn=None)
    # The layer emits plain linear activations; sigmoid and cross-entropy are
    # fused inside the loss to handle log(0) and other border cases.
    loss = losses.sigmoid_cross_entropy(outputs, targets)
    # The global step is a tensor wrapper around the underlying integer,
    # much as Java has 'Integer' on top of 'int'.
    optimizer = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=0.5,
        optimizer="SGD")
    # Predicted digit = perceptron with the highest fractional value; both
    # the fractional values and the labels are returned.
    probs = tf.sigmoid(outputs)
    return {'probs': probs, 'labels': tf.argmax(probs, 1)}, loss, optimizer
def model_function(features, targets, mode):
    """Shallow network: one 50-unit sigmoid hidden layer, 10-way output."""
    # Single hidden layer of sigmoid perceptrons — a shallow network.
    hidden = layers.fully_connected(inputs=features,
                                    num_outputs=50,
                                    activation_fn=tf.sigmoid)
    # Linear output layer: softmax is applied inside the loss function.
    outputs = layers.fully_connected(inputs=hidden,
                                     num_outputs=10,
                                     activation_fn=None)
    # Cross-entropy loss with the softmax activation fused in.
    loss = losses.softmax_cross_entropy(outputs, targets)
    optimizer = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=0.001,
        optimizer="SGD")
    # Predicted digit = perceptron with the highest probability; both the
    # fractional values and the labels are returned.
    probs = tf.nn.softmax(outputs)
    return {'probs': probs, 'labels': tf.argmax(probs, 1)}, loss, optimizer
def lenet5_model(X, y, mode,
                 image_size=(-1, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE, 1),
                 pool_size=(1, 2, 2, 1)):
    """LeNet-5 style digit classifier; prints layer shapes while building."""
    # Zero-pad 28x28 inputs to 32x32 as in the original LeNet-5.
    X = tf.pad(tf.reshape(X, image_size),
               [[0, 0], [2, 2], [2, 2], [0, 0]],
               mode="CONSTANT")
    print("x ", X.shape)
    print("y ", y.shape)
    # Two conv+pool stages (6 then 16 feature maps).
    layer1 = lenet5_layer(X, 6, [5, 5], pool_size)
    print("layer1 ", layer1.shape)
    layer2 = lenet5_layer(layer1, 16, [5, 5], pool_size)
    print("layer2 ", layer2.shape)
    # Third stage: 120-map 5x5 convolution with VALID padding.
    layer3 = layers.conv2d(layer2,
                           num_outputs=120,
                           kernel_size=[5, 5],
                           activation_fn=tf.nn.softmax,
                           padding='VALID')
    print("layer3 ", layer3.shape)
    # Dense head (84 -> 10) with dropout, flattened to (batch, 10).
    result = dense_layer(layer3, [84, 10], keep_prob=0.5)
    result = tf.reshape(result, [-1, 10])
    print("result ", result.shape)
    prediction, loss = learn.models.logistic_regression_zero_init(result, y)
    train_op = layers.optimize_loss(loss,
                                    framework.get_global_step(),
                                    optimizer='Adagrad',
                                    learning_rate=0.1)
    return prediction, loss, train_op
def _build_training_ops(self):
    """Creates the training operations.

    Instance attributes created:
      optimization_op: the operation of optimize the loss.
      update_op: the operation to update the q network.
    """
    with tf.variable_scope(self.scope, reuse=self.reuse):
      self.optimization_op = contrib_layers.optimize_loss(
          loss=self.weighted_error,
          global_step=tf.train.get_or_create_global_step(),
          learning_rate=self.learning_rate,
          optimizer=self.optimizer,
          clip_gradients=self.grad_clipping,
          # Learning rate decays exponentially as the global step advances.
          learning_rate_decay_fn=functools.partial(
              tf.train.exponential_decay,
              decay_steps=self.learning_rate_decay_steps,
              decay_rate=self.learning_rate_decay_rate),
          variables=self.q_fn_vars)
      self.update_op = []
      # Pair online and target variables by sorted name so each target
      # variable is assigned from its online counterpart.
      for var, target in zip(
          sorted(self.q_fn_vars, key=lambda v: v.name),
          sorted(self.q_tp1_vars, key=lambda v: v.name)):
        self.update_op.append(target.assign(var))
      # Single grouped op that copies all variables at once.
      self.update_op = tf.group(*self.update_op)
def categorical_model(features, target):
    """Logistic regression over an embedded categorical 'embarked' feature."""
    target = tf.one_hot(target, 2, 1.0, 0.0)
    # Map category ids to learned embedding vectors.
    features = learn.ops.categorical_variable(
        features, n_classes, embedding_size=EMBEDDING_SIZE, name='embarked')
    # Drop the singleton sequence dimension before the linear model.
    prediction, loss = learn.models.logistic_regression(
        tf.squeeze(features, [1]), target)
    train_op = layers.optimize_loss(loss,
                                    tf.contrib.framework.get_global_step(),
                                    optimizer='SGD',
                                    learning_rate=0.05)
    return tf.argmax(prediction, dimension=1), loss, train_op
def conv_model(features, target):
    """One conv layer with global max pooling feeding logistic regression."""
    target = tf.one_hot(target, 10, 1.0, 0.0)
    # Add a channels dimension, convolve to 12 maps, then max-pool over the
    # whole spatial extent.
    features = tf.expand_dims(features, 3)
    features = tf.reduce_max(layers.conv2d(features, 12, [3, 3]), [1, 2])
    features = tf.reshape(features, [-1, 12])
    prediction, loss = learn.models.logistic_regression(features, target)
    train_op = layers.optimize_loss(loss,
                                    tf.contrib.framework.get_global_step(),
                                    optimizer="SGD",
                                    learning_rate=0.01)
    return tf.argmax(prediction, dimension=1), loss, train_op
def dnn_tanh(features, target):
    """Three tanh hidden layers (10-20-10) with a logistic-regression head."""
    target = tf.one_hot(target, 2, 1.0, 0.0)
    hidden = layers.stack(features, layers.fully_connected, [10, 20, 10],
                          activation_fn=tf.tanh)
    prediction, loss = learn.models.logistic_regression(hidden, target)
    train_op = layers.optimize_loss(loss,
                                    tf.contrib.framework.get_global_step(),
                                    optimizer='SGD',
                                    learning_rate=0.05)
    return tf.argmax(prediction, dimension=1), loss, train_op
def auto_encoder(x_1, x_2, x_mask_1, x_mask_2, y, dropout, opt):
    """Sentence-pair classifier: embeds both inputs, encodes, combines, classifies.

    NOTE(review): relies on helpers defined elsewhere in this file
    (embedding, the *_emb_encoder family, discriminator_2layer); shapes in
    the inline comments are taken from those comments — confirm against the
    helper definitions.
    """
    x_1_emb, W_emb = embedding(x_1, opt)  # batch L emb
    # Second input reuses the same embedding table.
    x_2_emb = tf.nn.embedding_lookup(W_emb, x_2)
    x_1_emb = tf.nn.dropout(x_1_emb, dropout)  # batch L emb
    x_2_emb = tf.nn.dropout(x_2_emb, dropout)  # batch L emb
    biasInit = tf.constant_initializer(0.001, dtype=tf.float32)
    # Shared 'trans' projection: created for input 1, reused for input 2.
    x_1_emb = layers.fully_connected(tf.squeeze(x_1_emb),
                                     num_outputs=opt.embed_size,
                                     biases_initializer=biasInit,
                                     activation_fn=tf.nn.relu,
                                     scope='trans',
                                     reuse=None)  # batch L emb
    x_2_emb = layers.fully_connected(tf.squeeze(x_2_emb),
                                     num_outputs=opt.embed_size,
                                     biases_initializer=biasInit,
                                     activation_fn=tf.nn.relu,
                                     scope='trans',
                                     reuse=True)
    x_1_emb = tf.expand_dims(x_1_emb, 3)  # batch L emb 1
    x_2_emb = tf.expand_dims(x_2_emb, 3)
    # Encoder choice: average pooling, max pooling, or their concatenation.
    if opt.encoder == 'aver':
        H_enc_1 = aver_emb_encoder(x_1_emb, x_mask_1)
        H_enc_2 = aver_emb_encoder(x_2_emb, x_mask_2)
    elif opt.encoder == 'max':
        H_enc_1 = max_emb_encoder(x_1_emb, x_mask_1, opt)
        H_enc_2 = max_emb_encoder(x_2_emb, x_mask_2, opt)
    elif opt.encoder == 'concat':
        H_enc_1 = concat_emb_encoder(x_1_emb, x_mask_1, opt)
        H_enc_2 = concat_emb_encoder(x_2_emb, x_mask_2, opt)
    # discriminative loss term
    # How the two sentence encodings are merged into one pair feature.
    if opt.combine_enc == 'mult':
        H_enc = tf.multiply(H_enc_1, H_enc_2)  # batch * n_gan
    if opt.combine_enc == 'concat':
        H_enc = tf.concat([H_enc_1, H_enc_2], 1)
    if opt.combine_enc == 'sub':
        H_enc = tf.subtract(H_enc_1, H_enc_2)
    if opt.combine_enc == 'mix':
        # All three combinations concatenated.
        H_1 = tf.multiply(H_enc_1, H_enc_2)
        H_2 = tf.concat([H_enc_1, H_enc_2], 1)
        H_3 = tf.subtract(H_enc_1, H_enc_2)
        H_enc = tf.concat([H_1, H_2, H_3], 1)
    # calculate the accuracy
    logits = discriminator_2layer(H_enc, opt, dropout, prefix='classify_',
                                  num_outputs=opt.category, is_reuse=None)
    prob = tf.nn.softmax(logits)
    correct_prediction = tf.equal(tf.argmax(prob, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
    train_op = layers.optimize_loss(
        loss,
        framework.get_global_step(),
        optimizer='Adam',
        # variables=d_vars,
        learning_rate=opt.lr)
    return accuracy, loss, train_op, W_emb
def one_hot_categorical_model(features, target):
    """Logistic regression over a one-hot encoded categorical feature."""
    target = tf.one_hot(target, 2, 1.0, 0.0)
    # Expand the category id into a one-hot vector instead of an embedding.
    features = tf.one_hot(features, n_classes, 1.0, 0.0)
    # Drop the singleton sequence dimension before the linear model.
    prediction, loss = learn.models.logistic_regression(
        tf.squeeze(features, [1]), target)
    train_op = layers.optimize_loss(loss,
                                    tf.contrib.framework.get_global_step(),
                                    optimizer='SGD',
                                    learning_rate=0.01)
    return tf.argmax(prediction, dimension=1), loss, train_op
def _build_model(self, data, target):
    """Embeds each id column, concatenates, and fits a linear regression."""
    # Split the input into one column per id.
    ids = tensorflow.split(1, self.n_ids, data)
    # One embedding table per id column, named by its index.
    embedded = [
        learn.ops.categorical_variable(ids[i], self.vocabulary_sizes[i],
                                       self.layer_size, str(i))
        for i in range(self.n_ids)
    ]
    # Concatenate the embeddings and drop the singleton sequence dimension.
    activation_in = tensorflow.squeeze(tensorflow.concat(2, embedded), [1])
    activation_out = layers.stack(activation_in, layers.fully_connected,
                                  self.hidden_units_formation)
    prediction, loss = learn.models.linear_regression(activation_out, target)
    train_op = layers.optimize_loss(loss, framework.get_global_step(),
                                    self.learning_rate, "SGD")
    return prediction, loss, train_op
def conv_model(X, Y_):
    """Three-conv-layer MNIST classifier.

    Args:
      X: flat image batch, reshaped to NHWC 28x28x1.
      Y_: integer class labels.

    Returns:
      ({"predictions": softmax, "classes": argmax}, loss, train_op).
    """
    XX = tf.reshape(X, [-1, 28, 28, 1])
    Y1 = layers.conv2d(XX, num_outputs=6, kernel_size=[6, 6])
    Y2 = layers.conv2d(Y1, num_outputs=12, kernel_size=[5, 5], stride=2)
    Y3 = layers.conv2d(Y2, num_outputs=24, kernel_size=[4, 4], stride=2)
    Y4 = layers.flatten(Y3)
    Y5 = layers.relu(Y4, 200)
    Ylogits = layers.linear(Y5, 10)
    predict = tf.nn.softmax(Ylogits)
    classes = tf.cast(tf.argmax(predict, 1), tf.uint8)
    # FIX: softmax_cross_entropy_with_logits returns one loss per example;
    # optimize_loss requires a scalar, so reduce to the batch mean (matching
    # the sibling conv_model in this file).
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(Ylogits, tf.one_hot(Y_, 10)))
    train_op = layers.optimize_loss(loss, framework.get_global_step(), 0.003,
                                    "Adam")
    return {"predictions": predict, "classes": classes}, loss, train_op
def conv_model(X, Y_, mode):
    """Three-conv-layer MNIST classifier with constant bias initialization."""
    XX = tf.reshape(X, [-1, 28, 28, 1])
    # All layers share the same small constant bias initializer.
    biasInit = tf.constant_initializer(0.1, dtype=tf.float32)
    Y1 = layers.conv2d(XX, num_outputs=6, kernel_size=[6, 6],
                       biases_initializer=biasInit)
    Y2 = layers.conv2d(Y1, num_outputs=12, kernel_size=[5, 5], stride=2,
                       biases_initializer=biasInit)
    Y3 = layers.conv2d(Y2, num_outputs=24, kernel_size=[4, 4], stride=2,
                       biases_initializer=biasInit)
    Y4 = layers.flatten(Y3)
    Y5 = layers.relu(Y4, 200, biases_initializer=biasInit)
    Ylogits = layers.linear(Y5, 10)
    predict = tf.nn.softmax(Ylogits)
    classes = tf.cast(tf.argmax(predict, 1), tf.uint8)
    # Mean cross-entropy, scaled by 100.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            Ylogits, tf.one_hot(Y_, 10))) * 100
    train_op = layers.optimize_loss(loss, framework.get_global_step(), 0.001,
                                    "Adam")
    return {"predictions": predict, "classes": classes}, loss, train_op
def dnn_tanh(features, target):
    """Tanh DNN over mixed continuous and embedded categorical features."""
    target = tf.one_hot(target, 2, 1.0, 0.0)
    # Continuous columns: cast to float and give each a feature dimension.
    final_features = [
        tf.expand_dims(tf.cast(features[var], tf.float32), 1)
        for var in continues_vars
    ]
    # Categorical columns: embed ids into a distributed representation.
    for var in categorical_vars:
        embedded = learn.ops.categorical_variable(
            features[var + '_ids'],
            len(categorical_var_encoders[var].classes_),
            embedding_size=CATEGORICAL_EMBED_SIZE,
            name=var)
        final_features.append(embedded)
    # Concatenate everything into one feature vector per example.
    features = tf.concat(1, final_features)
    # Deep network: three tanh layers (10-20-10).
    logits = layers.stack(features, layers.fully_connected, [10, 20, 10],
                          activation_fn=tf.tanh)
    prediction, loss = learn.models.logistic_regression(logits, target)
    train_op = layers.optimize_loss(loss,
                                    tf.contrib.framework.get_global_step(),
                                    optimizer='SGD',
                                    learning_rate=0.05)
    return tf.argmax(prediction, dimension=1), loss, train_op
def _model_fn(features, targets, mode):
    """Wraps the user model_fn with configurable optimizer and learning rate."""
    # Record whether this graph is being built for training.
    ops.get_default_graph().add_to_collection('IS_TRAINING', mode == 'train')
    if self.class_weight is not None:
        constant_op.constant(self.class_weight, name='class_weight')
    predictions, loss = model_fn(features, targets)
    # Both knobs may be plain values or callables: a learning-rate schedule
    # receives the global step, an optimizer factory receives the rate.
    learning_rate = (
        self.learning_rate(contrib_framework.get_global_step())
        if isinstance(self.learning_rate, types.FunctionType)
        else self.learning_rate)
    optimizer = (
        self.optimizer(learning_rate)
        if isinstance(self.optimizer, types.FunctionType)
        else self.optimizer)
    train_op = layers.optimize_loss(
        loss,
        contrib_framework.get_global_step(),
        learning_rate=learning_rate,
        optimizer=optimizer,
        clip_gradients=self.clip_gradients)
    return predictions, loss, train_op
def bow_model(features, target):
    """Bag-of-words document+question encoder with a softmax answer head."""
    # Drop ids outside the vocabulary before the embedding lookups.
    document = utils.prune_out_of_vocab_ids(features['document_sequence'],
                                            VOCAB_SIZE)
    question = utils.prune_out_of_vocab_ids(features['question_sequence'],
                                            VOCAB_SIZE)
    answers = tf.squeeze(tf.one_hot(target, ANSWER_NUM, 1.0, 0.0),
                         squeeze_dims=[1])
    # Shared embedding table; each sparse sequence is sum-pooled into a
    # single bag-of-words vector.
    embeddings = tf.get_variable('embeddings', [VOCAB_SIZE, EMBED_DIM])
    doc_enc = layers.safe_embedding_lookup_sparse(
        [embeddings], document, None, combiner='sum')
    question_enc = layers.safe_embedding_lookup_sparse(
        [embeddings], question, None, combiner='sum')
    # Joint representation = document encoding concatenated with question.
    joint_enc = tf.concat(1, [doc_enc, question_enc])
    answer_embeddings = tf.get_variable('answer_embeddings',
                                        [ANSWER_DIM, ANSWER_NUM])
    answer_biases = tf.get_variable('answer_biases', [ANSWER_NUM])
    softmax, loss = learn.ops.softmax_classifier(
        joint_enc, answers, answer_embeddings, answer_biases)
    train_op = layers.optimize_loss(loss,
                                    tf.contrib.framework.get_global_step(),
                                    learning_rate=LEARNING_RATE,
                                    optimizer='Adam')
    return softmax, loss, train_op
def conv_model(feature, target, mode):
    """2-layer convolution model."""
    # One-hot targets of shape (batch_size, 10) with on-value 1.
    target = tf.one_hot(tf.cast(target, tf.int32), 10, 1, 0)
    # NHWC layout: 28x28 single-channel images.
    feature = tf.reshape(feature, [-1, 28, 28, 1])
    # First conv layer: 32 features per 5x5 patch.
    with tf.variable_scope('conv_layer1'):
        h_conv1 = layers.convolution(feature, 32, kernel_size=[5, 5],
                                     activation_fn=tf.nn.relu)
        h_pool1 = max_pool_2x2(h_conv1)
    # Second conv layer: 64 features per 5x5 patch.
    with tf.variable_scope('conv_layer2'):
        h_conv2 = layers.convolution(h_pool1, 64, kernel_size=[5, 5],
                                     activation_fn=tf.nn.relu)
        h_pool2 = max_pool_2x2(h_conv2)
    # Flatten into a batch of vectors for the dense layer.
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    # 1024-unit dense layer; dropout is active only while training.
    h_fc1 = layers.dropout(
        layers.fully_connected(h_pool2_flat, 1024, activation_fn=tf.nn.relu),
        keep_prob=0.5,
        is_training=mode == tf.contrib.learn.ModeKeys.TRAIN)
    # One logit per class, then the cross-entropy loss.
    logits = layers.fully_connected(h_fc1, 10, activation_fn=None)
    loss = tf.contrib.losses.softmax_cross_entropy(logits, target)
    train_op = layers.optimize_loss(loss,
                                    tf.contrib.framework.get_global_step(),
                                    optimizer='SGD',
                                    learning_rate=0.001)
    return tf.argmax(logits, 1), loss, train_op
def _get_train_ops(self, features, targets):
    """Builds the model graph in TRAIN mode and returns trainer ops.

    Subclasses that require custom support are expected to override this;
    the default delegates to the `model_fn` supplied at construction time
    and wraps its loss in an optimize_loss op.

    Args:
      features: `Tensor` or `dict` of `Tensor` objects.
      targets: `Tensor` or `dict` of `Tensor` objects.

    Returns:
      Tuple of train `Operation` and loss `Tensor`.
    """
    # Predictions are unused during training; only the loss matters here.
    _, loss = self._model_fn(features, targets, ModeKeys.TRAIN)
    train_op = layers.optimize_loss(
        loss,
        contrib_framework.get_global_step(),
        learning_rate=self.learning_rate,
        optimizer=self.optimizer,
        clip_gradients=self.clip_gradients)
    return train_op, loss
def seq2seq(mode, features, labels, params):
    """Attention-based GRU seq2seq model function for tf.estimator.

    Builds a shared-embedding encoder, then two decoders over the same
    variable scope: a teacher-forced training decoder and a greedy
    prediction decoder.
    """
    vocab_size = params['vocab_size']
    embed_dim = params['embed_dim']
    num_units = params['num_units']
    input_max_length = params['input_max_length']
    output_max_length = params['output_max_length']
    inp = features['input']
    output_tensor = features['output']
    batch_size = tf.shape(inp)[0]
    # Decoder input starts every sequence with the GO token.
    start_tokens = tf.zeros([batch_size], dtype=tf.int64) + GO_TOKEN
    train_output = tf.concat([tf.expand_dims(start_tokens, 1), output_tensor], 1)
    #print (train_output.get_shape().as_list())
    # Lengths count positions whose id != 1 (1 is treated as padding here).
    input_lengths = tf.reduce_sum(tf.to_int32(tf.not_equal(inp, 1)), 1)
    #print (input_lengths.get_shape().as_list())
    output_lengths = tf.reduce_sum(tf.to_int32(tf.not_equal(train_output, 1)), 1)
    #print (output_lengths.get_shape().as_list())
    # Input and output share one embedding table via scope reuse.
    input_embed = layers.embed_sequence(
        inp, vocab_size=vocab_size, embed_dim=embed_dim, scope='embed')
    output_embed = layers.embed_sequence(
        train_output, vocab_size=vocab_size, embed_dim=embed_dim,
        scope='embed', reuse=True)
    with tf.variable_scope('embed', reuse=True):
        embeddings = tf.get_variable('embeddings')
    cell = tf.contrib.rnn.GRUCell(num_units=num_units)
    encoder_outputs, encoder_final_state = tf.nn.dynamic_rnn(
        cell, input_embed, dtype=tf.float32)
    #print (encoder_outputs.get_shape().as_list())
    # Teacher forcing during training; greedy argmax decoding for prediction
    # (end_token=8 terminates greedy decoding).
    train_helper = tf.contrib.seq2seq.TrainingHelper(output_embed, output_lengths)
    # train_helper = tf.contrib.seq2seq.ScheduledEmbeddingTrainingHelper(
    #     output_embed, output_lengths, embeddings, 0.3
    # )
    pred_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
        embeddings, start_tokens=tf.to_int32(start_tokens), end_token=8)

    def decode(helper, scope, reuse=None):
        # Builds a Bahdanau-attention decoder; reuse=True shares variables
        # with the training decoder built first.
        with tf.variable_scope(scope, reuse=reuse):
            attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                num_units=num_units, memory=encoder_outputs,
                memory_sequence_length=input_lengths)
            cell = tf.contrib.rnn.GRUCell(num_units=num_units)
            attn_cell = tf.contrib.seq2seq.AttentionWrapper(
                cell, attention_mechanism, attention_layer_size=num_units / 2)
            out_cell = tf.contrib.rnn.OutputProjectionWrapper(
                attn_cell, vocab_size, reuse=reuse
            )
            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=out_cell, helper=helper,
                initial_state=out_cell.zero_state(
                    dtype=tf.float32, batch_size=batch_size))
            #initial_state=encoder_final_state)
            outputs = tf.contrib.seq2seq.dynamic_decode(
                decoder=decoder, output_time_major=False,
                impute_finished=True, maximum_iterations=output_max_length
            )
            return outputs[0]

    train_outputs = decode(train_helper, 'decode')
    pred_outputs = decode(pred_helper, 'decode', reuse=True)
    # Named identity ops expose sample ids for logging hooks.
    tf.identity(train_outputs.sample_id[0], name='train_pred')
    # Mask padding positions (id == 1) out of the sequence loss.
    weights = tf.to_float(tf.not_equal(train_output[:, :-1], 1))
    loss = tf.contrib.seq2seq.sequence_loss(
        train_outputs.rnn_output, output_tensor, weights=weights)
    train_op = layers.optimize_loss(
        loss, tf.train.get_global_step(),
        optimizer=params.get('optimizer', 'Adam'),
        learning_rate=params.get('learning_rate', 0.001),
        summaries=['loss', 'learning_rate'])
    tf.identity(pred_outputs.sample_id[0], name='predictions')
    # if mode == tf.estimator.ModeKeys.PREDICT:
    #     return tf.estimator.EstimatorSpec(mode=mode, predictions = pred_outputs)
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=pred_outputs.sample_id,
        loss=loss,
        train_op=train_op
    )
def conv_model_train_op(loss, mode):
    """Adam train op with exponential LR decay, or None outside TRAIN mode."""
    # Guard clause: only training mode builds an optimizer.
    if mode != learn.ModeKeys.TRAIN:
        return None
    return layers.optimize_loss(
        loss,
        framework.get_global_step(),
        learning_rate=0.003,
        optimizer="Adam",
        # to remove learning rate decay, comment the next line
        learning_rate_decay_fn=lambda lr, step: 0.0001 + tf.train.exponential_decay(
            lr, step, -2000, math.e))