def build_classifier():
    global out, encodings_batch, labels_batch, c_vars_saver, loss, merged_sum
    with tf.variable_scope("classifier"):
        net = lrelu(tf.layers.dense(encodings_batch, 64,
                                    kernel_initializer=tf.random_normal_initializer(stddev=0.02),
                                    bias_initializer=tf.constant_initializer(0.0),
                                    trainable=is_training, name='hidden1'))
        net = lrelu(tf.layers.dense(net, 64,
                                    kernel_initializer=tf.random_normal_initializer(stddev=0.02),
                                    bias_initializer=tf.constant_initializer(0.0),
                                    trainable=is_training, name='hidden2'))
        out = tf.layers.dense(net, 5,
                              kernel_initializer=tf.random_normal_initializer(stddev=0.02),
                              bias_initializer=tf.constant_initializer(0.0),
                              trainable=is_training, name='out')
        loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels_batch, logits=out)

        c_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='classifier/')
        print(c_vars)

        c_vars_summary = []
        loss_summary = tf.summary.scalar('loss', tf.reduce_mean(loss))
        for var in c_vars:
            c_vars_summary.append(tf.summary.histogram(var.name, var))
        merged_sum = tf.summary.merge([loss_summary] + c_vars_summary)

        c_vars_saver = tf.train.Saver(var_list=c_vars, max_to_keep=5)
def _build_network(self, is_training=True):
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

    net_conv = self._image_to_head(is_training)
    with tf.variable_scope(self._scope, self._scope):
        # build the anchors for the image
        self._anchor_component()
        # region proposal network
        rois = self._region_proposal(net_conv, is_training, initializer)
        # region of interest pooling
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(net_conv, rois, "pool5")
        else:
            raise NotImplementedError

    fc7 = self._head_to_tail(pool5, is_training)
    with tf.variable_scope(self._scope, self._scope):
        # region classification
        cls_prob, bbox_pred = self._region_classification(fc7, is_training,
                                                          initializer, initializer_bbox)

    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
def __init__(self, sess, n_features, lr=0.01):
    self.sess = sess
    with tf.name_scope('inputs'):
        self.s = tf.placeholder(tf.float32, [1, n_features], "state")
        self.v_ = tf.placeholder(tf.float32, [1, 1], name="v_next")
        self.r = tf.placeholder(tf.float32, name='r')

    with tf.variable_scope('Critic'):
        l1 = tf.layers.dense(
            inputs=self.s,
            units=30,  # number of hidden units
            activation=tf.nn.relu,
            kernel_initializer=tf.random_normal_initializer(0., .1),  # weights
            bias_initializer=tf.constant_initializer(0.1),  # biases
            name='l1'
        )
        self.v = tf.layers.dense(
            inputs=l1,
            units=1,  # output units
            activation=None,
            kernel_initializer=tf.random_normal_initializer(0., .1),  # weights
            bias_initializer=tf.constant_initializer(0.1),  # biases
            name='V'
        )

    with tf.variable_scope('squared_TD_error'):
        self.td_error = tf.reduce_mean(self.r + GAMMA * self.v_ - self.v)
        self.loss = tf.square(self.td_error)  # TD_error = (r+gamma*V_next) - V_eval
    with tf.variable_scope('train'):
        self.train_op = tf.train.AdamOptimizer(lr).minimize(self.loss)
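# Hedged illustration (not part of the original snippet): the critic above regresses
# toward the one-step TD target, td = r + GAMMA * V(s') - V(s), and minimizes td^2.
# A tiny plain-Python check of that arithmetic with made-up numbers:
GAMMA = 0.9                          # assumed to match the module-level constant used above
r, v_next, v = 1.0, 0.5, 0.4
td_error = r + GAMMA * v_next - v    # (r + gamma*V_next) - V_eval
loss = td_error ** 2
print(td_error, loss)                # 1.05 1.1025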
def model(self, images, input_size, output_size, isEval=None):
    with tf.variable_scope('hidden1', reuse=isEval):
        weights = tf.get_variable(
            "weights", [input_size, self.hidden1_units],
            initializer=tf.random_normal_initializer(
                0.0, 1.0 / math.sqrt(float(input_size)), seed=self.SEED))
        biases = tf.get_variable("biases", [self.hidden1_units],
                                 initializer=tf.constant_initializer(0.0))
        reg_hidden1 = tf.nn.l2_loss(weights)
        hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)

    with tf.variable_scope('softmax_linear', reuse=isEval):
        weights = tf.get_variable(
            "weights", [self.hidden1_units, output_size],
            initializer=tf.random_normal_initializer(
                0.0, 1.0 / math.sqrt(float(self.hidden1_units)), seed=self.SEED))
        biases = tf.get_variable("biases", [output_size],
                                 initializer=tf.constant_initializer(0.0))
        logits = tf.matmul(hidden1, weights) + biases
        reg_linear = tf.nn.l2_loss(weights)

    if isEval:
        return logits
    else:
        regularizers = (reg_hidden1 + reg_linear)
        return (logits, regularizers)
def __init__(self, n_inputs, n_rules, learning_rate=1e-2):
    self.n = n_inputs
    self.m = n_rules
    self.inputs = tf.placeholder(tf.float32, shape=(None, n_inputs))  # Input
    self.targets = tf.placeholder(tf.float32, shape=None)  # Desired output
    mu = tf.get_variable("mu", [n_rules * n_inputs],
                         initializer=tf.random_normal_initializer(0, 1))  # Means of Gaussian MFs
    sigma = tf.get_variable("sigma", [n_rules * n_inputs],
                            initializer=tf.random_normal_initializer(0, 1))  # Standard deviations of Gaussian MFs
    y = tf.get_variable("y", [1, n_rules],
                        initializer=tf.random_normal_initializer(0, 1))  # Consequent centers

    self.params = tf.trainable_variables()

    self.rul = tf.reduce_prod(
        tf.reshape(
            tf.exp(-0.5 * tf.square(tf.subtract(tf.tile(self.inputs, (1, n_rules)), mu)) / tf.square(sigma)),
            (-1, n_rules, n_inputs)),
        axis=2)  # Rule activations

    # Fuzzy base expansion function:
    num = tf.reduce_sum(tf.multiply(self.rul, y), axis=1)
    den = tf.clip_by_value(tf.reduce_sum(self.rul, axis=1), 1e-12, 1e12)
    self.out = tf.divide(num, den)

    self.loss = tf.losses.huber_loss(self.targets, self.out)  # Loss function computation
    # Other loss functions for regression, uncomment to try them:
    # loss = tf.sqrt(tf.losses.mean_squared_error(target, out))
    # loss = tf.losses.absolute_difference(target, out)
    self.optimize = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.loss)  # Optimization step
    # Other optimizers, uncomment to try them:
    # self.optimize = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(self.loss)
    # self.optimize = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(self.loss)
    self.init_variables = tf.global_variables_initializer()  # Variable initializer
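# Hedged NumPy sketch of the rule-activation step above (values are illustrative,
# not the trained parameters): each input is tiled across the rules, passed through
# Gaussian membership functions, and the memberships are multiplied (product T-norm)
# within each rule to give firing strengths.
import numpy as np

n_inputs, n_rules = 2, 3
x = np.array([[0.5, -1.0]])                        # one sample, shape (1, n_inputs)
mu = np.zeros(n_rules * n_inputs)                  # assumed MF means
sigma = np.ones(n_rules * n_inputs)                # assumed MF standard deviations
tiled = np.tile(x, (1, n_rules))                   # shape (1, n_rules * n_inputs)
memberships = np.exp(-0.5 * (tiled - mu) ** 2 / sigma ** 2)
rul = memberships.reshape(-1, n_rules, n_inputs).prod(axis=2)  # firing strengths
print(rul.shape)                                   # (1, 3)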
def SRGAN_g(t_image, is_train=False, reuse=False):
    """ Generator in Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network
    feature maps (n) and stride (s)
    """
    w_init = tf.random_normal_initializer(stddev=0.02)
    b_init = None  # tf.constant_initializer(value=0.0)
    g_init = tf.random_normal_initializer(1., 0.02)
    with tf.variable_scope("SRGAN_g", reuse=reuse) as vs:
        # tl.layers.set_name_reuse(reuse)  # remove for TL 1.8.0+
        n = InputLayer(t_image, name='in')
        n = Conv2d(n, 64, (3, 3), (1, 1), act=tf.nn.relu, padding='SAME', W_init=w_init, name='n64s1/c')
        temp = n

        # B residual blocks
        for i in range(16):
            nn = Conv2d(n, 64, (3, 3), (1, 1), act=None, padding='SAME', W_init=w_init, b_init=b_init, name='n64s1/c1/%s' % i)
            nn = BatchNormLayer(nn, act=tf.nn.relu, is_train=is_train, gamma_init=g_init, name='n64s1/b1/%s' % i)
            nn = Conv2d(nn, 64, (3, 3), (1, 1), act=None, padding='SAME', W_init=w_init, b_init=b_init, name='n64s1/c2/%s' % i)
            nn = BatchNormLayer(nn, is_train=is_train, gamma_init=g_init, name='n64s1/b2/%s' % i)
            nn = ElementwiseLayer([n, nn], tf.add, name='b_residual_add/%s' % i)
            n = nn

        n = Conv2d(n, 64, (3, 3), (1, 1), act=None, padding='SAME', W_init=w_init, b_init=b_init, name='n64s1/c/m')
        n = BatchNormLayer(n, is_train=is_train, gamma_init=g_init, name='n64s1/b/m')
        n = ElementwiseLayer([n, temp], tf.add, name='add3')
        # B residual blocks end

        n = Conv2d(n, 256, (3, 3), (1, 1), act=None, padding='SAME', W_init=w_init, name='n256s1/1')
        n = SubpixelConv2d(n, scale=2, n_out_channel=None, act=tf.nn.relu, name='pixelshufflerx2/1')
        n = Conv2d(n, 256, (3, 3), (1, 1), act=None, padding='SAME', W_init=w_init, name='n256s1/2')
        n = SubpixelConv2d(n, scale=2, n_out_channel=None, act=tf.nn.relu, name='pixelshufflerx2/2')
        n = Conv2d(n, 3, (1, 1), (1, 1), act=tf.nn.tanh, padding='SAME', W_init=w_init, name='out')
        return n
def _build_net(self):
    with tf.name_scope('inputs'):
        self.tf_obs = tf.placeholder(tf.float32, [None, self.n_features], name="observations")
        self.tf_acts = tf.placeholder(tf.int32, [None, ], name="actions_num")
        self.tf_vt = tf.placeholder(tf.float32, [None, ], name="actions_value")
    layer = tf.layers.dense(
        inputs=self.tf_obs,
        units=10,
        activation=tf.nn.tanh,
        kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
        bias_initializer=tf.constant_initializer(0.1),
        name='fc1'
    )
    all_act = tf.layers.dense(
        inputs=layer,
        units=self.n_actions,
        activation=None,
        kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
        bias_initializer=tf.constant_initializer(0.1),
        name='fc2'
    )
    self.all_act_prob = tf.nn.softmax(all_act, name='act_prob')
    with tf.name_scope('loss'):
        neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=all_act, labels=self.tf_acts)
        loss = tf.reduce_mean(neg_log_prob * self.tf_vt)  # maximizing log_p * R expresses the objective
    with tf.name_scope('train'):
        self.train_op = tf.train.AdamOptimizer(self.lr).minimize(loss)
def _build_net(self):
    with tf.name_scope('inputs'):
        self.tf_obs = tf.placeholder(tf.float32, [None, self.n_features], name="observations")
        self.tf_acts = tf.placeholder(tf.int32, [None, ], name="actions_num")
        self.tf_vt = tf.placeholder(tf.float32, [None, ], name="actions_value")
    # fc1
    layer = tf.layers.dense(
        inputs=self.tf_obs,
        units=10,
        activation=tf.nn.tanh,  # tanh activation
        kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
        bias_initializer=tf.constant_initializer(0.1),
        name='fc1'
    )
    # fc2
    all_act = tf.layers.dense(
        inputs=layer,
        units=self.n_actions,
        activation=None,
        kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
        bias_initializer=tf.constant_initializer(0.1),
        name='fc2'
    )

    self.all_act_prob = tf.nn.softmax(all_act, name='act_prob')  # use softmax to convert to probability

    with tf.name_scope('loss'):
        # to maximize total reward (log_p * R) is to minimize -(log_p * R), and tf only has minimize(loss)
        neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=all_act, labels=self.tf_acts)  # the negative log-prob of the chosen action
        # or in this way:
        # neg_log_prob = tf.reduce_sum(-tf.log(self.all_act_prob)*tf.one_hot(self.tf_acts, self.n_actions), axis=1)
        loss = tf.reduce_mean(neg_log_prob * self.tf_vt)  # reward guided loss

    with tf.name_scope('train'):
        self.train_op = tf.train.AdamOptimizer(self.lr).minimize(loss)
def discriminator(X, reuse=False):
    with tf.variable_scope('discriminator'):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        K = 64
        M = 128
        N = 256

        W1 = tf.get_variable('D_W1', [4, 4, 1, K], initializer=tf.random_normal_initializer(stddev=0.1))
        B1 = tf.get_variable('D_B1', [K], initializer=tf.constant_initializer())
        W2 = tf.get_variable('D_W2', [4, 4, K, M], initializer=tf.random_normal_initializer(stddev=0.1))
        B2 = tf.get_variable('D_B2', [M], initializer=tf.constant_initializer())
        W3 = tf.get_variable('D_W3', [7*7*M, N], initializer=tf.random_normal_initializer(stddev=0.1))
        B3 = tf.get_variable('D_B3', [N], initializer=tf.constant_initializer())
        W4 = tf.get_variable('D_W4', [N, 1], initializer=tf.random_normal_initializer(stddev=0.1))
        B4 = tf.get_variable('D_B4', [1], initializer=tf.constant_initializer())

        X = tf.reshape(X, [-1, 28, 28, 1], 'reshape')

        conv1 = conv(X, W1, B1, stride=2, name='conv1')
        bn1 = tf.contrib.layers.batch_norm(conv1)
        conv2 = conv(tf.nn.dropout(lrelu(bn1), 0.4), W2, B2, stride=2, name='conv2')
        # conv2 = conv(lrelu(conv1), W2, B2, stride=2, name='conv2')
        bn2 = tf.contrib.layers.batch_norm(conv2)
        flat = tf.reshape(tf.nn.dropout(lrelu(bn2), 0.4), [-1, 7*7*M], name='flat')
        # flat = tf.reshape(lrelu(conv2), [-1, 7*7*M], name='flat')

        dense = lrelu(tf.matmul(flat, W3) + B3)
        logits = tf.matmul(dense, W4) + B4
        prob = tf.nn.sigmoid(logits)
        return prob, logits
def _build_cnn(self, feat_x):
    with tf.variable_scope("cnn_global", reuse=True):
        W1 = tf.get_variable(dtype=tf.float32,
                             shape=[self.filter_stride, self.dim_feat_x, 1, self.num_feat_map],
                             name="weight_w1",
                             initializer=tf.random_normal_initializer(mean=0.0, stddev=0.1))
        b1 = tf.get_variable(dtype=tf.float32, shape=[self.num_feat_map], name="bias_b1",
                             initializer=tf.constant_initializer(1.0))

    x_inputs = tf.reshape(feat_x, [-1, self.window_size, self.dim_feat_x, 1])
    # print x_inputs.get_shape()

    # h_conv_1 size: [-1, dwf, ws, nfm]
    h_conv_1 = tf.nn.relu(self._conv_2d(x_inputs, W1) + b1)
    # print h_conv_1.get_shape()

    # h_max_pool size: [-1, 1, 1, nfm]
    h_max_pool = self._max_pool(h_conv_1)
    # print h_max_pool.get_shape()

    # concentrate in one vector
    # sent_vec size: [-1, nfm]
    sent_vec = tf.reshape(h_max_pool, [-1, self.num_feat_map])
    # print sent_vec.get_shape()

    with tf.variable_scope("cnn_global", reuse=True):
        W2 = tf.get_variable(dtype=tf.float32, shape=[self.num_feat_map, self.output_size],
                             name="weight_w2",
                             initializer=tf.random_normal_initializer(mean=0.0, stddev=0.1))
        b2 = tf.get_variable(dtype=tf.float32, shape=[self.output_size], name="bias_b2",
                             initializer=tf.constant_initializer(1.0))

    logits = tf.matmul(sent_vec, W2) + b2
    return logits
def generator(X, batch_size=64):
    with tf.variable_scope('generator'):
        K = 256
        L = 128
        M = 64

        W1 = tf.get_variable('G_W1', [100, 7*7*K], initializer=tf.random_normal_initializer(stddev=0.1))
        B1 = tf.get_variable('G_B1', [7*7*K], initializer=tf.constant_initializer())
        W2 = tf.get_variable('G_W2', [4, 4, M, K], initializer=tf.random_normal_initializer(stddev=0.1))
        B2 = tf.get_variable('G_B2', [M], initializer=tf.constant_initializer())
        W3 = tf.get_variable('G_W3', [4, 4, 1, M], initializer=tf.random_normal_initializer(stddev=0.1))
        B3 = tf.get_variable('G_B3', [1], initializer=tf.constant_initializer())

        X = lrelu(tf.matmul(X, W1) + B1)
        X = tf.reshape(X, [batch_size, 7, 7, K])
        deconv1 = deconv(X, W2, B2, shape=[batch_size, 14, 14, M], stride=2, name='deconv1')
        bn1 = tf.contrib.layers.batch_norm(deconv1)
        deconv2 = deconv(tf.nn.dropout(lrelu(bn1), 0.4), W3, B3,
                         shape=[batch_size, 28, 28, 1], stride=2, name='deconv2')

        XX = tf.reshape(deconv2, [-1, 28*28], 'reshape')
        return tf.nn.sigmoid(XX)
def bpr_mf(user_count, item_count, hidden_dim):
    u = tf.placeholder(tf.int32, [None])
    i = tf.placeholder(tf.int32, [None])
    j = tf.placeholder(tf.int32, [None])

    user_emb_w = tf.get_variable("user_emb_w", [user_count + 1, hidden_dim],
                                 initializer=tf.random_normal_initializer(0, 0.1))
    item_emb_w = tf.get_variable("item_emb_w", [item_count + 1, hidden_dim],
                                 initializer=tf.random_normal_initializer(0, 0.1))

    u_emb = tf.nn.embedding_lookup(user_emb_w, u)
    i_emb = tf.nn.embedding_lookup(item_emb_w, i)
    j_emb = tf.nn.embedding_lookup(item_emb_w, j)

    x = tf.reduce_sum(tf.multiply(u_emb, (i_emb - j_emb)), 1, keep_dims=True)
    mf_auc = tf.reduce_mean(tf.to_float(x > 0))

    l2_norm = tf.add_n([
        tf.reduce_sum(tf.multiply(u_emb, u_emb)),
        tf.reduce_sum(tf.multiply(i_emb, i_emb)),
        tf.reduce_sum(tf.multiply(j_emb, j_emb))
    ])

    regulation_rate = 0.0001
    bprloss = regulation_rate * l2_norm - tf.reduce_mean(tf.log(tf.sigmoid(x)))
    train_op = tf.train.GradientDescentOptimizer(0.01).minimize(bprloss)
    return u, i, j, mf_auc, bprloss, train_op
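# Hedged NumPy sketch of the BPR objective computed above: for a user u and an
# (observed i, unobserved j) item pair, x = u . (i - j), the loss is -log(sigmoid(x)),
# and x > 0 is the per-triple contribution to mf_auc. Toy vectors, not trained embeddings.
import numpy as np

u_emb = np.array([0.1, 0.2])
i_emb = np.array([0.3, 0.1])
j_emb = np.array([-0.2, 0.0])
x = np.sum(u_emb * (i_emb - j_emb))            # preference score difference
bpr_loss = -np.log(1.0 / (1.0 + np.exp(-x)))   # -log(sigmoid(x)), before the L2 term
auc = float(x > 0)                             # contribution to mf_auc
print(x, bpr_loss, auc)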
def model(hparams, X, past=None, scope='model', reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        results = {}
        batch, sequence = shape_list(X)

        wpe = tf.get_variable('wpe', [hparams.n_ctx, hparams.n_embd],
                              initializer=tf.random_normal_initializer(stddev=0.01))
        wte = tf.get_variable('wte', [hparams.n_vocab, hparams.n_embd],
                              initializer=tf.random_normal_initializer(stddev=0.02))
        past_length = 0 if past is None else tf.shape(past)[-2]
        h = tf.gather(wte, X) + tf.gather(wpe, positions_for(X, past_length))

        # Transformer
        presents = []
        pasts = tf.unstack(past, axis=1) if past is not None else [None] * hparams.n_layer
        assert len(pasts) == hparams.n_layer
        for layer, past in enumerate(pasts):
            h, present = block(h, 'h%d' % layer, past=past, hparams=hparams)
            presents.append(present)
        results['present'] = tf.stack(presents, axis=1)
        h = norm(h, 'ln_f')

        # Language model loss.  Do tokens <n predict token n?
        h_flat = tf.reshape(h, [batch*sequence, hparams.n_embd])
        logits = tf.matmul(h_flat, wte, transpose_b=True)
        logits = tf.reshape(logits, [batch, sequence, hparams.n_vocab])
        results['logits'] = logits
        return results
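# Hedged NumPy sketch (shapes only, toy sizes) of the weight-tied output projection
# at the end of model(): logits come from multiplying the final hidden states by the
# *transposed* token embedding matrix wte, so no separate output matrix is learned.
import numpy as np

batch, sequence, n_embd, n_vocab = 2, 4, 8, 16
h = np.random.randn(batch, sequence, n_embd)
wte = np.random.randn(n_vocab, n_embd)             # token embedding table
h_flat = h.reshape(batch * sequence, n_embd)
logits = (h_flat @ wte.T).reshape(batch, sequence, n_vocab)
print(logits.shape)                                # (2, 4, 16)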
def weight(name, shape, init='he', range=None):
    """ Initializes weight.
    :param name: Variable name
    :param shape: Tensor shape
    :param init: Init mode. xavier / normal / uniform / he (default is 'he')
    :param range: Range for uniform initialization (required if init == 'uniform')
    :return: Variable
    """
    initializer = tf.constant_initializer()
    if init == 'xavier':
        fan_in, fan_out = _get_dims(shape)
        range = math.sqrt(6.0 / (fan_in + fan_out))
        initializer = tf.random_uniform_initializer(-range, range)
    elif init == 'he':
        fan_in, _ = _get_dims(shape)
        std = math.sqrt(2.0 / fan_in)
        initializer = tf.random_normal_initializer(stddev=std)
    elif init == 'normal':
        initializer = tf.random_normal_initializer(stddev=0.1)
    elif init == 'uniform':
        if range is None:
            raise ValueError("range must not be None if uniform init is used.")
        initializer = tf.random_uniform_initializer(-range, range)

    var = tf.get_variable(name, shape, initializer=initializer)
    tf.add_to_collection('l2', tf.nn.l2_loss(var))  # Add L2 Loss
    return var
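# Hedged sanity check (not part of the original helper) of the He-initialization
# scale used above: for fan_in inputs, std = sqrt(2 / fan_in), which keeps the
# variance of ReLU activations roughly constant across layers.
import math

fan_in = 256
std = math.sqrt(2.0 / fan_in)
print(std)                                         # ~0.0884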
def cnn_inference(inputs, input_units, output_units, is_train=True, FLAGS=None):
    """
    Define the CNN model.
    """
    # [BATCH_SIZE, 9] -> [BATCH_SIZE, 3, 3, 1]
    inputs = tf.reshape(inputs, [-1, 3, 3, 1])

    # [BATCH_SIZE, 3, 3, 1] -> [BATCH_SIZE, 3, 3, 8]
    with tf.variable_scope("conv_0"):
        weights = tf.get_variable("weights", [3, 3, 1, 8],
                                  initializer=tf.random_normal_initializer())
        bias = tf.get_variable("bias", [8],
                               initializer=tf.random_normal_initializer())
        layer = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding="SAME")
        layer = tf.nn.bias_add(layer, bias)
        layer = tf.nn.relu(layer)

    # [BATCH_SIZE, 3, 3, 8] -> [BATCH_SIZE, 3 * 3 * 8]
    layer = tf.reshape(layer, [-1, 3 * 3 * 8])

    # [BATCH_SIZE, 3 * 3 * 8] -> [BATCH_SIZE, LABEL_SIZE]
    with tf.variable_scope("output_layer"):
        weights = tf.get_variable("weights", [3 * 3 * 8, FLAGS.label_size],
                                  initializer=tf.random_normal_initializer())
        bias = tf.get_variable("bias", [FLAGS.label_size],
                               initializer=tf.random_normal_initializer())
        layer = tf.add(tf.matmul(layer, weights), bias)

    return layer
def create_positional_emb_2d(self, targets):
    """Learned 2d positional embedding for images."""
    mesh = targets.mesh

    positional_emb_rows_var = mtf.get_variable(
        mesh, "positional_emb_rows",
        mtf.Shape([self.pos_dim, self.model_dim]),
        initializer=tf.random_normal_initializer(),
        activation_dtype=self.activation_type)
    positional_emb_cols_var = mtf.get_variable(
        mesh, "positional_emb_cols",
        mtf.Shape([self.pos_dim, self.model_dim]),
        initializer=tf.random_normal_initializer(),
        activation_dtype=self.activation_type)

    targets_position_x = mtf.range(mesh, self.rows_dim, dtype=tf.int32)
    targets_position_y = mtf.range(mesh, self.cols_dim, dtype=tf.int32)
    position_x = mtf.broadcast(
        mtf.gather(positional_emb_rows_var, targets_position_x, self.pos_dim),
        mtf.Shape([self.rows_dim, self.cols_dim, self.model_dim]))
    position_y = mtf.broadcast(
        mtf.gather(positional_emb_cols_var, targets_position_y, self.pos_dim),
        mtf.Shape([self.rows_dim, self.cols_dim, self.model_dim]))
    return position_x + position_y
def __init__(self, sess, n_features, lr=0.01):
    self.sess = sess

    self.s = tf.placeholder(tf.float32, [1, n_features], "state")
    self.v_ = tf.placeholder(tf.float32, [1, 1], "v_next")
    self.r = tf.placeholder(tf.float32, None, 'r')

    with tf.variable_scope('Critic'):
        l1 = tf.layers.dense(
            inputs=self.s,
            units=20,  # number of hidden units
            activation=tf.nn.relu,  # None
            # would have to be linear to guarantee the convergence of the actor,
            # but a linear approximator hardly learns the correct Q.
            kernel_initializer=tf.random_normal_initializer(0., .1),  # weights
            bias_initializer=tf.constant_initializer(0.1),  # biases
            name='l1'
        )
        self.v = tf.layers.dense(
            inputs=l1,
            units=1,  # output units
            activation=None,
            kernel_initializer=tf.random_normal_initializer(0., .1),  # weights
            bias_initializer=tf.constant_initializer(0.1),  # biases
            name='V'
        )

    with tf.variable_scope('squared_TD_error'):
        self.td_error = self.r + GAMMA * self.v_ - self.v
        self.loss = tf.square(self.td_error)  # TD_error = (r+gamma*V_next) - V_eval
    with tf.variable_scope('train'):
        self.train_op = tf.train.AdamOptimizer(lr).minimize(self.loss)
def _build_net(self):
    with tf.name_scope('inputs'):
        self.tf_obs = tf.placeholder(tf.float32, [None, self.n_features], name='observation')
        self.tf_acts = tf.placeholder(tf.int32, [None, ], name='actions_num')
        self.tf_vt = tf.placeholder(tf.float32, [None, ], name='actions_value')

    layer = tf.layers.dense(
        inputs=self.tf_obs,
        units=10,
        activation=tf.nn.tanh,
        kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
        bias_initializer=tf.constant_initializer(0.1),
        name='fc1'
    )
    all_act = tf.layers.dense(
        inputs=layer,
        units=self.n_actions,
        activation=None,
        kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
        bias_initializer=tf.constant_initializer(0.1),
        name='fc2'
    )

    self.all_act_prob = tf.nn.softmax(all_act, name='act_prob')

    with tf.name_scope('loss'):
        # neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.all_act_prob, labels=self.tf_acts)
        neg_log_prob = tf.reduce_sum(-tf.log(self.all_act_prob) * tf.one_hot(indices=self.tf_acts, depth=self.n_actions), axis=1)
        loss = tf.reduce_mean(neg_log_prob * self.tf_vt)
    with tf.name_scope('train'):
        self.train_op = tf.train.AdamOptimizer(self.lr).minimize(loss)
def __init__(self, sess, n_features, n_actions, lr=0.001):
    self.sess = sess

    self.s = tf.placeholder(tf.float32, [1, n_features], "state")
    self.a = tf.placeholder(tf.int32, None, "act")
    self.td_error = tf.placeholder(tf.float32, None, "td_error")  # TD_error

    with tf.variable_scope('Actor'):
        l1 = tf.layers.dense(
            inputs=self.s,
            units=20,  # number of hidden units
            activation=tf.nn.relu,
            kernel_initializer=tf.random_normal_initializer(0., .1),  # weights
            bias_initializer=tf.constant_initializer(0.1),  # biases
            name='l1'
        )
        self.acts_prob = tf.layers.dense(
            inputs=l1,
            units=n_actions,  # output units
            activation=tf.nn.softmax,  # get action probabilities
            kernel_initializer=tf.random_normal_initializer(0., .1),  # weights
            bias_initializer=tf.constant_initializer(0.1),  # biases
            name='acts_prob'
        )

    with tf.variable_scope('exp_v'):
        log_prob = tf.log(self.acts_prob[0, self.a])
        self.exp_v = tf.reduce_mean(log_prob * self.td_error)  # advantage (TD_error) guided loss

    with tf.variable_scope('train'):
        self.train_op = tf.train.AdamOptimizer(lr).minimize(-self.exp_v)  # minimize(-exp_v) = maximize(exp_v)
def decoder(input, output_dim, training, stddev=0.02, bias_value=0, reuse=False):
    w1 = tf.get_variable("w1", [input.get_shape()[1], 1000],
                         initializer=tf.random_normal_initializer(stddev=stddev))
    b1 = tf.get_variable("b1", [1000], initializer=tf.constant_initializer(bias_value))
    w2 = tf.get_variable("w2", [1000, 1000],
                         initializer=tf.random_normal_initializer(stddev=stddev))
    b2 = tf.get_variable("b2", [1000], initializer=tf.constant_initializer(bias_value))
    w3 = tf.get_variable("w3", [1000, output_dim],
                         initializer=tf.random_normal_initializer(stddev=stddev))
    b3 = tf.get_variable("b3", [output_dim], initializer=tf.constant_initializer(bias_value))

    fc1 = tf.nn.relu(tf.matmul(input, w1) + b1, name='relu1')
    fc2 = tf.nn.relu(tf.matmul(fc1, w2) + b2, name='relu2')
    fc3 = tf.nn.sigmoid(tf.matmul(fc2, w3) + b3)

    if not reuse:
        tf.histogram_summary('DE/L1/activation', fc1)
        tf.histogram_summary('DE/L1/weight', w1)
        tf.histogram_summary('DE/L1/bias', b1)
        tf.scalar_summary('DE/L1/sparsity', tf.nn.zero_fraction(fc1))

        tf.histogram_summary('DE/L2/activation', fc2)
        tf.histogram_summary('DE/L2/weight', w2)
        tf.histogram_summary('DE/L2/bias', b2)
        tf.scalar_summary('DE/L2/sparsity', tf.nn.zero_fraction(fc2))

        tf.histogram_summary('DE/L3/activation', fc3)
        tf.histogram_summary('DE/L3/weight', w3)
        tf.histogram_summary('DE/L3/bias', b3)
        tf.scalar_summary('DE/L3/sparsity', tf.nn.zero_fraction(fc3))

    return fc3, [w1, b1, w2, b2, w3, b3]
def build_graph(self, test_decoder_logits):
    print('starting building graph [sentiment-discriminator]')
    with tf.variable_scope("sentiment") as scope:
        self.inputs = tf.slice(test_decoder_logits, [0, 0, 0],
                               [self.batch_size, self.max_length, self.vocab_size])
        # variables
        weights = {
            'w2v': tf.get_variable(initializer=tf.random_uniform_initializer(-0.1, 0.1, dtype=tf.float32),
                                   shape=[self.vocab_size, self.embedding_dim], name='w2v'),
            'out_1': tf.get_variable(initializer=tf.random_normal_initializer(),
                                     shape=[self.unit_size * 2, 1], name='w_out_1'),
        }
        biases = {
            'out_1': tf.get_variable(initializer=tf.random_normal_initializer(),
                                     shape=[1], name='b_out_1'),
        }

        # structure
        def BiRNN(x):
            x = tf.unstack(x, self.max_length, 1)
            lstm_fw_cell = tf.contrib.rnn.BasicLSTMCell(self.unit_size, forget_bias=1.0)
            lstm_bw_cell = tf.contrib.rnn.BasicLSTMCell(self.unit_size, forget_bias=1.0)
            outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x,
                                                                    dtype=tf.float32)
            return outputs[-1]

        self.inputs_softmax = tf.nn.softmax(tf.scalar_mul(tf.constant(5.0, shape=[]), self.inputs))
        y_list = []
        for i in range(self.inputs.get_shape().as_list()[0]):
            y = tf.matmul(self.inputs_softmax[i], weights['w2v'])
            y = tf.reshape(y, [1, self.max_length, self.embedding_dim])
            y_list.append(y)
        embbed_layer = tf.concat(y_list, 0)
        layer_1 = BiRNN(embbed_layer)
        pred = tf.matmul(layer_1, weights['out_1']) + biases['out_1']
        # get score
        self.score = tf.sigmoid(pred)
def discriminator(X, reuse=False):
    with tf.variable_scope('discriminator'):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        J = 784
        K = 128
        L = 1

        W1 = tf.get_variable('D_W1', [J, K],
                             initializer=tf.random_normal_initializer(stddev=xavier_init([J, K])))
        B1 = tf.get_variable('D_B1', [K], initializer=tf.constant_initializer())
        W2 = tf.get_variable('D_W2', [K, L],
                             initializer=tf.random_normal_initializer(stddev=xavier_init([K, L])))
        B2 = tf.get_variable('D_B2', [L], initializer=tf.constant_initializer())

        # summary
        tf.summary.histogram('weight1', W1)
        tf.summary.histogram('weight2', W2)
        tf.summary.histogram('biases1', B1)
        tf.summary.histogram('biases2', B2)

        fc1 = tf.nn.relu((tf.matmul(X, W1) + B1))
        logits = tf.matmul(fc1, W2) + B2
        prob = tf.nn.sigmoid(logits)
        return prob, logits
def model(self, images, input_size, output_size, isEval=None):
    with tf.variable_scope('nn_hidden1', reuse=isEval):
        # Declaring variables
        weights_h1 = tf.get_variable(
            "weights_h1", [input_size, self.num_hidden1],
            initializer=tf.random_normal_initializer(
                0.0, 1.0 / math.sqrt(float(input_size)), seed=self.SEED))
        weights_out = tf.get_variable(
            "weights_out", [self.num_hidden1, output_size],
            initializer=tf.random_normal_initializer(
                0.0, 1.0 / math.sqrt(float(self.num_hidden1)), seed=self.SEED))
        biases_b1 = tf.get_variable("biases_b1", [self.num_hidden1],
                                    initializer=tf.constant_initializer(0.0))
        biases_out = tf.get_variable("biases_out", [output_size],
                                     initializer=tf.constant_initializer(0.0))

        # Constructing the network, with dropout
        layer_1 = tf.nn.relu(tf.add(tf.matmul(images, weights_h1), biases_b1))
        layer_1drop = tf.nn.dropout(layer_1, self.keep_prob)

        # Evaluating logits
        logits_drop = tf.matmul(layer_1drop, weights_out) + biases_out
        logits = tf.matmul(layer_1, weights_out) + biases_out
        reg_linear = tf.nn.l2_loss(weights_out) + tf.nn.l2_loss(weights_h1)

    if isEval:
        return logits
    else:
        regularizers = reg_linear
        return (logits_drop, regularizers)
def initializeParameters(self, m, n):
    """
    Arguments:
    m -- number of users
    n -- number of items

    Returns:
    parameters -- parameters['b'], global bias, scalar
                  parameters['u'], users bias, shape (m, 1)
                  parameters['d'], item bias, shape (1, n)
                  parameters['P'], users feature matrix, shape (m, K)
                  parameters['Q'], items feature matrix, shape (n, K)
    """
    k = self.K
    parameters = {}
    parameters['b'] = tf.get_variable(name='b', dtype=tf.float64, shape=[],
                                      initializer=tf.zeros_initializer())
    parameters['u'] = tf.get_variable(name='u', dtype=tf.float64, shape=[m, 1],
                                      initializer=tf.zeros_initializer())
    parameters['d'] = tf.get_variable(name='d', dtype=tf.float64, shape=[1, n],
                                      initializer=tf.zeros_initializer())
    parameters['P'] = tf.get_variable(name='P', dtype=tf.float64, shape=[m, k],
                                      initializer=tf.random_normal_initializer())
    parameters['Q'] = tf.get_variable(name='Q', dtype=tf.float64, shape=[n, k],
                                      initializer=tf.random_normal_initializer())
    return parameters
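# Hedged NumPy sketch of the biased matrix-factorization prediction these parameters
# support: R_hat = b + u + d + P @ Q.T, with the scalar and the two bias vectors
# broadcast over the (m, n) rating matrix. Toy shapes, not the real data.
import numpy as np

m, n, k = 3, 4, 2
b = 0.1
u = np.zeros((m, 1))
d = np.zeros((1, n))
P = np.random.randn(m, k)
Q = np.random.randn(n, k)
r_hat = b + u + d + P @ Q.T                        # shape (m, n)
print(r_hat.shape)                                 # (3, 4)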
def __init__(self, sess, n_features, gamma=0.9, lr=0.01):
    self.sess = sess
    self.s = tf.placeholder(tf.float32, [1, n_features], name='state')
    self.v_ = tf.placeholder(tf.float32, [1, 1], name='v_next')
    self.r = tf.placeholder(tf.float32, None, name='r')

    with tf.variable_scope('Critic'):
        l1 = tf.layers.dense(
            inputs=self.s,
            units=20,
            activation=tf.nn.relu,
            kernel_initializer=tf.random_normal_initializer(0, 0.1),
            bias_initializer=tf.constant_initializer(0.1),
            name='l1'
        )
        self.v = tf.layers.dense(
            inputs=l1,
            units=1,
            activation=None,
            kernel_initializer=tf.random_normal_initializer(0, 0.1),
            bias_initializer=tf.constant_initializer(0.1),
            name='V'
        )

    with tf.variable_scope('squared_TD_error'):
        self.td_error = self.r + gamma * self.v_ - self.v
        self.loss = tf.square(self.td_error)
    with tf.variable_scope('train'):
        self.train_op = tf.train.AdamOptimizer(lr).minimize(self.loss)
def __init__(self, encoder_rnn_output, label_onehot, is_training=True):
    self.encoder_rnn_output = encoder_rnn_output
    self.label_onehot = label_onehot
    self.is_training = is_training

    with tf.variable_scope("encoder_linear1"):
        context_to_hidden_W = tf.get_variable(
            name="context_to_hidden_W",
            shape=[FLAGS.RNN_SIZE + FLAGS.LABEL_CLASS, 100],
            dtype=tf.float32,
            initializer=tf.random_normal_initializer(stddev=0.1))
        context_to_hidden_b = tf.get_variable(name="context_to_hidden_b",
                                              shape=[100], dtype=tf.float32)

    with tf.variable_scope("encoder_linear2"):
        context_to_mu_W = tf.get_variable(
            name="context_to_mu_W",
            shape=[100, FLAGS.LATENT_VARIABLE_SIZE],
            dtype=tf.float32,
            initializer=tf.random_normal_initializer(stddev=0.1))
        context_to_mu_b = tf.get_variable(name="context_to_mu_b",
                                          shape=[FLAGS.LATENT_VARIABLE_SIZE],
                                          dtype=tf.float32)
        context_to_logvar_W = tf.get_variable(
            name="context_to_logvar_W",
            shape=[100, FLAGS.LATENT_VARIABLE_SIZE],
            dtype=tf.float32,
            initializer=tf.random_normal_initializer(stddev=0.1))
        context_to_logvar_b = tf.get_variable(
            name="context_to_logvar_b",
            shape=[FLAGS.LATENT_VARIABLE_SIZE],
            dtype=tf.float32)

    with tf.name_scope("rnn_output_and_label"):
        rnn_output_and_label = tf.concat((encoder_rnn_output, self.label_onehot),
                                         axis=1,
                                         name="concat_encoder_rnn_output_and_label")
    with tf.name_scope("sampler_hiddenstate"):
        h = tf.nn.relu(tf.matmul(rnn_output_and_label, context_to_hidden_W) + context_to_hidden_b)
    with tf.name_scope("mu"):
        self.mu = tf.matmul(h, context_to_mu_W) + context_to_mu_b
    with tf.name_scope("log_var"):
        self.logvar = tf.matmul(h, context_to_logvar_W) + context_to_logvar_b
    with tf.name_scope("z"):
        z = tf.truncated_normal((FLAGS.BATCH_SIZE, FLAGS.LATENT_VARIABLE_SIZE), stddev=1.0)
    with tf.name_scope("latent_variables"):
        self.latent_variables = self.mu + tf.exp(0.5 * self.logvar) * z
def __init__(self, sess, n_features, n_actions, lr=0.001):
    self.sess = sess
    self.s = tf.placeholder(tf.float32, [1, n_features], name='state')
    self.a = tf.placeholder(tf.int32, None, name='act')
    self.td_error = tf.placeholder(tf.float32, None, "td_error")

    with tf.variable_scope('Actor'):
        l1 = tf.layers.dense(
            inputs=self.s,
            units=20,
            activation=tf.nn.relu,
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.1),
            bias_initializer=tf.constant_initializer(0.1),
            name='l1'
        )
        self.acts_prob = tf.layers.dense(
            inputs=l1,
            units=n_actions,
            activation=tf.nn.softmax,
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.1),
            bias_initializer=tf.constant_initializer(0.1),
            name='acts_prob'
        )

    with tf.variable_scope('exp_v'):
        log_prob = tf.log(self.acts_prob[0, self.a])
        self.exp_v = tf.reduce_mean(log_prob * self.td_error)

    with tf.variable_scope('train'):
        self.train_op = tf.train.AdamOptimizer(lr).minimize(-self.exp_v)
def poseEvalNetTiny(lin, locs, conf, trainPhase, dropout):
    lin_sz = tf.Tensor.get_shape(lin).as_list()
    lin_numel = reduce(operator.mul, lin_sz[1:], 1)
    lin_re = tf.reshape(lin, [-1, lin_numel])
    lin_re = tf.nn.dropout(lin_re, dropout)
    with tf.variable_scope('lin_fc'):
        weights = tf.get_variable("weights", [lin_numel, conf.nfcfilt],
                                  initializer=tf.random_normal_initializer(stddev=0.005))
        biases = tf.get_variable("biases", conf.nfcfilt,
                                 initializer=tf.constant_initializer(0))
        lin_fc = tf.nn.relu(batch_norm_2D(tf.matmul(lin_re, weights) + biases, trainPhase))

    loc_sz = tf.Tensor.get_shape(locs).as_list()
    loc_numel = reduce(operator.mul, loc_sz[1:], 1)
    loc_re = tf.reshape(locs, [-1, loc_numel])
    with tf.variable_scope('loc_fc'):
        weights = tf.get_variable("weights", [loc_numel, conf.nfcfilt],
                                  initializer=tf.random_normal_initializer(stddev=0.005))
        biases = tf.get_variable("biases", conf.nfcfilt,
                                 initializer=tf.constant_initializer(0))
        loc_fc = tf.nn.relu(batch_norm_2D(tf.matmul(loc_re, weights) + biases, trainPhase))

    joint_fc = tf.concat(1, [lin_fc, loc_fc])

    with tf.variable_scope('fc1'):
        weights = tf.get_variable("weights", [conf.nfcfilt * 2, conf.nfcfilt],
                                  initializer=tf.random_normal_initializer(stddev=0.005))
        biases = tf.get_variable("biases", conf.nfcfilt,
                                 initializer=tf.constant_initializer(0))
        joint_fc1 = tf.nn.relu(batch_norm_2D(tf.matmul(joint_fc, weights) + biases, trainPhase))
    with tf.variable_scope('fc2'):
        weights = tf.get_variable("weights", [conf.nfcfilt, conf.nfcfilt],
                                  initializer=tf.random_normal_initializer(stddev=0.005))
        biases = tf.get_variable("biases", conf.nfcfilt,
                                 initializer=tf.constant_initializer(0))
        joint_fc2 = tf.nn.relu(batch_norm_2D(tf.matmul(joint_fc1, weights) + biases, trainPhase))
    with tf.variable_scope('out'):
        weights = tf.get_variable("weights", [conf.nfcfilt, 2],
                                  initializer=tf.random_normal_initializer(stddev=0.005))
        biases = tf.get_variable("biases", 2,
                                 initializer=tf.constant_initializer(0))
        out = tf.matmul(joint_fc2, weights) + biases

    layer_dict = {'lin_fc': lin_fc,
                  'loc_fc': loc_fc,
                  'joint_fc1': joint_fc1,
                  'joint_fc2': joint_fc2,
                  'out': out}
    return out, layer_dict
def add_model(self, inputs):
    """Creates the RNN LM model.

    In the space provided below, you need to implement the equations for the
    RNN LM model. Note that you may NOT use built in rnn_cell functions from
    tensorflow.

    Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial
          state for the RNN. Add this to self as instance variable
          self.initial_state (Don't change variable name)
    Hint: Add the last RNN output to self as instance variable
          self.final_state (Don't change variable name)
    Hint: Make sure to apply dropout to the inputs and the outputs.
    Hint: Use a variable scope (e.g. "RNN") to define RNN variables.
    Hint: Perform an explicit for-loop over inputs. You can use
          scope.reuse_variables() to ensure that the weights used at each
          iteration (each time-step) are the same. (Make sure you don't call
          this for iteration 0 though or nothing will be initialized!)
    Hint: Here are the dimensions of the various variables you will need to
          create:
          H: (hidden_size, hidden_size)
          I: (embed_size, hidden_size)
          b_1: (hidden_size,)

    Args:
        inputs: List of length num_steps, each of whose elements should be
                a tensor of shape (batch_size, embed_size).
    Returns:
        outputs: List of length num_steps, each of whose elements should be
                 a tensor of shape (batch_size, hidden_size)
    """
    ### YOUR CODE HERE
    self.initial_state = tf.zeros([self.config.batch_size, self.config.hidden_size])
    rnn_outputs = []
    with tf.variable_scope("RNN") as scope:
        H = tf.get_variable("H", [self.config.hidden_size, self.config.hidden_size],
                            initializer=tf.random_normal_initializer())
        I = tf.get_variable("I", [self.config.embed_size, self.config.hidden_size],
                            initializer=tf.random_normal_initializer())
        b_1 = tf.Variable(tf.zeros([self.config.hidden_size]), name="b_1")
        # loop over the number of steps, reusing the weights from step 1 onward
        state = self.initial_state
        for i in xrange(len(inputs)):
            if i > 0:
                scope.reuse_variables()
            tmp = tf.matmul(state, H) + tf.matmul(inputs[i], I) + b_1
            state = tf.nn.dropout(tf.sigmoid(tmp), self.dropout_placeholder)
            rnn_outputs.append(state)
        self.final_state = state
    ### END YOUR CODE
    return rnn_outputs
def inference(input):
    weights = tf.get_variable("weights", [784, 10],
                              initializer=tf.random_normal_initializer())
    bias = tf.get_variable("bias", [10],
                           initializer=tf.random_normal_initializer())
    logits = tf.matmul(input, weights) + bias
    return logits
def conv_fire_module(name, prevLayerOut, prevLayerDim, fireDims, wd=None, **kwargs):
    USE_FP_16 = kwargs.get('usefp16')
    dtype = tf.float16 if USE_FP_16 else tf.float32

    existingParams = kwargs.get('existingParams')

    if fireDims.get('cnn3x3'):
        cnnName = 'cnn3x3'
        kernelSize = 3
    if fireDims.get('cnn5x5'):
        cnnName = 'cnn5x5'
        kernelSize = 5
    if fireDims.get('cnn7x7'):
        cnnName = 'cnn7x7'
        kernelSize = 7

    with tf.variable_scope(name):
        with tf.variable_scope(cnnName) as scope:
            layerName = scope.name.replace("/", "_")
            # kernel = _variable_with_weight_decay(
            #     'weights',
            #     shape=[kernelSize, kernelSize, prevLayerDim, fireDims['cnn3x3']],
            #     initializer=existingParams[layerName]['weights']
            #     if (existingParams is not None and layerName in existingParams) else
            #     (tf.contrib.layers.xavier_initializer_conv2d() if kwargs.get('phase') == 'train' else
            #      tf.constant_initializer(0.0, dtype=dtype)),
            #     dtype=dtype, wd=wd,
            #     trainable=kwargs.get('tuneExistingWeights')
            #     if (existingParams is not None and layerName in existingParams) else True)
            stddev = np.sqrt(2 / np.prod(prevLayerOut.get_shape().as_list()[1:]))
            kernel = _variable_with_weight_decay(
                'weights',
                shape=[kernelSize, kernelSize, prevLayerDim, fireDims[cnnName]],
                initializer=existingParams[layerName]['weights']
                if (existingParams is not None and layerName in existingParams) else
                (tf.random_normal_initializer(stddev=stddev)
                 if kwargs.get('phase') == 'train' else
                 tf.constant_initializer(0.0, dtype=dtype)),
                dtype=dtype,
                wd=wd,
                trainable=kwargs.get('tuneExistingWeights')
                if (existingParams is not None and layerName in existingParams) else True)
            conv = tf.nn.conv2d(prevLayerOut, kernel, [1, 1, 1, 1], padding='SAME')

            if kwargs.get('weightNorm'):
                # calc weight norm
                conv = batch_norm('weight_norm', conv, dtype)

            if existingParams is not None and layerName in existingParams:
                biases = tf.get_variable('biases',
                                         initializer=existingParams[layerName]['biases'],
                                         dtype=dtype)
            else:
                biases = tf.get_variable('biases', [fireDims[cnnName]],
                                         initializer=tf.constant_initializer(0.0),
                                         dtype=dtype)

            conv = tf.nn.bias_add(conv, biases)
            convRelu = tf.nn.relu(conv, name=scope.name)
            _activation_summary(convRelu)

    return convRelu, fireDims[cnnName]
def fconv_layer(input_data, filter_num, name, is_train=True, padding="VALID",
                init_method="xavier", bias_term=True, is_pretrain=True):
    """
    fully conv layer
    :param input_data: the input data
    :param filter_num: the number of convolutional kernels
    :param name: name of the layer
    :param is_train: if False, skip this layer, default is True
    :param padding: the padding method, "SAME" | "VALID" (default: "VALID")
    :param init_method: the method of weights initialization (default: xavier)
    :param bias_term: whether the bias term exists or not (default: True)
    :param is_pretrain: whether the parameters are trainable (default: True)
    :return: output: a 4-D tensor [batch_size, height, width, channel]
    """
    if is_train is True:
        shape = input_data.get_shape()
        conv_height, conv_width, conv_channel = shape[1].value, shape[2].value, shape[3].value
        with tf.variable_scope(name):
            # the method of weights initialization
            if init_method == "xavier":
                initializer = tf.contrib.layers.xavier_initializer()
            elif init_method == "gaussian":
                initializer = tf.random_normal_initializer(stddev=0.01)
            else:
                initializer = tf.truncated_normal_initializer(stddev=0.01)

            weights = tf.get_variable(name="weights",
                                      shape=[conv_height, conv_width, conv_channel, filter_num],
                                      dtype=tf.float32,
                                      initializer=initializer,
                                      trainable=is_pretrain)
            biases = tf.get_variable(name="biases",
                                     shape=[filter_num],
                                     dtype=tf.float32,
                                     initializer=tf.constant_initializer(0.0),
                                     trainable=is_pretrain)
            feature_map = tf.nn.conv2d(input=input_data,
                                       filter=weights,
                                       strides=[1, 1, 1, 1],
                                       padding=padding,
                                       name="conv")
            # bias term
            if bias_term is True:
                output = tf.nn.bias_add(value=feature_map, bias=biases, name="biases_add")
            else:
                output = feature_map
    else:
        output = input_data

    # info show
    shape = output.get_shape()
    print("name: %s, shape: (%d, %d, %d)" % (name, shape[1], shape[2], shape[3]))

    return output
def generator(z_sample, isTrainable=True, reuse=False, name='theta_generator'):
    with tf.variable_scope(name) as scope:
        # decoder_activations = {}
        if reuse:
            scope.reuse_variables()
        # print('z_sample.shape: ', z_sample.shape)

        z_sample = tf.layers.dense(
            z_sample, 4 * 4 * 1024,
            activation=None,
            trainable=isTrainable,
            reuse=reuse,
            name='dec_dense_fc_first_layer',
            kernel_initializer=tf.truncated_normal_initializer(stddev=stddev))
        z_sample = tf.layers.batch_normalization(z_sample, training=isTrainable,
                                                 reuse=reuse, name='bn_0')
        z_sample = tf.nn.relu(z_sample)
        z_sample = tf.reshape(z_sample, [-1, 4, 4, 1024])

        # -> 8x8x512
        deconv1 = tf.layers.conv2d_transpose(
            z_sample,
            kernel_initializer=tf.random_normal_initializer(stddev=stddev),
            filters=512, kernel_size=[3, 3], padding='SAME',
            activation=None, strides=(2, 2),
            name='dec_deconv1_layer', trainable=isTrainable, reuse=reuse)
        deconv1 = tf.layers.batch_normalization(deconv1, training=isTrainable,
                                                reuse=reuse, name='bn_1')
        deconv1 = tf.nn.relu(deconv1, name='relu_deconv_1')

        # -> 16x16x256
        deconv2 = tf.layers.conv2d_transpose(
            deconv1,
            kernel_initializer=tf.truncated_normal_initializer(stddev=stddev),
            filters=256, kernel_size=[5, 5], padding='SAME',
            activation=None, strides=(2, 2),
            name='dec_deconv2_layer', trainable=isTrainable, reuse=reuse)
        deconv2 = tf.layers.batch_normalization(deconv2, training=isTrainable,
                                                reuse=reuse, name='bn_2')
        deconv2 = tf.nn.relu(deconv2, name='relu_deconv_2')

        # -> 32x32x128
        deconv3 = tf.layers.conv2d_transpose(
            deconv2,
            kernel_initializer=tf.truncated_normal_initializer(stddev=stddev),
            filters=128, kernel_size=[5, 5], padding='SAME',
            activation=None, strides=(2, 2),
            name='dec_deconv3_layer', trainable=isTrainable, reuse=reuse)
        deconv3 = tf.layers.batch_normalization(deconv3, training=isTrainable,
                                                reuse=reuse, name='bn_3')
        deconv3 = tf.nn.relu(deconv3, name='relu_deconv_3')

        # -> 64x64x64
        deconv4 = tf.layers.conv2d_transpose(
            deconv3,
            kernel_initializer=tf.truncated_normal_initializer(stddev=stddev),
            filters=64, kernel_size=[5, 5], padding='SAME',
            activation=None, strides=(2, 2),
            name='dec_deconv4_layer', trainable=isTrainable, reuse=reuse)
        deconv4 = tf.layers.batch_normalization(deconv4, training=isTrainable,
                                                reuse=reuse, name='bn_4')
        deconv4 = tf.nn.relu(deconv4, name='relu_deconv_4')

        # -> 64x64x3
        deconv5 = tf.layers.conv2d_transpose(
            deconv4,
            kernel_initializer=tf.truncated_normal_initializer(stddev=stddev),
            filters=3, kernel_size=[5, 5], padding='SAME',
            activation=None, strides=(1, 1),
            name='dec_deconv5_layer', trainable=isTrainable, reuse=reuse)
        # deconv4 = tf.layers.dropout(deconv4, rate=keep_prob, training=True)
        deconv5 = tf.nn.tanh(deconv5)

        deconv_5_reshaped = tf.reshape(deconv5, [-1, img_height, img_width, num_channels])
        return deconv_5_reshaped
def main(args):
    rmf_path = os.path.join(args.data_path, RATING_MATRIX_FILE)
    train_file = os.path.join(args.data_path, TRAIN_FILE)
    test_file = os.path.join(args.data_path, TEST_FILE)

    print(rmf_path)
    print(train_file)
    print(test_file)

    rm = Reader.get_rating_matrix(rmf_path)
    train_data = Reader(train_file, rm, args.batch_size, args.chunk_size, args.num_ratings)
    test_data = Reader(test_file, rm, args.batch_size, args.chunk_size, args.num_ratings)

    num_users = train_data.num_users
    num_items = train_data.num_items
    epochs = args.num_epochs

    print(rm.shape)
    print("Number of users: ", num_users)
    print("Number of items: ", num_items)

    if args.chunk_size == 0:
        cs = train_data.max_seq_len
    else:
        cs = min(args.chunk_size, train_data.max_seq_len)

    settings = {
        "batch_size": args.batch_size,
        "chunk_size": cs,
        "lr": args.learning_rate,
        "rho": args.decay,
        "num_users": num_users,
        "num_items": num_items,
        "num_units": args.num_units,
        "k": args.num_ratings,
        "cell_act": args.cell_activation,
        # "debug": True,
    }
    print("Model configuration:\n", settings)

    train_errors = list()
    test_errors = list()
    _train_rmse = list()
    _test_rmse = list()

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_normal_initializer(mean=0, stddev=1 / sqrt(args.num_units))
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            train_model = RNADECF(is_training=True, **settings)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            test_model = RNADECF(is_training=False, **settings)

        # Initializing all model weights
        tf.global_variables_initializer().run()

        with open("log.txt", "w+", 0) as log:
            log.write("Rating matrix file path : [{}]\n".format(rmf_path))
            log.write("Training data file : [{}]\n".format(train_file))
            log.write("Testing data file : [{}]\n".format(test_file))
            log.write("Model configuration:\n{}\n".format(settings))
            for i in range(1, epochs + 1):
                train_err, test_err, train_rmse, test_rmse = run_epoch(
                    train_model, test_model, session, train_data, test_data, log)
                log.write("Epoch {}: Training error {}\n".format(i, train_err))
                log.write("Epoch {}: Test error {}\n".format(i, test_err))
                log.write("Epoch {}: Training RMSE {}\n".format(i, train_rmse))
                log.write("Epoch {}: Test RMSE {}\n".format(i, test_rmse))
                print("Epoch {}: Training error {}\n".format(i, train_err))
                print("Epoch {}: Test error {}\n".format(i, test_err))
                print("Epoch {}: Training RMSE {}\n".format(i, train_rmse))
                print("Epoch {}: Test RMSE {}\n".format(i, test_rmse))
                train_errors.append(train_err)
                test_errors.append(test_err)
                _train_rmse.append(train_rmse)
                _test_rmse.append(test_rmse)

    x = range(epochs)
    f, axarr = plt.subplots(2, sharex=True)
    axarr[0].plot(x, train_errors)
    axarr[0].plot(x, test_errors)
    axarr[0].scatter(x, train_errors)
    axarr[0].scatter(x, test_errors)
    axarr[0].set_title('Cross-entropy loss')
    axarr[1].plot(x, _train_rmse)
    axarr[1].plot(x, _test_rmse)
    axarr[1].scatter(x, _train_rmse)
    axarr[1].scatter(x, _test_rmse)
    axarr[1].set_title('RMSE')
    plt.savefig("results.png")
tf.reset_default_graph()

ad = [8.32329655, 8.32329655, 8.32329845, 8.32329845, 8.32329845, 8.32329845,
      8.32329845, 8.32329941, 8.28163242, 8.32329845, 8.32329845, 8.32329655,
      8.32329941, 8.32329655, 8.32329845, 8.32329655, 8.32329845, 8.32329845,
      8.32329655, 8.3231554, 8.32329655, 8.32329845, 8.32325554, 8.32329845,
      8.32329655, 8.28935242, 8.32329655, 8.3232975, 8.32329941, 8.32329845,
      8.32329845, 8.32329082, 8.32329845, 8.32329655, 8.32329845, 8.32329845,
      8.32329845, 8.32329845, 8.32329655, 8.3232975, 8.32329655, 8.32317448,
      8.32329655, 8.3231802, 8.32329845, 8.27982044, 8.32329655, 8.32329845,
      8.32329845, 8.32329845, 8.3232975, 8.32329845, 8.3232975, 8.3232975,
      8.32329845, 8.3232975, 8.27784348, 8.32329845, 8.32308769, 8.32329845,
      8.32314777, 8.30504799, 8.32329845, 8.3232975]

a = tf.Variable(ad)
b = tf.get_variable('b', shape=(2, 3, 5), initializer=tf.random_normal_initializer())

batch_start = tf.cast(tf.ones_like(b[:, 0:1, 0:1]), dtype=tf.int64)
batch_start = tf.multiply(batch_start, 2)
batch_start = tf.squeeze(batch_start, axis=[-1])

sess = tf.Session()
sess.run(tf.global_variables_initializer())
print(sess.run(a))
print(sess.run(b))
print()
print(sess.run(batch_start))
print()
from __future__ import print_function, division, absolute_import, unicode_literals

import os
import shutil
import logging
from collections import OrderedDict

import numpy as np
import tensorflow as tf
import tensorflow.contrib as tf_contrib
from tensorflow.contrib.layers.python.layers import layers
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score

from ops import *
from nets.tf_unet.layers import *

logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

weight_init = tf.random_normal_initializer(mean=0.0, stddev=0.1)


def Unet(x, labels, keep_prob=1.0, channels=3, n_class=2, num_layers=5,
         features_root=64, filter_size=3, pool_size=2, summaries=True,
         trainable=True, reuse=False, scope='dis'):
vgg.build(model_input)

# define the new fc layers here
with tf.name_scope('face_weights'):
    # w1 = tf.get_variable("w1", [vgg_out_dim, fc2_num_weights], initializer=tf.random_normal_initializer(stddev=1e-4))
    # b1 = tf.get_variable("b1", initializer=tf.zeros([fc2_num_weights]))
    #
    # w2 = tf.get_variable("w2", [fc2_num_weights, fin_out_dim], initializer=tf.random_normal_initializer(stddev=1e-4))
    # b2 = tf.get_variable("b2", initializer=tf.zeros([fin_out_dim]))

    vgg_oup = vgg.fc6
    fc1 = tf.layers.dropout(
        tf.layers.dense(vgg_oup, fc2_num_weights,
                        activation=tf.nn.relu,
                        kernel_initializer=tf.random_normal_initializer(stddev=1e-4),
                        name='fc1'),
        rate=0.2)
    fin_out = tf.layers.dense(
        tf.layers.batch_normalization(fc1), fin_out_dim,
        activation=tf.nn.relu,
        kernel_initializer=tf.random_normal_initializer(stddev=1e-4),
        name='model_output')

    # rest of the computation graph
    # first fc
    # fc2 = tf.nn.relu(tf.nn.dropout(tf.nn.xw_plus_b(vgg_oup, w1, b1), 0.8))
    #
    # second (and last) fc
    # fin_out = tf.nn.relu(tf.nn.xw_plus_b(fc2, w2, b2), name='model_output')

# train op
def layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes):
    """
    Create the layers for a fully convolutional network. Build skip-layers
    using the vgg layers.
    :param vgg_layer3_out: TF Tensor for VGG Layer 3 output
    :param vgg_layer4_out: TF Tensor for VGG Layer 4 output
    :param vgg_layer7_out: TF Tensor for VGG Layer 7 output
    :param num_classes: Number of classes to classify
    :return: The Tensor for the last layer of output
    """
    # conv 1x1 of layer7
    layer7_conv1x1 = tf.layers.conv2d(
        vgg_layer7_out, num_classes, 1, padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    # upsampling
    output1 = tf.layers.conv2d_transpose(
        layer7_conv1x1, num_classes, 4, strides=(2, 2), padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    # conv 1x1 of layer4
    layer4_conv1x1 = tf.layers.conv2d(
        vgg_layer4_out, num_classes, 1, padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    # skip connection
    skip1 = tf.add(output1, layer4_conv1x1)
    # upsampling
    output2 = tf.layers.conv2d_transpose(
        skip1, num_classes, 4, strides=(2, 2), padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    # conv 1x1 of layer3
    layer3_conv1x1 = tf.layers.conv2d(
        vgg_layer3_out, num_classes, 1, padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    # skip connection
    skip2 = tf.add(output2, layer3_conv1x1)
    # upsampling
    final_output = tf.layers.conv2d_transpose(
        skip2, num_classes, 16, strides=(8, 8), padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))

    return final_output
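# Hedged arithmetic check of the upsampling path above: with 'same' padding, a
# conv2d_transpose of stride s multiplies the spatial size by s, so the three
# stages (x2, x2, x8) restore the 32x-downsampled VGG layer7 feature map to the
# input resolution. Illustrative input size only.
def upsampled_size(size, stride):
    # 'same'-padded transposed convolution output size
    return size * stride

size = 5  # e.g. layer7 height for a 160-pixel-tall input (160 / 32)
for stride in (2, 2, 8):
    size = upsampled_size(size, stride)
print(size)  # 160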
WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE)

# -------------------------------------------- Data_preprocess_config
DATASET_NAME = 'coco'  # 'pascal', 'coco'
PIXEL_MEAN = [123.68, 116.779, 103.939]  # R, G, B. In tf, channel is RGB. In openCV, channel is BGR
PIXEL_MEAN_ = [0.485, 0.456, 0.406]
PIXEL_STD = [0.229, 0.224, 0.225]  # R, G, B. In tf, channel is RGB. In openCV, channel is BGR
IMG_SHORT_SIDE_LEN = 600
IMG_MAX_LENGTH = 1000
CLASS_NUM = 80

# --------------------------------------------- Network_config
SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0, stddev=0.01, seed=None)
SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0)
PROBABILITY = 0.01
FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(
    value=-math.log((1.0 - PROBABILITY) / PROBABILITY))
WEIGHT_DECAY = 1e-4

# --------------------------------------------- Anchor config
LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7']
BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512]
ANCHOR_STRIDE = [8, 16, 32, 64, 128]
ANCHOR_SCALES = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
ANCHOR_RATIOS = [0.5, 1.0, 2.0]
ANCHOR_SCALE_FACTORS = None
USE_CENTER_OFFSET = True
def __init__(self,
             level_sizes,
             init_lr_range=(1e-6, 1e-2),
             learnable_decay=True,
             dynamic_output_scale=True,
             use_attention=False,
             use_log_objective=True,
             num_gradient_scales=4,
             zero_init_lr_weights=True,
             use_log_means_squared=True,
             use_relative_lr=True,
             use_extreme_indicator=False,
             max_log_lr=33,
             obj_train_max_multiplier=-1,
             use_problem_lr_mean=False,
             use_gradient_shortcut=False,
             use_lr_shortcut=False,
             use_grad_products=False,
             use_multiple_scale_decays=False,
             learnable_inp_decay=True,
             learnable_rnn_init=True,
             random_seed=None,
             **kwargs):
    """Initializes the RNN per-parameter optimizer.

    The hierarchy consists of up to three levels:
    Level 0: per parameter RNN
    Level 1: per tensor RNN
    Level 2: global RNN

    Args:
      level_sizes: list or tuple with 1, 2, or 3 integers, the number of units
          in each RNN in the hierarchy (level0, level1, level2).
          length 1: only coordinatewise rnn's will be used
          length 2: coordinatewise and tensor-level rnn's will be used
          length 3: a single global-level rnn will be used in addition to
              coordinatewise and tensor-level
      init_lr_range: the range in which to initialize the learning rates
      learnable_decay: whether to learn weights that dynamically modulate the
          input scale via RMS style decay
      dynamic_output_scale: whether to learn weights that dynamically modulate
          the output scale
      use_attention: whether to use attention to train the optimizer
      use_log_objective: whether to train on the log of the objective
      num_gradient_scales: the number of scales to use for gradient history
      zero_init_lr_weights: whether to initialize the lr weights to zero
      use_log_means_squared: whether to track the log of the means_squared,
          used as a measure of signal vs. noise in gradient.
      use_relative_lr: whether to use the relative learning rate as an input
          during training (requires learnable_decay=True)
      use_extreme_indicator: whether to use the extreme indicator for learning
          rates as an input during training (requires learnable_decay=True)
      max_log_lr: the maximum log learning rate allowed during train or test
      obj_train_max_multiplier: max objective increase during a training run
      use_problem_lr_mean: whether to use the mean over all learning rates in
          the problem when calculating the relative learning rate as opposed
          to the per-tensor mean
      use_gradient_shortcut: whether to add a learned affine projection of the
          gradient to the update delta in addition to the gradient function
          computed by the RNN
      use_lr_shortcut: whether to add as input the difference between the log
          lr and the desired log lr (1e-3)
      use_grad_products: whether to use gradient products in the rnn input.
          Only applicable if num_gradient_scales > 1
      use_multiple_scale_decays: whether to use multiple scales for the scale
          decay, as with input decay
      learnable_inp_decay: whether to learn the input decay weights and bias.
      learnable_rnn_init: whether to learn the RNN state initialization.
      random_seed: random seed for random variable initializers. (Default: None)
      **kwargs: args passed to TrainableOptimizer's constructor

    Raises:
      ValueError: If level_sizes is not a length 1, 2, or 3 list.
      ValueError: If there are any non-integer sizes in level_sizes.
      ValueError: If the init lr range is not of length 2.
      ValueError: If the init lr range is not a valid range (min > max).
    """
    if len(level_sizes) not in [1, 2, 3]:
        raise ValueError("HierarchicalRNN only supports 1, 2, or 3 levels in the "
                         "hierarchy, but {} were requested.".format(len(level_sizes)))
    if any(not isinstance(level, int) for level in level_sizes):
        raise ValueError("Level sizes must be integer values, were {}".format(level_sizes))
    if len(init_lr_range) != 2:
        raise ValueError("Initial LR range must be len 2, was {}".format(len(init_lr_range)))
    if init_lr_range[0] > init_lr_range[1]:
        raise ValueError("Initial LR range min is greater than max.")

    self.learnable_decay = learnable_decay
    self.dynamic_output_scale = dynamic_output_scale
    self.use_attention = use_attention
    self.use_log_objective = use_log_objective
    self.num_gradient_scales = num_gradient_scales
    self.zero_init_lr_weights = zero_init_lr_weights
    self.use_log_means_squared = use_log_means_squared
    self.use_relative_lr = use_relative_lr
    self.use_extreme_indicator = use_extreme_indicator
    self.max_log_lr = max_log_lr
    self.use_problem_lr_mean = use_problem_lr_mean
    self.use_gradient_shortcut = use_gradient_shortcut
    self.use_lr_shortcut = use_lr_shortcut
    self.use_grad_products = use_grad_products
    self.use_multiple_scale_decays = use_multiple_scale_decays
    self.learnable_inp_decay = learnable_inp_decay
    self.learnable_rnn_init = learnable_rnn_init
    self.random_seed = random_seed

    self.num_layers = len(level_sizes)
    self.init_lr_range = init_lr_range

    self.reuse_vars = None
    self.reuse_global_state = None
    self.cells = []
    self.init_vectors = []

    with tf.variable_scope(opt.OPTIMIZER_SCOPE):
        self._initialize_rnn_cells(level_sizes)

        # get the cell size for the per-parameter RNN (Level 0)
        cell_size = level_sizes[0]

        # Random normal initialization scaled by the output size. This is the
        # scale for the RNN *readouts*. RNN internal weight scale is set in the
        # BiasGRUCell call.
        scale_factor = FLAGS.hrnn_rnn_readout_scale / math.sqrt(cell_size)
        scaled_init = tf.random_normal_initializer(0., scale_factor,
                                                   seed=self.random_seed)

        # weights for projecting the hidden state to a parameter update
        self.update_weights = tf.get_variable("update_weights",
                                              shape=(cell_size, 1),
                                              initializer=scaled_init)

        if self.use_attention:
            # weights for projecting the hidden state to the location at which
            # the gradient is attended
            self.attention_weights = tf.get_variable(
                "attention_weights",
                initializer=self.update_weights.initialized_value())

        # weights for projecting the hidden state to the RMS decay term
        self._initialize_scale_decay((cell_size, 1), scaled_init)
        self._initialize_input_decay((cell_size, 1), scaled_init)

        self._initialize_lr((cell_size, 1), scaled_init)

    state_keys = ["parameter", "layer", "scl_decay", "inp_decay", "true_param"]

    if self.dynamic_output_scale:
        state_keys.append("log_learning_rate")

    for i in range(self.num_gradient_scales):
        state_keys.append("grad_accum{}".format(i + 1))
        state_keys.append("ms{}".format(i + 1))

    super(HierarchicalRNN, self).__init__(
        "hRNN", state_keys,
        use_attention=use_attention,
        use_log_objective=use_log_objective,
        obj_train_max_multiplier=obj_train_max_multiplier,
        **kwargs)
with tf.variable_scope('conv1') as scope:
    # first, reshape the image to [BATCH_SIZE, 28, 28, 1] to make it work with tf.nn.conv2d
    # use the dynamic dimension -1
    images = tf.reshape(X, [-1, 28, 28, 1], name="input")

    # create kernel variable of dimension [5, 5, 1, 32]
    # use tf.truncated_normal_initializer()
    kernel = tf.get_variable("kernel", shape=[5, 5, 1, 32],
                             initializer=tf.truncated_normal_initializer())

    # create biases variable of dimension [32]
    # use tf.random_normal_initializer()
    biases = tf.get_variable("biases", shape=[32],
                             initializer=tf.random_normal_initializer())

    # apply tf.nn.conv2d with strides [1, 1, 1, 1], padding 'SAME'
    conv1 = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding="SAME")

    # apply relu on the sum of convolution output and biases
    relu1 = tf.nn.relu(conv1 + biases)
    # output is of dimension BATCH_SIZE x 28 x 28 x 32

with tf.variable_scope('pool1') as scope:
    # apply max pool with ksize [1, 2, 2, 1] and strides [1, 2, 2, 1], padding 'SAME'
    pool1 = tf.nn.max_pool(relu1, [1, 2, 2, 1], [1, 2, 2, 1], padding="SAME")
    # output is of dimension BATCH_SIZE x 14 x 14 x 32
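# A standalone size check for the conv/pool pair above (a sketch; X_demo and
# k_demo are hypothetical tensors, not part of the original graph).
X_demo = tf.zeros([8, 28, 28, 1])
k_demo = tf.zeros([5, 5, 1, 32])
conv_demo = tf.nn.conv2d(X_demo, k_demo, [1, 1, 1, 1], padding="SAME")
pool_demo = tf.nn.max_pool(conv_demo, [1, 2, 2, 1], [1, 2, 2, 1], padding="SAME")
print(conv_demo.shape)  # (8, 28, 28, 32): 'SAME' padding with stride 1 keeps 28x28
print(pool_demo.shape)  # (8, 14, 14, 32): stride-2 pooling halves height and width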
def batchnorm(axis=-1, momentum=0.99, epsilon=0.001):
    initializer = tf.random_normal_initializer(0, 0.02)
    return tf.keras.layers.BatchNormalization(axis, momentum=momentum, epsilon=epsilon,
                                              gamma_initializer=initializer)
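# Usage sketch for the helper above: the returned Keras layer is callable on an
# NHWC tensor; `training=True` selects batch statistics over moving averages.
bn = batchnorm()
y = bn(tf.zeros([4, 16, 16, 3]), training=True)  # normalizes over the last axis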
def layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes):
    """
    Create the layers for a fully convolutional network. Build skip-layers using the vgg layers.
    :param vgg_layer3_out: TF Tensor for VGG Layer 3 output
    :param vgg_layer4_out: TF Tensor for VGG Layer 4 output
    :param vgg_layer7_out: TF Tensor for VGG Layer 7 output
    :param num_classes: Number of classes to classify
    :return: The Tensor for the last layer of output
    """
    # FCN Layer 1
    fcn_layer1_out = tf.layers.conv2d(vgg_layer7_out, num_classes, 1, strides=1,
                                      padding='same', name="fcn_layer1_out")

    # FCN Layer 2
    fcn_layer2_trans = tf.layers.conv2d_transpose(
        fcn_layer1_out, num_classes, 4, strides=2, padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3),
        name="fcn_layer2_trans")
    fcn_layer2_1x1 = tf.layers.conv2d(vgg_layer4_out, num_classes, 1, strides=1,
                                      padding='same', name="fcn_layer2_1x1")
    fcn_layer2_skip = tf.add(fcn_layer2_trans, fcn_layer2_1x1, name='fcn_layer2_skip')

    # FCN Layer 3
    fcn_layer3_trans = tf.layers.conv2d_transpose(
        fcn_layer2_skip, num_classes, 4, strides=2, padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3),
        name="fcn_layer3_trans")
    fcn_layer3_1x1 = tf.layers.conv2d(vgg_layer3_out, num_classes, 1, strides=1,
                                      padding='same', name="fcn_layer3_1x1")
    fcn_layer4_skip = tf.add(fcn_layer3_trans, fcn_layer3_1x1, name='fcn_layer4_skip')

    # FCN final layer.
    fcn_final = tf.layers.conv2d_transpose(
        fcn_layer4_skip, num_classes, 16, strides=8, padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3),
        name='fcn_final')
    return fcn_final
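# Rough shape check for the decoder above (a sketch with hypothetical VGG
# feature-map sizes for a 160x576 input; the numbers are illustrative only).
l3 = tf.zeros([1, 20, 72, 256])   # stand-in for vgg_layer3_out
l4 = tf.zeros([1, 10, 36, 512])   # stand-in for vgg_layer4_out
l7 = tf.zeros([1, 5, 18, 4096])   # stand-in for vgg_layer7_out
out = layers(l3, l4, l7, num_classes=2)
print(out.shape)  # (1, 160, 576, 2): two 2x upsamples plus a final 8x upsample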
def _build_word_char_embeddings(self): ''' options contains key 'char_cnn': { 'n_characters': 262, # includes the start / end characters 'max_characters_per_token': 50, 'filters': [ [1, 32], [2, 32], [3, 64], [4, 128], [5, 256], [6, 512], [7, 512] ], 'activation': 'tanh', # for the character embedding 'embedding': {'dim': 16} # for highway layers # if omitted, then no highway layers 'n_highway': 2, } ''' batch_size = self.options['batch_size'] unroll_steps = self.options['unroll_steps'] projection_dim = self.options['lstm']['projection_dim'] cnn_options = self.options['char_cnn'] filters = cnn_options['filters'] n_filters = sum(f[1] for f in filters) max_chars = cnn_options['max_characters_per_token'] char_embed_dim = cnn_options['embedding']['dim'] n_chars = cnn_options['n_characters'] if n_chars != 261: raise InvalidNumberOfCharacters( "Set n_characters=261 for training see the README.md") if cnn_options['activation'] == 'tanh': activation = tf.nn.tanh elif cnn_options['activation'] == 'relu': activation = tf.nn.relu # the input character ids self.tokens_characters = tf.placeholder(DTYPE_INT, shape=(batch_size, unroll_steps, max_chars), name='tokens_characters') # the character embeddings with tf.device("/cpu:0"): self.embedding_weights = tf.get_variable( "char_embed", [n_chars, char_embed_dim], dtype=DTYPE, initializer=tf.random_uniform_initializer(-1.0, 1.0)) # shape (batch_size, unroll_steps, max_chars, embed_dim) self.char_embedding = tf.nn.embedding_lookup( self.embedding_weights, self.tokens_characters) if self.bidirectional: self.tokens_characters_reverse = tf.placeholder( DTYPE_INT, shape=(batch_size, unroll_steps, max_chars), name='tokens_characters_reverse') self.char_embedding_reverse = tf.nn.embedding_lookup( self.embedding_weights, self.tokens_characters_reverse) # the convolutions def make_convolutions(inp, reuse): with tf.variable_scope('CNN', reuse=reuse) as scope: convolutions = [] for i, (width, num) in enumerate(filters): if cnn_options['activation'] == 'relu': # He initialization for ReLU activation # with char embeddings init between -1 and 1 #w_init = tf.random_normal_initializer( # mean=0.0, # stddev=np.sqrt(2.0 / (width * char_embed_dim)) #) # Kim et al 2015, +/- 0.05 w_init = tf.random_uniform_initializer(minval=-0.05, maxval=0.05) elif cnn_options['activation'] == 'tanh': # glorot init w_init = tf.random_normal_initializer( mean=0.0, stddev=np.sqrt(1.0 / (width * char_embed_dim))) w = tf.get_variable("W_cnn_%s" % i, [1, width, char_embed_dim, num], initializer=w_init, dtype=DTYPE) b = tf.get_variable( "b_cnn_%s" % i, [num], dtype=DTYPE, initializer=tf.constant_initializer(0.0)) conv = tf.nn.conv2d( inp, w, strides=[1, 1, 1, 1], padding="VALID") + b # now max pool conv = tf.nn.max_pool(conv, [1, 1, max_chars - width + 1, 1], [1, 1, 1, 1], 'VALID') # activation conv = activation(conv) conv = tf.squeeze(conv, squeeze_dims=[2]) convolutions.append(conv) return tf.concat(convolutions, 2) # for first model, this is False, for others it's True reuse = tf.get_variable_scope().reuse embedding = make_convolutions(self.char_embedding, reuse) self.token_embedding_layers = [embedding] if self.bidirectional: # re-use the CNN weights from forward pass embedding_reverse = make_convolutions(self.char_embedding_reverse, True) # for highway and projection layers: # reshape from (batch_size, n_tokens, dim) to n_highway = cnn_options.get('n_highway') use_highway = n_highway is not None and n_highway > 0 use_proj = n_filters != projection_dim if use_highway or use_proj: embedding = 
tf.reshape(embedding, [-1, n_filters]) if self.bidirectional: embedding_reverse = tf.reshape(embedding_reverse, [-1, n_filters]) # set up weights for projection if use_proj: assert n_filters > projection_dim with tf.variable_scope('CNN_proj') as scope: W_proj_cnn = tf.get_variable( "W_proj", [n_filters, projection_dim], initializer=tf.random_normal_initializer( mean=0.0, stddev=np.sqrt(1.0 / n_filters)), dtype=DTYPE) b_proj_cnn = tf.get_variable( "b_proj", [projection_dim], initializer=tf.constant_initializer(0.0), dtype=DTYPE) # apply highways layers def high(x, ww_carry, bb_carry, ww_tr, bb_tr): carry_gate = tf.nn.sigmoid(tf.matmul(x, ww_carry) + bb_carry) transform_gate = tf.nn.relu(tf.matmul(x, ww_tr) + bb_tr) return carry_gate * transform_gate + (1.0 - carry_gate) * x if use_highway: highway_dim = n_filters for i in range(n_highway): with tf.variable_scope('CNN_high_%s' % i) as scope: W_carry = tf.get_variable( 'W_carry', [highway_dim, highway_dim], # glorit init initializer=tf.random_normal_initializer( mean=0.0, stddev=np.sqrt(1.0 / highway_dim)), dtype=DTYPE) b_carry = tf.get_variable( 'b_carry', [highway_dim], initializer=tf.constant_initializer(-2.0), dtype=DTYPE) W_transform = tf.get_variable( 'W_transform', [highway_dim, highway_dim], initializer=tf.random_normal_initializer( mean=0.0, stddev=np.sqrt(1.0 / highway_dim)), dtype=DTYPE) b_transform = tf.get_variable( 'b_transform', [highway_dim], initializer=tf.constant_initializer(0.0), dtype=DTYPE) embedding = high(embedding, W_carry, b_carry, W_transform, b_transform) if self.bidirectional: embedding_reverse = high(embedding_reverse, W_carry, b_carry, W_transform, b_transform) self.token_embedding_layers.append( tf.reshape(embedding, [batch_size, unroll_steps, highway_dim])) # finally project down to projection dim if needed if use_proj: embedding = tf.matmul(embedding, W_proj_cnn) + b_proj_cnn if self.bidirectional: embedding_reverse = tf.matmul(embedding_reverse, W_proj_cnn) \ + b_proj_cnn self.token_embedding_layers.append( tf.reshape(embedding, [batch_size, unroll_steps, projection_dim])) # reshape back to (batch_size, tokens, dim) if use_highway or use_proj: shp = [batch_size, unroll_steps, projection_dim] embedding = tf.reshape(embedding, shp) if self.bidirectional: embedding_reverse = tf.reshape(embedding_reverse, shp) # at last assign attributes for remainder of the model self.embedding = embedding if self.bidirectional: self.embedding_reverse = embedding_reverse
import tensorflow as tf

with tf.Session() as sess:
    init1 = tf.random_uniform_initializer(-1.0, 1.0)
    var1 = tf.get_variable("var1", shape=[2, 4], initializer=init1)
    var1_ = tf.Variable(tf.random_uniform([2, 4], -1.0, 1.0))

    init2 = tf.constant_initializer(1.0, dtype=tf.float32)
    var2 = tf.get_variable("var2", shape=[2, 4], initializer=init2)
    var2_ = tf.Variable(tf.constant(1.0, shape=[2, 4], dtype=tf.float32))

    # the first two positional args are mean and stddev
    init3 = tf.random_normal_initializer(0.0, 1.0, dtype=tf.float32)
    var3 = tf.get_variable("var3", shape=[2, 4], initializer=init3)
    var3_ = tf.Variable(tf.random_normal([2, 4], 0.0, 1.0, dtype=tf.float32))

    init4 = tf.truncated_normal_initializer(0.0, 1.0, dtype=tf.float32)
    var4 = tf.get_variable("var4", shape=[2, 4], initializer=init4)
    var4_ = tf.Variable(tf.truncated_normal([2, 4], 0.0, 1.0, dtype=tf.float32))

    # zeros_initializer / ones_initializer take no shape argument;
    # the shape is passed to tf.get_variable instead
    init5 = tf.zeros_initializer(dtype=tf.float32)
    var5 = tf.get_variable("var5", shape=[2, 4], initializer=init5)
    var5_ = tf.Variable(tf.zeros(shape=[2, 4], dtype=tf.float32))

    init6 = tf.ones_initializer(dtype=tf.float32)
    var6 = tf.get_variable("var6", shape=[2, 4], initializer=init6)
    var6_ = tf.Variable(tf.ones(shape=[2, 4], dtype=tf.float32))

    init7 = tf.uniform_unit_scaling_initializer(dtype=tf.float32)
    var7 = tf.get_variable("var7", shape=[2, 4], initializer=init7)
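    # Materialize and inspect the variables above (runs inside the same session).
    sess.run(tf.global_variables_initializer())
    v5, v6 = sess.run([var5, var6])
    print(v5)  # all zeros, shape (2, 4)
    print(v6)  # all ones, shape (2, 4)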
def batchnorm(inputs):
    return tf.layers.batch_normalization(inputs, axis=3, epsilon=1e-5, momentum=0.1,
                                         training=True,
                                         gamma_initializer=tf.random_normal_initializer(1.0, 0.02))
def inference(self, mode, inputs): is_training = mode == 'TRAIN' ###decode your inputs [image, im_info, gt_boxes] = inputs image.set_shape([None, None, None, 3]) im_info.set_shape([None, cfg.nr_info_dim]) if mode == 'TRAIN': gt_boxes.set_shape([None, None, 5]) ##end of decode num_anchors = len(cfg.anchor_scales) * len(cfg.anchor_ratios) bottleneck = resnet_v1.bottleneck blocks = [ resnet_utils.Block('block1', bottleneck, [(144, 24, 2, 1)] + [(144, 24, 1, 1)] * 3), resnet_utils.Block('block2', bottleneck, [(288, 144, 2, 1)] + [(288, 144, 1, 1)] * 7), resnet_utils.Block('block3', bottleneck, [(576, 288, 1, 1)] + [(576, 288, 1, 1)] * 3), ] with slim.arg_scope(resnet_arg_scope(is_training=False)): with tf.variable_scope('resnet_v1_xception', 'resnet_v1_xception'): net = resnet_utils.conv2d_same( image, 24, 3, stride=2, scope='conv1') #rate (atrous conv must be delete) net = slim.max_pool2d( net, [3, 3], stride=2, padding='SAME', scope='pool1') net, _ = resnet_v1.resnet_v1( net, blocks[0:1], global_pool=False, include_root_block=False, scope='resnet_v1_xception') with slim.arg_scope(resnet_arg_scope(is_training=is_training)): net_conv3, _ = resnet_v1.resnet_v1( net, blocks[1:2], global_pool=False, include_root_block=False, scope='resnet_v1_xception') with slim.arg_scope(resnet_arg_scope(is_training=is_training)): net_conv4, _ = resnet_v1.resnet_v1( net_conv3, blocks[2:3], global_pool=False, include_root_block=False, scope='resnet_v1_xception') initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01) initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001) with tf.variable_scope( 'resnet_v1_xception', 'resnet_v1_xception', regularizer=tf.contrib.layers.l2_regularizer( cfg.weight_decay)): # rpn rpn = slim.conv2d( net_conv3, 256, [3, 3], trainable=is_training, weights_initializer=initializer, activation_fn=nn_ops.relu, scope="rpn_conv/3x3") rpn_cls_score = slim.conv2d( rpn, num_anchors * 2, [1, 1], trainable=is_training, weights_initializer=initializer, padding='VALID', activation_fn=None, scope='rpn_cls_score') rpn_bbox_pred = slim.conv2d( rpn, num_anchors * 4, [1, 1], trainable=is_training, weights_initializer=initializer, padding='VALID', activation_fn=None, scope='rpn_bbox_pred') # generate anchor height = tf.cast(tf.shape(rpn)[1], tf.float32) width = tf.cast(tf.shape(rpn)[2], tf.float32) anchors = generate_anchors_opr( height, width, cfg.stride[0], cfg.anchor_scales, cfg.anchor_ratios) # change it so that the score has 2 as its channel size rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2]) rpn_cls_prob = tf.nn.softmax(rpn_cls_prob, name='rpn_cls_prob') rpn_cls_prob = tf.reshape(rpn_cls_prob, tf.shape(rpn_cls_score)) rois, roi_scores,rois_before_nms = proposal_opr( rpn_cls_prob, rpn_bbox_pred, im_info, mode, cfg.stride, anchors, num_anchors, is_tfchannel=True, is_tfnms=False) if is_training: with tf.variable_scope('anchor') as scope: rpn_labels, rpn_bbox_targets = \ tf.py_func( anchor_target_layer, [gt_boxes, im_info, cfg.stride, anchors, num_anchors], [tf.float32, tf.float32]) rpn_labels = tf.to_int32(rpn_labels, name="to_int32") with tf.control_dependencies([rpn_labels]): with tf.variable_scope('rpn_rois') as scope: rois, labels, bbox_targets = \ tf.py_func( proposal_target_layer, [rois, gt_boxes, im_info], [tf.float32, tf.float32, tf.float32]) labels = tf.to_int32(labels, name="to_int32") with tf.variable_scope( 'resnet_v1_xception', 'resnet_v1_xception', regularizer=tf.contrib.layers.l2_regularizer( cfg.weight_decay)): ps_chl = 7 * 7 * 10 ps_fm = 
rfcn_plus_plus_opr.global_context_module( net_conv4, prefix='conv_new_1', # ks=15, chl_mid=256, chl_out=ps_chl) ks=15, chl_mid=64, chl_out=ps_chl) ps_fm = nn_ops.relu(ps_fm) [psroipooled_rois, _, _] = psalign_pooling_op.psalign_pool( ps_fm, rois, group_size=7, sample_height=2, sample_width=2, spatial_scale=1.0/16.0) #[psroipooled_rois, _] = psroi_pooling_op.psroi_pool( # ps_fm, rois, group_size=7, spatial_scale=1.0 / 16.0) psroipooled_rois = slim.flatten(psroipooled_rois) ps_fc_1 = slim.fully_connected( psroipooled_rois, 2048, weights_initializer=initializer, activation_fn=nn_ops.relu, trainable=is_training, scope='ps_fc_1') cls_score = slim.fully_connected( ps_fc_1, cfg.num_classes, weights_initializer=initializer, activation_fn=None, trainable=is_training, scope='cls_fc') bbox_pred = slim.fully_connected( ps_fc_1, 4 * cfg.num_classes, weights_initializer=initializer_bbox, activation_fn=None, trainable=is_training, scope='bbox_fc') cls_prob = loss_opr.softmax_layer(cls_score, "cls_prob") #conv_new_1 = slim.conv2d( # net_conv5, 1024, [1, 1], trainable=is_training, # weights_initializer=initializer, activation_fn=nn_ops.relu, # scope="conv_new_1") #rfcn_cls = slim.conv2d( # conv_new_1, 7 * 7 * cfg.num_classes, [1, 1], # trainable=is_training, weights_initializer=initializer, # activation_fn=None, scope="rfcn_cls") #rfcn_bbox = slim.conv2d( # conv_new_1, 7 * 7 * 4, [1, 1], trainable=is_training, # weights_initializer=initializer, # activation_fn=None, scope="rfcn_bbox") #[psroipooled_cls_rois, _] = psroi_pooling_op.psroi_pool( # rfcn_cls, rois, group_size=7, spatial_scale=1.0 / 16.0) #[psroipooled_loc_rois, _] = psroi_pooling_op.psroi_pool( # rfcn_bbox, rois, group_size=7, spatial_scale=1.0 / 16.0) #cls_score = tf.reduce_mean(psroipooled_cls_rois, axis=[1, 2]) #bbox_pred = tf.reduce_mean(psroipooled_loc_rois, axis=[1, 2]) #cls_prob = loss_opr.softmax_layer(cls_score, "cls_prob") # cls_prob = tf.nn.softmax(cls_score, name="cls_prob") #bbox_pred = tf.tile(bbox_pred, [1, cfg.num_classes]) if not is_training: stds = np.tile( np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (cfg.num_classes)) means = np.tile( np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (cfg.num_classes)) bbox_pred *= stds bbox_pred += means ##############add prediction##################### tf.add_to_collection("rpn_cls_score", rpn_cls_score) tf.add_to_collection("rpn_cls_prob", rpn_cls_prob) tf.add_to_collection("rpn_bbox_pred", rpn_bbox_pred) tf.add_to_collection("cls_score", cls_score) tf.add_to_collection("cls_prob", cls_prob) tf.add_to_collection("bbox_pred", bbox_pred) tf.add_to_collection("rois", rois) tf.add_to_collection("rois_before_nms", rois_before_nms) tf.add_to_collection("roi_scores", roi_scores) else: #-------------------- rpn loss ---------------------------------# from detection_opr.utils import loss_opr_without_box_weight rpn_loss_box = loss_opr_without_box_weight.smooth_l1_loss_rpn( tf.reshape(rpn_bbox_pred, [-1, 4]), tf.reshape(rpn_bbox_targets, [-1, 4]), tf.reshape(rpn_labels, [-1]), sigma=cfg.simga_rpn) rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2]) rpn_label = tf.reshape(rpn_labels, [-1]) rpn_select = tf.where(tf.not_equal(rpn_label, -1)) rpn_cls_score = tf.reshape( tf.gather(rpn_cls_score, rpn_select), [-1, 2]) rpn_label = tf.reshape(tf.gather(rpn_label, rpn_select), [-1]) rpn_cross_entropy = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=rpn_cls_score, labels=rpn_label)) #-------------------- rcnn loss --------------------------------# label = tf.reshape(labels, [-1]) cross_entropy, 
loss_box = loss_opr_without_box_weight.sum_ohem_loss( tf.reshape(cls_score, [-1, cfg.num_classes]), label, bbox_pred, bbox_targets, cfg.TRAIN.nr_ohem_sampling, cfg.num_classes) loss_box *= 2 #--------------------add to colloection ------------------------# tf.add_to_collection('loss_cls', cross_entropy) tf.add_to_collection('loss_box', loss_box) tf.add_to_collection('rpn_loss_cls', rpn_cross_entropy) tf.add_to_collection('rpn_loss_box', rpn_loss_box) loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box tf.add_to_collection('losses', loss) return loss
def discrim_conv(batch_input, out_channels, stride):
    padded_input = tf.pad(batch_input, [[0, 0], [1, 1], [1, 1], [0, 0]], mode="CONSTANT")
    return tf.layers.conv2d(padded_input, out_channels, kernel_size=4,
                            strides=(stride, stride), padding="valid",
                            kernel_initializer=tf.random_normal_initializer(0, 0.02))
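# Output-size sketch for discrim_conv: one pixel of constant padding plus a 4x4
# 'valid' kernel means stride 2 halves the spatial size, while stride 1 shrinks
# it by one (x_demo is a hypothetical 256x256 input for illustration).
x_demo = tf.zeros([1, 256, 256, 3])
print(discrim_conv(x_demo, out_channels=64, stride=2).shape)  # (1, 128, 128, 64)
print(discrim_conv(x_demo, out_channels=64, stride=1).shape)  # (1, 255, 255, 64)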
def __init__(
        self,
        prev_layer,
        decay=0.9,
        epsilon=0.00001,
        act=None,
        is_train=False,
        beta_init=tf.zeros_initializer,
        gamma_init=tf.random_normal_initializer(mean=1.0, stddev=0.002),
        moving_mean_init=tf.zeros_initializer(),
        name='batchnorm_layer',
):
    super(BatchNormLayer, self).__init__(prev_layer=prev_layer, act=act, name=name)

    # logging.info(
    #     "BatchNormLayer %s: decay: %f epsilon: %f act: %s is_train: %s" %
    #     (self.name, decay, epsilon, self.act.__name__ if self.act is not None else 'No Activation', is_train)
    # )

    x_shape = self.inputs.get_shape()
    params_shape = x_shape[-1:]

    with tf.variable_scope(name):
        axis = list(range(len(x_shape) - 1))

        # 1. beta, gamma
        variables = []
        if beta_init:
            if beta_init == tf.zeros_initializer:
                beta_init = beta_init()
            beta = tf.get_variable(
                'beta', shape=params_shape, initializer=beta_init,
                dtype=LayersConfig.tf_dtype, trainable=is_train
            )
            variables.append(beta)
        else:
            beta = None

        if gamma_init:
            gamma = tf.get_variable(
                'gamma', shape=params_shape, initializer=gamma_init,
                dtype=LayersConfig.tf_dtype, trainable=is_train,
            )
            variables.append(gamma)
        else:
            gamma = None

        # 2. moving statistics
        moving_mean = tf.get_variable(
            'moving_mean', params_shape, initializer=moving_mean_init,
            dtype=LayersConfig.tf_dtype, trainable=False
        )
        moving_variance = tf.get_variable(
            'moving_variance', params_shape, initializer=tf.constant_initializer(1.),
            dtype=LayersConfig.tf_dtype, trainable=False,
        )

        # 3. These ops will only be performed when training.
        mean, variance = tf.nn.moments(self.inputs, axis)
        update_moving_mean = moving_averages.assign_moving_average(
            moving_mean, mean, decay, zero_debias=False
        )  # if zero_debias=True, has bias
        update_moving_variance = moving_averages.assign_moving_average(
            moving_variance, variance, decay, zero_debias=False
        )  # if zero_debias=True, has bias

        def mean_var_with_update():
            with tf.control_dependencies([update_moving_mean, update_moving_variance]):
                return tf.identity(mean), tf.identity(variance)

        if is_train:
            mean, var = mean_var_with_update()
        else:
            mean, var = moving_mean, moving_variance

        self.outputs = self._apply_activation(
            tf.nn.batch_normalization(self.inputs, mean, var, beta, gamma, epsilon)
        )
        variables.extend([moving_mean, moving_variance])

    self._add_layers(self.outputs)
    self._add_params(variables)
# Add tensorflow's default dropout. Cannot simply add dropout to all hidden units
# in an LSTM, as they have relationships with each other.
lstm_drop = tf.contrib.rnn.DropoutWrapper(lstm, input_p, output_p)

hyp = tf.placeholder(tf.float32, [N, l_h, D], 'hypothesis')
evi = tf.placeholder(tf.float32, [N, l_e, D], 'evidence')

# lstm_size: the size of the gates in the LSTM, as in the first LSTM layer's initialization.
# The LSTM used for looking backwards through the sentences, similar to lstm.
lstm_back = tf.contrib.rnn.BasicLSTMCell(lstm_size)

# A dropout wrapper for lstm_back, like lstm_drop.
lstm_drop_back = tf.contrib.rnn.DropoutWrapper(lstm_back, input_p, output_p)

# Initial values for the fully connected layer's weights.
fc_initializer = tf.random_normal_initializer(stddev=0.1)
fc_weight = tf.get_variable('fc_weight', [2 * hidden_size, 3], initializer=fc_initializer)
fc_bias = tf.get_variable('bias', [3], initializer=fc_initializer)

# x: the inputs to the bidirectional_rnn
x = tf.concat([hyp, evi], 1)

# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2])  # (Le+Lh), N, d
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, vector_size])  # (Le+Lh)*N, d
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(x, l_seq)

rnn_outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(lstm, lstm_back, x, dtype=tf.float32)
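# A plausible classification head for the outputs above (a sketch, assuming
# hidden_size == lstm_size so the shapes line up; not the original continuation):
classification_scores = tf.matmul(rnn_outputs[-1], fc_weight) + fc_bias  # (N, 3)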
def _build_net(self): # ------------------ all inputs ------------------------ self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s') # input State self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_') # input Next State self.r = tf.placeholder(tf.float32, [ None, ], name='r') # input Reward self.a = tf.placeholder(tf.int32, [ None, ], name='a') # input Action w_initializer, b_initializer = tf.random_normal_initializer( 0., 0.3), tf.constant_initializer(0.1) # ------------------ build evaluate_net ------------------ with tf.variable_scope('eval_net'): e1 = tf.layers.dense(self.s, 20, tf.nn.relu, kernel_initializer=w_initializer, bias_initializer=b_initializer, name='e1') self.q_eval = tf.layers.dense(e1, self.n_actions, kernel_initializer=w_initializer, bias_initializer=b_initializer, name='q') # ------------------ build target_net ------------------ with tf.variable_scope('target_net'): t1 = tf.layers.dense(self.s_, 20, tf.nn.relu, kernel_initializer=w_initializer, bias_initializer=b_initializer, name='t1') self.q_next = tf.layers.dense(t1, self.n_actions, kernel_initializer=w_initializer, bias_initializer=b_initializer, name='t2') with tf.variable_scope('q_target'): q_target = self.r + self.gamma * tf.reduce_max( self.q_next, axis=1, name='Qmax_s_') # shape=(None, ) self.q_target = tf.stop_gradient(q_target) with tf.variable_scope('q_eval'): a_indices = tf.stack( [tf.range(tf.shape(self.a)[0], dtype=tf.int32), self.a], axis=1) self.q_eval_wrt_a = tf.gather_nd( params=self.q_eval, indices=a_indices) # shape=(None, ) with tf.variable_scope('loss'): self.loss = tf.reduce_mean( tf.squared_difference(self.q_target, self.q_eval_wrt_a, name='TD_error')) with tf.variable_scope('train'): self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize( self.loss)
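# Target-network sync that commonly accompanies a net built this way (a sketch
# using the two variable scopes above; run it every few learning steps):
t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='target_net')
e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='eval_net')
target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
# sess.run(target_replace_op)  # copies eval_net weights into target_net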
def feed_neural_work(self):
    with tf.name_scope('regression'):
        # W = tf.Variable(tf.zeros(shape = [(self.total_embedding_dim - self.filter_sizes[0] + 1) * self.num_filters * 2,2]),name = 'W')
        # modified
        # self.represent=tf.concat([self.represent_imag,self.represent_real],1)
        # self.real_kernel,self.imag_kernel=self.set_weight(664,2)
        # cat_kernels_4_real = tf.concat([self.real_kernel, -self.imag_kernel],axis=-1)
        # cat_kernels_4_imag = tf.concat([self.imag_kernel, self.real_kernel],axis=-1)
        # cat_kernels_4_complex = tf.concat([cat_kernels_4_real, cat_kernels_4_imag],axis=0)
        # self.full_join_real_1=tf.matmul(self.represent,cat_kernels_4_complex)
        # modified (previous version)
        # print(self.full_join_real_1)
        # exit()
        # self.full_join_real_1=tf.matmul(self.represent_real,self.real_kernel_1)-tf.matmul(self.represent_imag,self.imag_kernel_1)
        # self.full_join_imag_1=tf.matmul(self.represent_real,self.imag_kernel_1)+tf.matmul(self.represent_imag,self.real_kernel_1)
        # self.real_kernel_2,self.imag_kernel_2=self.set_weight(1,1)
        # # cat_kernels_4_real = tf.concat([self.real_kernel, -self.imag_kernel],axis=-1)
        # # cat_kernels_4_imag = tf.concat([self.imag_kernel, self.real_kernel],axis=-1)
        # # cat_kernels_4_complex = tf.concat([cat_kernels_4_real, cat_kernels_4_imag],axis=0)
        # self.full_join_real_2=tf.matmul(self.full_join_real_1,self.real_kernel_2)-tf.matmul(self.full_join_imag_1,self.imag_kernel_2)
        # self.full_join_imag_2=tf.matmul(self.full_join_real_1,self.imag_kernel_2)+tf.matmul(self.full_join_imag_1,self.real_kernel_2)
        # b = tf.get_variable('b_hidden', shape=[2],initializer = tf.random_normal_initializer())
        # self.logits=tf.concat([self.full_join_real_2,self.full_join_imag_2],1)+b
        # self.logits = tf.nn.xw_plus_b(self.full_join_real_1, W, b, name = "scores")
        # self.concat_out = tf.matmul(self.represent, cat_kernels_4_complex)
        # previous version
        regularizer = tf.contrib.layers.l2_regularizer(self.l2_reg_lambda)
        W = tf.get_variable(
            "W_hidden",
            # shape=[102,self.hidden_num],
            shape=[3 * self.num_filters + 2, self.hidden_num],
            initializer=tf.contrib.layers.xavier_initializer(),
            regularizer=regularizer)
        b = tf.get_variable('b_hidden', shape=[self.hidden_num],
                            initializer=tf.random_normal_initializer(),
                            regularizer=regularizer)
        self.para.append(W)
        self.para.append(b)
        self.hidden_output = tf.nn.tanh(
            tf.nn.xw_plus_b(self.represent, W, b, name="hidden_output"))
        # self.hidden_output=tf.nn.dropout(self.hidden_output, self.dropout_keep_prob, name="hidden_output_drop")
        W = tf.get_variable(
            "W_output",
            shape=[self.hidden_num, 2],
            initializer=tf.contrib.layers.xavier_initializer(),
            regularizer=regularizer)
        b = tf.get_variable('b_output', shape=[2],
                            initializer=tf.random_normal_initializer(),
                            regularizer=regularizer)
        self.para.append(W)
        self.para.append(b)
        self.logits = tf.nn.xw_plus_b(self.hidden_output, W, b, name="scores")
        print(self.logits)
        self.scores = tf.nn.softmax(self.logits)
        self.predictions = tf.argmax(self.scores, 1, name="predictions")
def _build_net(self): # ------------------ build evaluate_net ------------------ self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s') # input self.q_target = tf.placeholder(tf.float32, [None, self.n_actions], name='Q_target') # for calculating loss with tf.variable_scope('eval_net'): # c_names(collections_names) are the collections to store variables c_names, n_l1, w_initializer, b_initializer = \ ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES], 10, \ tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1) # config of layers # first layer. collections is used later when assign to target net with tf.variable_scope('l1'): w1 = tf.get_variable('w1', [self.n_features, n_l1], initializer=w_initializer, collections=c_names) b1 = tf.get_variable('b1', [1, n_l1], initializer=b_initializer, collections=c_names) l1 = tf.nn.relu(tf.matmul(self.s, w1) + b1) # second layer. collections is used later when assign to target net with tf.variable_scope('l2'): w2 = tf.get_variable('w2', [n_l1, self.n_actions], initializer=w_initializer, collections=c_names) b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names) self.q_eval = tf.matmul(l1, w2) + b2 with tf.variable_scope('loss'): self.loss = tf.reduce_mean( tf.squared_difference(self.q_target, self.q_eval)) with tf.variable_scope('train'): self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize( self.loss) # ------------------ build target_net ------------------ self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_') # input with tf.variable_scope('target_net'): # c_names(collections_names) are the collections to store variables c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES] # first layer. collections is used later when assign to target net with tf.variable_scope('l1'): w1 = tf.get_variable('w1', [self.n_features, n_l1], initializer=w_initializer, collections=c_names) b1 = tf.get_variable('b1', [1, n_l1], initializer=b_initializer, collections=c_names) l1 = tf.nn.relu(tf.matmul(self.s_, w1) + b1) # second layer. collections is used later when assign to target net with tf.variable_scope('l2'): w2 = tf.get_variable('w2', [n_l1, self.n_actions], initializer=w_initializer, collections=c_names) b2 = tf.get_variable('b2', [1, self.n_actions], initializer=b_initializer, collections=c_names) self.q_next = tf.matmul(l1, w2) + b2
def CNN(): # g = tf.get_default_graph() # with g.gradient_override_map({'Relu': 'GuidedRelu'}): if init_method == 0: initializer_wb= tf.random_normal_initializer() elif init_method == 1: initializer_wb= tf.contrib.layers.xavier_initializer() elif init_method == 2: initializer_wb= tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG") conv_layer_1 = tf.layers.conv2d( inputs=shaper, filters=64, kernel_size=[3, 3], padding="same", strides=1, activation=tf.nn.relu, kernel_initializer=initializer_wb, name = "conv1") pool_layer_1 = tf.layers.max_pooling2d(inputs=conv_layer_1, pool_size=[2, 2], strides=2) conv_layer_2 = tf.layers.conv2d( inputs=pool_layer_1, filters=128, kernel_size=[3, 3], padding="same", activation=tf.nn.relu, kernel_initializer=initializer_wb) pool_layer_2 = tf.layers.max_pooling2d(inputs=conv_layer_2, pool_size=[2, 2], strides=2) conv_layer_3 = tf.layers.conv2d( inputs=pool_layer_2, filters=256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu, kernel_initializer=initializer_wb) conv_layer_4 = tf.layers.conv2d( inputs=conv_layer_3, filters=256, kernel_size=[3, 3], padding="same", activation=tf.nn.relu, kernel_initializer=initializer_wb) pool_layer_3 = tf.layers.max_pooling2d(inputs=conv_layer_4, pool_size=[2, 2], strides=2) grads_40 = [tf.gradients(conv_layer_4[0,i,0,:], X_ph) for i in np.arange(5)] grads_41 = [tf.gradients(conv_layer_4[0,i,1,:], X_ph) for i in np.arange(5)] flattened_layer = tf.reshape(pool_layer_3, [-1, 3 * 3 * 256]) dense_layer_1 = tf.layers.dense(inputs=flattened_layer, units=1024, activation=tf.nn.relu,kernel_initializer=initializer_wb) dense_layer_2 = tf.layers.dense(inputs=dense_layer_1, units=1024, activation=tf.nn.relu,kernel_initializer=initializer_wb) #dropped = tf.nn.dropout(dense_layer_2, dropout) #tf.contrib.layers.batch_norm(dropped,center=True, scale=True, is_training = training_phase,) logit_layer = tf.layers.dense(inputs=dense_layer_2, units=k) # output = tf.nn.softmax(logits, name="softmax_tensor") # classes = tf.argmax(input=output, axis=1) return logit_layer, grads_40, grads_41
def conv_layer(input_data, height, width, x_stride, y_stride, filter_num, name,
               activation_method="relu", alpha=0.2, padding="VALID", atrous=1,
               init_method="xavier", bias_term=True, is_pretrain=True):
    """
    convolutional layer
    :param input_data: the input data tensor [batch_size, height, width, channels]
    :param height: the height of the convolutional kernel
    :param width: the width of the convolutional kernel
    :param x_stride: stride in X axis
    :param y_stride: stride in Y axis
    :param filter_num: the number of convolutional kernels
    :param name: the name of the layer
    :param activation_method: the type of activation function (default: relu)
    :param alpha: leaky relu alpha (default: 0.2)
    :param padding: the padding method, "SAME" | "VALID" (default: "VALID")
    :param atrous: the dilation rate; if atrous == 1, regular conv, if atrous > 1, dilated conv (default: 1)
    :param init_method: the method of weights initialization (default: xavier)
    :param bias_term: whether the bias term exists or not (default: True)
    :param is_pretrain: whether the parameters are trainable (default: True)
    :return: output: a 4-D tensor [number, height, width, channel]
    """
    channel = input_data.get_shape()[-1]

    # the method of weights initialization
    if init_method == "xavier":
        initializer = tf.contrib.layers.xavier_initializer()
    elif init_method == "gaussian":
        initializer = tf.random_normal_initializer(stddev=0.01)
    else:
        initializer = tf.truncated_normal_initializer(stddev=0.01)

    # the initialization of the weights and biases
    with tf.variable_scope(name):
        weights = tf.get_variable(name="weights",
                                  shape=[height, width, channel, filter_num],
                                  dtype=tf.float32,
                                  initializer=initializer,
                                  trainable=is_pretrain)
        biases = tf.get_variable(name="biases",
                                 shape=[filter_num],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.0),
                                 trainable=is_pretrain)

        # the method of convolution
        if atrous == 1:
            feature_map = tf.nn.conv2d(input=input_data,
                                       filter=weights,
                                       strides=[1, x_stride, y_stride, 1],
                                       padding=padding,
                                       name="conv")
        else:
            feature_map = tf.nn.atrous_conv2d(value=input_data,
                                              filters=weights,
                                              rate=atrous,
                                              padding=padding,
                                              name="atrous_conv")

        # bias term
        if bias_term is True:
            output = tf.nn.bias_add(value=feature_map, bias=biases, name="biases_add")
        else:
            output = feature_map

        # info show
        shape = output.get_shape()
        print("name: %s, shape: (%d, %d, %d), activation: %s"
              % (name, shape[1], shape[2], shape[3], activation_method))

        # activation
        output = activation_layer(output, activation_method, alpha)

    return output
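# Usage sketch for the helper above (images is a hypothetical input tensor):
# a 3x3, stride-1 convolution with 32 filters and Gaussian-initialized weights.
images = tf.placeholder(tf.float32, [None, 256, 256, 1])
conv1 = conv_layer(images, 3, 3, 1, 1, 32, "conv1", init_method="gaussian")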
import tensorflow as tf

from alphai_rickandmorty_oracle.architecture.abstract import AbstractGanArchitecture
import alphai_rickandmorty_oracle.tflib as lib
import alphai_rickandmorty_oracle.tflib.ops.linear
import alphai_rickandmorty_oracle.tflib.ops.conv2d
import alphai_rickandmorty_oracle.tflib.ops.deconv2d

INIT_KERNEL = tf.random_normal_initializer(mean=0.0, stddev=0.02)

OUTPUT_DIM = 784  # Number of pixels in MNIST (28*28)
DIM = 64
Z_DIM = 128
DISC_FILTER_SIZE = 5


def leaky_relu(x, alpha=0.1, name=None):
    if name:
        with tf.variable_scope(name):
            return _impl_leaky_relu(x, alpha)
    else:
        return _impl_leaky_relu(x, alpha)


def _impl_leaky_relu(x, alpha):
    return tf.nn.relu(x) - (alpha * tf.nn.relu(-x))


class BrainwavesGanArchitecture(AbstractGanArchitecture):
    def __init__(self, output_dimensions, plot_dimensions):
        super().__init__(output_dimensions, plot_dimensions)
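# Sanity sketch: the hand-rolled leaky_relu above is equivalent to TF's
# built-in for the same alpha, since relu(x) - alpha*relu(-x) == max(x, alpha*x).
x_demo = tf.constant([-2.0, 0.0, 3.0])
y_manual = leaky_relu(x_demo, alpha=0.1)         # [-0.2, 0.0, 3.0]
y_builtin = tf.nn.leaky_relu(x_demo, alpha=0.1)  # same values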
def fc_layer(input_data, output_dim, name, activation_method="relu", alpha=None,
             init_method="xavier", is_train=True):
    """
    fully-connected layer
    :param input_data: the input data
    :param output_dim: the dimension of the output data
    :param name: name of the layer
    :param activation_method: the type of activation function
    :param alpha: leaky relu alpha
    :param init_method: the method of weights initialization (default: xavier)
    :param is_train: if False, skip this layer; default is True
    :return: output: a 2-D tensor [batch_size, channel]
    """
    if is_train is True:
        shape = input_data.get_shape()
        if len(shape) == 4:
            input_dim = shape[1].value * shape[2].value * shape[3].value
        else:
            input_dim = shape[-1].value

        flat_input_data = tf.reshape(input_data, [-1, input_dim])
        if init_method is None:
            output = input_data
        else:
            with tf.variable_scope(name):
                # the method of weights initialization
                if init_method == "xavier":
                    initializer = tf.contrib.layers.xavier_initializer()
                elif init_method == "gaussian":
                    initializer = tf.random_normal_initializer(stddev=0.01)
                else:
                    initializer = tf.truncated_normal_initializer(stddev=0.01)

                weights = tf.get_variable(name="weight",
                                          shape=[input_dim, output_dim],
                                          dtype=tf.float32,
                                          initializer=initializer)
                biases = tf.get_variable(name="biases",
                                         shape=[output_dim],
                                         dtype=tf.float32,
                                         initializer=tf.constant_initializer(0.0))

                output = tf.nn.bias_add(value=tf.matmul(flat_input_data, weights),
                                        bias=biases,
                                        name="fc_bias_add")
                output = activation_layer(input_data=output,
                                          activation_method=activation_method,
                                          alpha=alpha)

                print("name: %s, shape: %d -> %d, activation: %s, alpha = %r"
                      % (name, input_dim, output_dim, activation_method, alpha))
    else:
        output = input_data

    return output
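# Usage sketch (features is a hypothetical 4-D feature map): the helper flattens
# the input internally and applies a 512-unit fully connected layer.
features = tf.placeholder(tf.float32, [None, 8, 8, 64])
fc1 = fc_layer(features, 512, "fc1")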
def generator(z, batch_size, mode='train'): with tf.variable_scope("generator") as scope: if mode=='train': trainable = True pass elif mode=='sampler': trainable = False scope.reuse_variables() else: assert 0,'Unkown mode for generator.' s_h, s_w = dcgan.output_height, dcgan.output_width s_h2, s_w2 = cs.conv_out_size_same(s_h, 2), cs.conv_out_size_same(s_w, 2) s_h4, s_w4 = cs.conv_out_size_same(s_h2, 2), cs.conv_out_size_same(s_w2, 2) s_h8, s_w8 = cs.conv_out_size_same(s_h4, 2), cs.conv_out_size_same(s_w4, 2) s_h16, s_w16 = cs.conv_out_size_same(s_h8, 2), cs.conv_out_size_same(s_w8, 2) s_h32, s_w32 = cs.conv_out_size_same(s_h16, 2), cs.conv_out_size_same(s_w16, 2) s_h64, s_w64 = cs.conv_out_size_same(s_h32, 2), cs.conv_out_size_same(s_w32, 2) # assert s_h16*s_w16*self.gf_dim*8==z.shape[1],str(s_h16*s_w16*self.gf_dim*8)+' != '+str(z.shape[1]) # project `z` and reshape dcgan.z_ = tf.layers.dense(z,dcgan.gf_dim * 8 * s_h64 * s_w64, kernel_initializer=tf.random_normal_initializer(stddev=0.02), bias_initializer=tf.constant_initializer (0.01), use_bias=1,activation=None,name='g_h0_lin') with tf.variable_scope('g_h0_lin', reuse=True): dcgan.h0_w = tf.get_variable('kernel') dcgan.h0_b = tf.get_variable('bias') dcgan.h0 = tf.reshape(dcgan.z_, [batch_size, s_h64, s_w64, dcgan.gf_dim * 8]) h0 = tf.contrib.layers.batch_norm(dcgan.h0,decay=0.9,updates_collections=None, epsilon=1e-5,scale=True,is_training=trainable,scope='g_bn0') h0 = tf.nn.relu(h0) dcgan.h1, dcgan.h1_w, dcgan.h1_b = cs.deconv2d(h0, [batch_size, s_h32, s_w32, dcgan.gf_dim * 8], name='g_h1', with_w=True) h1 = tf.contrib.layers.batch_norm(dcgan.h1,decay=0.9,updates_collections=None, epsilon=1e-5,scale=True,is_training=trainable,scope='g_bn1') h1 = tf.nn.relu(h1) h2, dcgan.h2_w, dcgan.h2_b = cs.deconv2d(h1, [batch_size, s_h16, s_w16, dcgan.gf_dim * 4], name='g_h2', with_w=True) h2 = tf.contrib.layers.batch_norm(h2,decay=0.9,updates_collections=None, epsilon=1e-5,scale=True,is_training=trainable,scope='g_bn2') h2 = tf.nn.relu(h2) h3, dcgan.h3_w, dcgan.h3_b = cs.deconv2d(h2, [batch_size, s_h8, s_w8, dcgan.gf_dim * 4], name='g_h3', with_w=True) h3 = tf.contrib.layers.batch_norm(h3,decay=0.9,updates_collections=None, epsilon=1e-5,scale=True,is_training=trainable,scope='g_bn3') h3 = tf.nn.relu(h3) h4, dcgan.h4_w, dcgan.h4_b = cs.deconv2d(h3, [batch_size, s_h4, s_w4, dcgan.gf_dim * 2], name='g_h4', with_w=True) h4 = tf.contrib.layers.batch_norm(h4,decay=0.9,updates_collections=None, epsilon=1e-5,scale=True,is_training=trainable,scope='g_bn4') h4 = tf.nn.relu(h4) h5, dcgan.h5_w, dcgan.h5_b = cs.deconv2d(h4, [batch_size, s_h2, s_w2, dcgan.gf_dim * 2], name='g_h5', with_w=True) h5 = tf.contrib.layers.batch_norm(h5,decay=0.9,updates_collections=None, epsilon=1e-5,scale=True,is_training=trainable,scope='g_bn5') h5 = tf.nn.relu(h5) h6 = tf.layers.conv2d(inputs=h5, filters=dcgan.gf_dim * 2, kernel_size=[5,5], strides=(1,1),padding='same', activation=None, name='d_h6_conv') h6 = tf.contrib.layers.batch_norm(h6,decay=0.9,updates_collections=None, epsilon=1e-5,scale=True,is_training=trainable,scope='d_bn6') h7, dcgan.h7_w, dcgan.h7_b = cs.deconv2d(h6, [batch_size, s_h, s_w, dcgan.c_dim], name='g_h7', with_w=True) return tf.nn.tanh(h7)
def _build_loss(self, lstm_outputs):
    '''
    Create:
        self.total_loss: total loss op for training
        self.softmax_W, softmax_b: the softmax variables
        self.next_token_id / _reverse: placeholders for gold input
    '''
    batch_size = self.options['batch_size']
    unroll_steps = self.options['unroll_steps']
    n_tokens_vocab = self.options['n_tokens_vocab']

    # DEFINE next_token_id and *_reverse placeholders for the gold input
    def _get_next_token_placeholders(suffix):
        name = 'next_token_id' + suffix
        id_placeholder = tf.placeholder(DTYPE_INT,
                                        shape=(batch_size, unroll_steps),
                                        name=name)
        return id_placeholder

    # get the window and weight placeholders
    self.next_token_id = _get_next_token_placeholders('')
    if self.bidirectional:
        self.next_token_id_reverse = _get_next_token_placeholders('_reverse')

    # DEFINE THE SOFTMAX VARIABLES
    # get the dimension of the softmax weights
    # softmax dimension is the size of the output projection_dim
    softmax_dim = self.options['lstm']['projection_dim']

    # the output softmax variables -- they are shared if bidirectional
    if self.share_embedding_softmax:
        # softmax_W is just the embedding layer
        self.softmax_W = self.embedding_weights

    with tf.variable_scope('softmax'), tf.device('/cpu:0'):
        # Glorot init (std = 1.0 / sqrt(fan_in))
        softmax_init = tf.random_normal_initializer(0.0, 1.0 / np.sqrt(softmax_dim))
        if not self.share_embedding_softmax:
            self.softmax_W = tf.get_variable('W', [n_tokens_vocab, softmax_dim],
                                             dtype=DTYPE,
                                             initializer=softmax_init)
        self.softmax_b = tf.get_variable('b', [n_tokens_vocab],
                                         dtype=DTYPE,
                                         initializer=tf.constant_initializer(0.0))

    # now calculate losses
    # loss for each direction of the LSTM
    self.individual_losses = []

    if self.bidirectional:
        next_ids = [self.next_token_id, self.next_token_id_reverse]
    else:
        next_ids = [self.next_token_id]

    # If bidirectional, there are two ids: one forward, the other backward
    for id_placeholder, lstm_output_flat in zip(next_ids, lstm_outputs):
        # flatten the LSTM output and next token id gold to shape:
        # (batch_size * unroll_steps, softmax_dim)
        # Flatten and reshape the token_id placeholders
        next_token_id_flat = tf.reshape(id_placeholder, [-1, 1])

        with tf.control_dependencies([lstm_output_flat]):
            if self.is_training and self.sample_softmax:
                losses = tf.nn.sampled_softmax_loss(
                    self.softmax_W, self.softmax_b,
                    next_token_id_flat, lstm_output_flat,
                    self.options['n_negative_samples_batch'],
                    self.options['n_tokens_vocab'],
                    num_true=1)
            else:
                # get the full softmax loss
                output_scores = tf.matmul(lstm_output_flat,
                                          tf.transpose(self.softmax_W)) + self.softmax_b
                # NOTE: tf.nn.sparse_softmax_cross_entropy_with_logits
                # expects unnormalized output since it performs the
                # softmax internally
                losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=output_scores,
                    labels=tf.squeeze(next_token_id_flat, squeeze_dims=[1]))

        self.individual_losses.append(tf.reduce_mean(losses))

    # now make the total loss -- it's the mean of the individual losses
    if self.bidirectional:
        self.total_loss = 0.5 * (self.individual_losses[0] + self.individual_losses[1])
    else:
        self.total_loss = self.individual_losses[0]
def dmnrun(fulldata, queask): # Loading saved meta graph sess = tf.Session() saver = tf.train.import_meta_graph("C:/Users/Mark/PycharmProjects/DMNTrain/weights/model.meta") saver.restore(sess, tf.train.latest_checkpoint('C:/Users/Mark/PycharmProjects/DMNTrain/weights')) tf.reset_default_graph() def wideArray(x, weight): wide = np.zeros([len(x), weight]) for i in range(0, len(x)): for j in range(0, len(x[i])): wide[i][j] = x[i][j] return wide def octalConv(x): ans = [] rows = [] words = [] for line in x.split(' '): for word in line: number = ord(word) convNum = oct(number) convNum = int(convNum[2:]) rows.append(ans) ans = [] words.append(line) ans = wideArray(rows, 50) return ans, words def contextualize(data, quest): """ Read in the input and question and build a context sets. Output is a list of data points, each of which is a 7-element tuple containing: The sentences in the context in vectorized form. The sentences in the context as a list of string tokens. The question in vectorized form. The question as a list of string tokens. The answer in vectorized form. The answer as a list of string tokens. A list of numbers for supporting statements, which is currently unused. """ output = [] context = [] for entry in data: # Turn input into a word vector # TODO: Change to Octal Decimal encoding context.append(octalConv(entry[:-1])) # Wrap up object so DMN can use it comp_context = tuple(zip(*context)) output.append(comp_context + octalConv(quest) + octalConv('Nothing') + (0,)) return output test_data = contextualize(fulldata, queask) final_train_data = [] def finalize(data): """ Prepares data generated by contextualize() for use in the network. """ final_data = [] for cqas in data: contextvs, contextws, qvs, qws, avs, aws, spt = cqas lspt = [spt] lengths = itertools.accumulate(len(cvec) for cvec in contextvs) context_vec = np.concatenate(contextvs) context_words = sum(contextws, []) # Location markers for the beginnings of new sentences. sentence_ends = np.array(list(lengths)) final_data.append((context_vec, sentence_ends, qvs, lspt, context_words, cqas, avs, aws)) return np.array(final_data) final_test_data = finalize(test_data) tf.reset_default_graph() # Hyperparameters # The number of dimensions used to store data passed between recurrent layers in the network. recurrent_cell_size = 128 # The number of dimensions in our word vectorizations. D = 50 # How quickly the network learns. Too high, and we may run into numeric instability # or other issues. learning_rate = 0.005 # Dropout probabilities. For a description of dropout and what these probabilities are, # see Entailment with TensorFlow. input_p, output_p = 0.5, 0.5 # How many questions we train on at a time. batch_size = 128 # Number of passes in episodic memory. We'll get to this later. passes = 4 # Feed Forward layer sizes: the number of dimensions used to store data passed from feed-forward layers. ff_hidden_size = 256 weight_decay = 0.00000001 # The strength of our regularization. Increase to encourage sparsity in episodic memory, # but makes training slower. Don't make this larger than leraning_rate. training_iterations_count = 400000 # How many questions the network trains on each time it is trained. # Some questions are counted multiple times. display_step = 1 # How many iterations of training occur before each validation check. # Input Module # Context: A [batch_size, maximum_context_length, word_vectorization_dimensions] tensor # that contains all the context information. 
context = tf.placeholder(tf.float32, [None, None, D], "context") context_placeholder = context # I use context as a variable name later on # input_sentence_endings: A [batch_size, maximum_sentence_count, 2] tensor that # contains the locations of the ends of sentences. input_sentence_endings = tf.placeholder(tf.int32, [None, None, 2], "sentence") # recurrent_cell_size: the number of hidden units in recurrent layers. input_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size) # input_p: The probability of maintaining a specific hidden input unit. # Likewise, output_p is the probability of maintaining a specific hidden output unit. gru_drop = tf.contrib.rnn.DropoutWrapper(input_gru, input_p, output_p) # dynamic_rnn also returns the final internal state. We don't need that, and can # ignore the corresponding output (_). input_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, context, dtype=tf.float32, scope="input_module") # cs: the facts gathered from the context. cs = tf.gather_nd(input_module_outputs, input_sentence_endings) # to use every word as a fact, useful for tasks with one-sentence contexts s = input_module_outputs # Question Module # query: A [batch_size, maximum_question_length, word_vectorization_dimensions] tensor # that contains all of the questions. query = tf.placeholder(tf.float32, [None, None, D], "query") # input_query_lengths: A [batch_size, 2] tensor that contains question length information. # input_query_lengths[:,1] has the actual lengths; input_query_lengths[:,0] is a simple range() # so that it plays nice with gather_nd. input_query_lengths = tf.placeholder(tf.int32, [None, 2], "query_lengths") question_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, query, dtype=tf.float32, scope=tf.VariableScope(True, "input_module")) # q: the question states. A [batch_size, recurrent_cell_size] tensor. q = tf.gather_nd(question_module_outputs, input_query_lengths) # Episodic Memory # make sure the current memory (i.e. the question vector) is broadcasted along the facts dimension size = tf.stack([tf.constant(1), tf.shape(cs)[1], tf.constant(1)]) re_q = tf.tile(tf.reshape(q, [-1, 1, recurrent_cell_size]), size) # Final output for attention, needs to be 1 in order to create a mask output_size = 1 # Weights and biases attend_init = tf.random_normal_initializer(stddev=0.1) w_1 = tf.get_variable("attend_w1", [1, recurrent_cell_size * 7, recurrent_cell_size], tf.float32, initializer=attend_init) w_2 = tf.get_variable("attend_w2", [1, recurrent_cell_size, output_size], tf.float32, initializer=attend_init) b_1 = tf.get_variable("attend_b1", [1, recurrent_cell_size], tf.float32, initializer=attend_init) b_2 = tf.get_variable("attend_b2", [1, output_size], tf.float32, initializer=attend_init) # Regulate all the weights and biases tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_1)) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_1)) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_2)) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_2)) def attention(c, mem, existing_facts): """ Custom attention mechanism. c: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor that contains all the facts from the contexts. mem: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor that contains the current memory. It should be the same memory for all facts for accurate results. 
existing_facts: A [batch_size, maximum_sentence_count, 1] tensor that acts as a binary mask for which facts exist and which do not. """ with tf.variable_scope("attending") as scope: # attending: The metrics by which we decide what to attend to. attending = tf.concat([c, mem, re_q, c * re_q, c * mem, (c - re_q) ** 2, (c - mem) ** 2], 2) # m1: First layer of multiplied weights for the feed-forward network. # We tile the weights in order to manually broadcast, since tf.matmul does not # automatically broadcast batch matrix multiplication as of TensorFlow 1.2. m1 = tf.matmul(attending * existing_facts, tf.tile(w_1, tf.stack([tf.shape(attending)[0], 1, 1]))) * existing_facts # bias_1: A masked version of the first feed-forward layer's bias # over only existing facts. bias_1 = b_1 * existing_facts # tnhan: First nonlinearity. In the original paper, this is a tanh nonlinearity; # choosing relu was a design choice intended to avoid issues with # low gradient magnitude when the tanh returned values close to 1 or -1. tnhan = tf.nn.relu(m1 + bias_1) # m2: Second layer of multiplied weights for the feed-forward network. # Still tiling weights for the same reason described in m1's comments. m2 = tf.matmul(tnhan, tf.tile(w_2, tf.stack([tf.shape(attending)[0], 1, 1]))) # bias_2: A masked version of the second feed-forward layer's bias. bias_2 = b_2 * existing_facts # norm_m2: A normalized version of the second layer of weights, which is used # to help make sure the softmax nonlinearity doesn't saturate. norm_m2 = tf.nn.l2_normalize(m2 + bias_2, -1) # softmaxable: A hack in order to use sparse_softmax on an otherwise dense tensor. # We make norm_m2 a sparse tensor, then make it dense again after the operation. softmax_idx = tf.where(tf.not_equal(norm_m2, 0))[:, :-1] softmax_gather = tf.gather_nd(norm_m2[..., 0], softmax_idx) softmax_shape = tf.shape(norm_m2, out_type=tf.int64)[:-1] softmaxable = tf.SparseTensor(softmax_idx, softmax_gather, softmax_shape) return tf.expand_dims(tf.sparse_tensor_to_dense(tf.sparse_softmax(softmaxable)), -1) # facts_0s: a [batch_size, max_facts_length, 1] tensor # whose values are 1 if the corresponding fact exists and 0 if not. facts_0s = tf.cast(tf.count_nonzero(input_sentence_endings[:, :, -1:], -1, keepdims=True), tf.float32) with tf.variable_scope("Episodes") as scope: attention_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size) # memory: A list of all tensors that are the (current or past) memory state # of the attention mechanism. memory = [q] # attends: A list of all tensors that represent what the network attends to. attends = [] for a in range(passes): # attention mask attend_to = attention(cs, tf.tile(tf.reshape(memory[-1], [-1, 1, recurrent_cell_size]), size), facts_0s) # Inverse attention mask, for what's retained in the state. retain = 1 - attend_to # GRU pass over the facts, according to the attention mask. while_valid_index = (lambda state, index: index < tf.shape(cs)[1]) update_state = (lambda state, index: (attend_to[:, index, :] * attention_gru(cs[:, index, :], state)[0] + retain[:, index, :] * state)) # start loop with most recent memory and at the first index memory.append(tuple(tf.while_loop(while_valid_index, (lambda state, index: (update_state(state, index), index + 1)), loop_vars=[memory[-1], 0]))[0]) attends.append(attend_to) # Reuse variables so the GRU pass uses the same variables every pass. scope.reuse_variables() # Answer Module # a0: Final memory state. 
(Input to answer module) a0 = tf.concat([memory[-1], q], -1) # fc_init: Initializer for the final fully connected layer's weights. fc_init = tf.random_normal_initializer(stddev=0.1) with tf.variable_scope("answer"): # w_answer: The final fully connected layer's weights. w_answer = tf.get_variable("weight", [recurrent_cell_size * 2, D], tf.float32, initializer=fc_init) # Regulate the fully connected layer's weights tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_answer)) # The regressed word. This isn't an actual word yet; # we still have to find the closest match. logit = tf.expand_dims(tf.matmul(a0, w_answer), 1) # Make a mask over which words exist. with tf.variable_scope("ending"): all_ends = tf.reshape(input_sentence_endings, [-1, 2]) range_ends = tf.range(tf.shape(all_ends)[0]) ends_indices = tf.stack([all_ends[:, 0], range_ends], axis=1) ind = tf.reduce_max(tf.scatter_nd(ends_indices, all_ends[:, 1], [tf.shape(q)[0], tf.shape(all_ends)[0]]), axis=-1) range_ind = tf.range(tf.shape(ind)[0]) mask_ends = tf.cast(tf.scatter_nd(tf.stack([ind, range_ind], axis=1), tf.ones_like(range_ind), [tf.reduce_max(ind) + 1, tf.shape(ind)[0]]), bool) # A bit of a trick. With the locations of the ends of the mask (the last periods in # each of the contexts) as 1 and the rest as 0, we can scan with exclusive or # (starting from all 1). For each context in the batch, this will result in 1s # up until the marker (the location of that last period) and 0s afterwards. mask = tf.scan(tf.logical_xor, mask_ends, tf.ones_like(range_ind, dtype=bool)) # We score each possible word inversely with their Euclidean distance to the regressed word. # The highest score (lowest distance) will correspond to the selected word. logits = -tf.reduce_sum(tf.square(context * tf.transpose(tf.expand_dims( tf.cast(mask, tf.float32), -1), [1, 0, 2]) - logit), axis=-1, name='logits') # Training # gold_standard: The real answers. gold_standard = tf.placeholder(tf.float32, [None, 1, D], "answer") with tf.variable_scope('accuracy'): eq = tf.equal(context, gold_standard) corrbool = tf.reduce_all(eq, -1, name='corrbool') logloc = tf.reduce_max(logits, -1, keepdims=True) # locs: A boolean tensor that indicates where the score # matches the minimum score. This happens on multiple dimensions, # so in the off chance there's one or two indexes that match # we make sure it matches in all indexes. locs = tf.equal(logits, logloc) # correctsbool: A boolean tensor that indicates for which # words in the context the score always matches the minimum score. correctsbool = tf.reduce_any(tf.logical_and(locs, corrbool), -1) # corrects: A tensor that is simply correctsbool cast to floats. corrects = tf.where(correctsbool, tf.ones_like(correctsbool, dtype=tf.float32), tf.zeros_like(correctsbool, dtype=tf.float32)) # corr: corrects, but for the right answer instead of our selected answer. corr = tf.where(corrbool, tf.ones_like(corrbool, dtype=tf.float32), tf.zeros_like(corrbool, dtype=tf.float32)) with tf.variable_scope("loss"): # Use sigmoid cross entropy as the base loss, # with our distances as the relative probabilities. There are # multiple correct labels, for each location of the answer word within the context. loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.nn.l2_normalize(logits, -1), labels=corr) # Add regularization losses, weighted by weight_decay. 
total_loss = tf.reduce_mean(loss) + weight_decay * tf.add_n( tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) # TensorFlow's default implementation of the Adam optimizer works. We can adjust more than # just the learning rate, but it's not necessary to find a very good optimum. optimizer = tf.train.AdamOptimizer(learning_rate) # Once we have an optimizer, we ask it to minimize the loss # in order to work towards the proper training. opt_op = optimizer.minimize(total_loss) # Initialize variables init = tf.global_variables_initializer() # Launch the TensorFlow session sess = tf.Session() sess.run(init) def prep_batch(batch_data, more_data=False): """ Prepare all the preproccessing that needs to be done on a batch-by-batch basis. """ context_vec, sentence_ends, questionvs, spt, context_words, cqas, answervs, _ = zip(*batch_data) ends = list(sentence_ends) maxend = max(map(len, ends)) aends = np.zeros((len(ends), maxend)) for index, i in enumerate(ends): for indexj, x in enumerate(i): aends[index, indexj] = x - 1 new_ends = np.zeros(aends.shape + (2,)) for index, x in np.ndenumerate(aends): new_ends[index + (0,)] = index[0] new_ends[index + (1,)] = x contexts = list(context_vec) max_context_length = max([len(x) for x in contexts]) contextsize = list(np.array(contexts[0]).shape) contextsize[0] = max_context_length final_contexts = np.zeros([len(contexts)] + contextsize) contexts = [np.array(x) for x in contexts] for i, context in enumerate(contexts): final_contexts[i, 0:len(context), :] = context max_query_length = max(len(x) for x in questionvs) querysize = list(np.array(questionvs[0]).shape) querysize[:1] = [len(questionvs), max_query_length] queries = np.zeros(querysize) querylengths = np.array(list(zip(range(len(questionvs)), [len(q) - 1 for q in questionvs]))) questions = [np.array(q) for q in questionvs] for i, question in enumerate(questions): queries[i, 0:len(question), :] = question data = {context_placeholder: final_contexts, input_sentence_endings: new_ends, query: queries, input_query_lengths: querylengths, gold_standard: answervs} return (data, context_words, cqas) if more_data else data # Use TQDM if installed tqdm_installed = False # Prepare validation set batch = np.random.randint(final_test_data.shape[0], size=batch_size * 10) batch_data = final_test_data[batch] validation_set, val_context_words, val_cqas = prep_batch(batch_data, True) holder = [corrbool, locs, total_loss, logits, facts_0s, w_1] + attends + [query, cs, question_module_outputs] print('Starting session') start_time = time.time() ancr = sess.run([corrbool, locs, total_loss, logits, facts_0s, w_1] + attends + [query, cs, question_module_outputs], feed_dict=validation_set) elapsed_time = time.time() - start_time print(elapsed_time) a = ancr[0] n = ancr[1] cr = ancr[2] attenders = np.array(ancr[6:-3]) faq = np.sum(ancr[4], axis=(-1, -2)) # Number of facts in each context limit = 1 # Locations of responses within contexts indices = np.argmax(n, axis=1) # Locations of actual answers within contexts indicesc = np.argmax(a, axis=1) response = "" ans = 0 inp = '' for i, e, cw, cqa in list(zip(indices, indicesc, val_context_words, val_cqas))[:limit]: ccc = " ".join(cw) print("TEXT: ", ccc) inp = ccc print("QUESTION: ", " ".join(cqa[3])) print("RESPONSE: ", cw[i], ["Correct", "Incorrect"][i != e]) ans = i print("EXPECTED: ", cw[e]) print() # For safety, return this if nothing is found sess.close() print('--') tot_index = 0 for line in fulldata: tot_index = tot_index + len(line) if tot_index >= ans: return line return 
response