Example #1
def build_classifier():

    global out, encodings_batch, labels_batch, c_vars_saver, loss, merged_sum

    with tf.variable_scope("classifier"):
        net = lrelu(tf.layers.dense(encodings_batch, 64, kernel_initializer=tf.random_normal_initializer(stddev=0.02),
                                    bias_initializer=tf.constant_initializer(0.0), trainable=is_training,
                                    name='hidden1'))

        net = lrelu(tf.layers.dense(net, 64, kernel_initializer=tf.random_normal_initializer(stddev=0.02),
                                    bias_initializer=tf.constant_initializer(0.0), trainable=is_training,
                                    name='hidden2'))

        out = tf.layers.dense(net, 5, kernel_initializer=tf.random_normal_initializer(stddev=0.02),
                                    bias_initializer=tf.constant_initializer(0.0), trainable=is_training,
                                    name='out')

        loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels_batch, logits=out)

        c_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='classifier/')

        print(c_vars)
        print("aay")

        c_vars_summary = []

        loss_summary = tf.summary.scalar('loss', tf.reduce_mean(loss))

        for var in c_vars:
            c_vars_summary.append(tf.summary.histogram(var.name, var))

        merged_sum = tf.summary.merge([loss_summary] + c_vars_summary)

        c_vars_saver = tf.train.Saver(var_list=c_vars, max_to_keep=5)
  def _build_network(self, is_training=True):
    # select initializers
    if cfg.TRAIN.TRUNCATED:
      initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
      initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

    net_conv = self._image_to_head(is_training)
    with tf.variable_scope(self._scope, self._scope):
      # build the anchors for the image
      self._anchor_component()
      # region proposal network
      rois = self._region_proposal(net_conv, is_training, initializer)
      # region of interest pooling
      if cfg.POOLING_MODE == 'crop':
        pool5 = self._crop_pool_layer(net_conv, rois, "pool5")
      else:
        raise NotImplementedError

    fc7 = self._head_to_tail(pool5, is_training)
    with tf.variable_scope(self._scope, self._scope):
      # region classification
      cls_prob, bbox_pred = self._region_classification(fc7, is_training, 
                                                        initializer, initializer_bbox)

    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
    def __init__(self, sess, n_features, lr=0.01):
        self.sess = sess
        with tf.name_scope('inputs'):
            self.s = tf.placeholder(tf.float32, [1, n_features], "state")
            self.v_ = tf.placeholder(tf.float32, [1, 1], name="v_next")
            self.r = tf.placeholder(tf.float32, name='r')

        with tf.variable_scope('Critic'):
            l1 = tf.layers.dense(
                inputs=self.s,
                units=30,  # number of hidden units
                activation=tf.nn.relu,
                kernel_initializer=tf.random_normal_initializer(0., .1),  # weights
                bias_initializer=tf.constant_initializer(0.1),  # biases
                name='l1'
            )

            self.v = tf.layers.dense(
                inputs=l1,
                units=1,  # output units
                activation=None,
                kernel_initializer=tf.random_normal_initializer(0., .1),  # weights
                bias_initializer=tf.constant_initializer(0.1),  # biases
                name='V'
            )

        with tf.variable_scope('squared_TD_error'):
            self.td_error = tf.reduce_mean(self.r + GAMMA * self.v_ - self.v)
            self.loss = tf.square(self.td_error)    # TD_error = (r+gamma*V_next) - V_eval
        with tf.variable_scope('train'):
            self.train_op = tf.train.AdamOptimizer(lr).minimize(self.loss)
Example #4
  def model(self, images, input_size, output_size, isEval=None):
      with tf.variable_scope('hidden1', reuse=isEval):
          weights = tf.get_variable("weights", [input_size, self.hidden1_units],
              initializer=tf.random_normal_initializer(0.0, 1.0 / math.sqrt(float(input_size)),
                        seed=self.SEED))    
          biases = tf.get_variable("biases", [self.hidden1_units],
              initializer=tf.constant_initializer(0.0))
  
      reg_hidden1 = tf.nn.l2_loss(weights) 
      hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
 
      with tf.variable_scope('softmax_linear', reuse=isEval):
          weights = tf.get_variable("weights", [self.hidden1_units, output_size],
              initializer=tf.random_normal_initializer(0.0, 1.0 / math.sqrt(float(self.hidden1_units)),
                        seed=self.SEED))
      
          biases = tf.get_variable("biases", [output_size],
              initializer=tf.constant_initializer(0.0))
  
          logits = tf.matmul(hidden1, weights) + biases
          reg_linear = tf.nn.l2_loss(weights) 
      
      if isEval:  
          return logits
      else:
          regularizers = (reg_hidden1 + reg_linear)
          return (logits, regularizers)
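
A minimal sketch of how the (logits, regularizers) pair returned in training mode might feed a loss, assuming hypothetical names net (an instance of the class above), images, labels, beta, and learning_rate:

# Hypothetical usage of the model() method above; all names below are assumptions.
logits, regularizers = net.model(images, input_size, output_size, isEval=False)
cross_entropy = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))
loss = cross_entropy + beta * regularizers  # add the L2 penalties from both layers
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)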
Example #5
    def __init__(self, n_inputs, n_rules, learning_rate=1e-2):
        self.n = n_inputs
        self.m = n_rules
        self.inputs = tf.placeholder(tf.float32, shape=(None, n_inputs))  # Input
        self.targets = tf.placeholder(tf.float32, shape=None)  # Desired output
        mu = tf.get_variable("mu", [n_rules * n_inputs],
                             initializer=tf.random_normal_initializer(0, 1))  # Means of Gaussian MFS
        sigma = tf.get_variable("sigma", [n_rules * n_inputs],
                                initializer=tf.random_normal_initializer(0, 1))  # Standard deviations of Gaussian MFS
        y = tf.get_variable("y", [1, n_rules], initializer=tf.random_normal_initializer(0, 1))  # Sequent centers

        self.params = tf.trainable_variables()

        self.rul = tf.reduce_prod(
            tf.reshape(tf.exp(-0.5 * tf.square(tf.subtract(tf.tile(self.inputs, (1, n_rules)), mu)) / tf.square(sigma)),
                       (-1, n_rules, n_inputs)), axis=2)  # Rule activations
        # Fuzzy base expansion function:
        num = tf.reduce_sum(tf.multiply(self.rul, y), axis=1)
        den = tf.clip_by_value(tf.reduce_sum(self.rul, axis=1), 1e-12, 1e12)
        self.out = tf.divide(num, den)

        self.loss = tf.losses.huber_loss(self.targets, self.out)  # Loss function computation
        # Other loss functions for regression, uncomment to try them:
        # loss = tf.sqrt(tf.losses.mean_squared_error(target, out))
        # loss = tf.losses.absolute_difference(target, out)
        self.optimize = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.loss)  # Optimization step
        # Other optimizers, uncomment to try them:
        # self.optimize = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(self.loss)
        # self.optimize = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(self.loss)
        self.init_variables = tf.global_variables_initializer()  # Variable initializer
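
A minimal training-loop sketch for the constructor above, assuming it belongs to a class named ANFIS and assuming NumPy arrays x_train / y_train (none of these names appear in the original snippet):

# Hypothetical usage of the ANFIS constructor above.
import numpy as np

x_train = np.random.rand(100, 2).astype(np.float32)  # placeholder data
y_train = np.random.rand(100).astype(np.float32)

anfis = ANFIS(n_inputs=2, n_rules=8, learning_rate=1e-2)
with tf.Session() as sess:
    sess.run(anfis.init_variables)
    for epoch in range(100):
        _, loss_val = sess.run([anfis.optimize, anfis.loss],
                               feed_dict={anfis.inputs: x_train, anfis.targets: y_train})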
Example #6
def SRGAN_g(t_image, is_train=False, reuse=False):
    """ Generator in Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network
    feature maps (n) and stride (s)
    """
    w_init = tf.random_normal_initializer(stddev=0.02)
    b_init = None  # tf.constant_initializer(value=0.0)
    g_init = tf.random_normal_initializer(1., 0.02)
    with tf.variable_scope("SRGAN_g", reuse=reuse) as vs:
        # tl.layers.set_name_reuse(reuse) # remove for TL 1.8.0+
        n = InputLayer(t_image, name='in')
        n = Conv2d(n, 64, (3, 3), (1, 1), act=tf.nn.relu, padding='SAME', W_init=w_init, name='n64s1/c')
        temp = n

        # B residual blocks
        for i in range(16):
            nn = Conv2d(n, 64, (3, 3), (1, 1), act=None, padding='SAME', W_init=w_init, b_init=b_init, name='n64s1/c1/%s' % i)
            nn = BatchNormLayer(nn, act=tf.nn.relu, is_train=is_train, gamma_init=g_init, name='n64s1/b1/%s' % i)
            nn = Conv2d(nn, 64, (3, 3), (1, 1), act=None, padding='SAME', W_init=w_init, b_init=b_init, name='n64s1/c2/%s' % i)
            nn = BatchNormLayer(nn, is_train=is_train, gamma_init=g_init, name='n64s1/b2/%s' % i)
            nn = ElementwiseLayer([n, nn], tf.add, name='b_residual_add/%s' % i)
            n = nn

        n = Conv2d(n, 64, (3, 3), (1, 1), act=None, padding='SAME', W_init=w_init, b_init=b_init, name='n64s1/c/m')
        n = BatchNormLayer(n, is_train=is_train, gamma_init=g_init, name='n64s1/b/m')
        n = ElementwiseLayer([n, temp], tf.add, name='add3')
        # B residual blocks end

        n = Conv2d(n, 256, (3, 3), (1, 1), act=None, padding='SAME', W_init=w_init, name='n256s1/1')
        n = SubpixelConv2d(n, scale=2, n_out_channel=None, act=tf.nn.relu, name='pixelshufflerx2/1')

        n = Conv2d(n, 256, (3, 3), (1, 1), act=None, padding='SAME', W_init=w_init, name='n256s1/2')
        n = SubpixelConv2d(n, scale=2, n_out_channel=None, act=tf.nn.relu, name='pixelshufflerx2/2')

        n = Conv2d(n, 3, (1, 1), (1, 1), act=tf.nn.tanh, padding='SAME', W_init=w_init, name='out')
        return n
Example #7
    def _build_net(self):
        with tf.name_scope('inputs'):
            self.tf_obs=tf.placeholder(tf.float32,[None,self.n_features],name="observations")
            self.tf_acts=tf.placeholder(tf.int32,[None,],name="actions_num")
            self.tf_vt=tf.placeholder(tf.float32,[None,],name="actions_value")

        layer=tf.layers.dense(
            inputs=self.tf_obs,
            units=10,
            activation=tf.nn.tanh,
            kernel_initializer=tf.random_normal_initializer(mean=0,stddev=0.3),
            bias_initializer=tf.constant_initializer(0.1),
            name='fc1'

        )

        all_act=tf.layers.dense(
            inputs=layer,
            units=self.n_actions,
            activation=None,
            kernel_initializer=tf.random_normal_initializer(mean=0,stddev=0.3),
            bias_initializer=tf.constant_initializer(0.1),
            name='fc2'


        )

        self.all_act_prob=tf.nn.softmax(all_act,name='act_prob')

        with tf.name_scope('loss'):
            neg_log_prob=tf.nn.sparse_softmax_cross_entropy_with_logits(logits=all_act,labels=self.tf_acts)
            loss=tf.reduce_mean(neg_log_prob*self.tf_vt)  # the objective is to maximize log_p * R, i.e. minimize its negative

        with tf.name_scope('train'):
            self.train_op=tf.train.AdamOptimizer(self.lr).minimize(loss)
    def _build_net(self):
        with tf.name_scope('inputs'):
            self.tf_obs = tf.placeholder(tf.float32, [None, self.n_features], name="observations")
            self.tf_acts = tf.placeholder(tf.int32, [None, ], name="actions_num")
            self.tf_vt = tf.placeholder(tf.float32, [None, ], name="actions_value")
        # fc1
        layer = tf.layers.dense(
            inputs=self.tf_obs,
            units=10,
            activation=tf.nn.tanh,  # tanh activation
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
            bias_initializer=tf.constant_initializer(0.1),
            name='fc1'
        )
        # fc2
        all_act = tf.layers.dense(
            inputs=layer,
            units=self.n_actions,
            activation=None,
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
            bias_initializer=tf.constant_initializer(0.1),
            name='fc2'
        )

        self.all_act_prob = tf.nn.softmax(all_act, name='act_prob')  # use softmax to convert to probability

        with tf.name_scope('loss'):
            # to maximize total reward (log_p * R) is to minimize -(log_p * R), and TF only provides minimize(loss)
            neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=all_act, labels=self.tf_acts)   # this is negative log of chosen action
            # or in this way:
            # neg_log_prob = tf.reduce_sum(-tf.log(self.all_act_prob)*tf.one_hot(self.tf_acts, self.n_actions), axis=1)
            loss = tf.reduce_mean(neg_log_prob * self.tf_vt)  # reward guided loss

        with tf.name_scope('train'):
            self.train_op = tf.train.AdamOptimizer(self.lr).minimize(loss)
Example #9
def discriminator(X, reuse=False):
    with tf.variable_scope('discriminator'):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        K = 64
        M = 128
        N = 256

        W1 = tf.get_variable('D_W1', [4, 4, 1, K], initializer=tf.random_normal_initializer(stddev=0.1))
        B1 = tf.get_variable('D_B1', [K], initializer=tf.constant_initializer())
        W2 = tf.get_variable('D_W2', [4, 4, K, M], initializer=tf.random_normal_initializer(stddev=0.1))
        B2 = tf.get_variable('D_B2', [M], initializer=tf.constant_initializer())
        W3 = tf.get_variable('D_W3', [7*7*M, N], initializer=tf.random_normal_initializer(stddev=0.1))
        B3 = tf.get_variable('D_B3', [N], initializer=tf.constant_initializer())
        W4 = tf.get_variable('D_W4', [N, 1], initializer=tf.random_normal_initializer(stddev=0.1))
        B4 = tf.get_variable('D_B4', [1], initializer=tf.constant_initializer())

        X = tf.reshape(X, [-1, 28, 28, 1], 'reshape')

        conv1 = conv(X, W1, B1, stride=2, name='conv1')
        bn1 = tf.contrib.layers.batch_norm(conv1)
        conv2 = conv(tf.nn.dropout(lrelu(bn1), 0.4), W2, B2, stride=2, name='conv2')
        # conv2 = conv(lrelu(conv1), W2, B2, stride=2, name='conv2')

        bn2 = tf.contrib.layers.batch_norm(conv2)
        flat = tf.reshape(tf.nn.dropout(lrelu(bn2), 0.4), [-1, 7*7*M], name='flat')
        # flat = tf.reshape(lrelu(conv2), [-1, 7*7*M], name='flat')

        dense = lrelu(tf.matmul(flat, W3) + B3)
        logits = tf.matmul(dense, W4) + B4
        prob = tf.nn.sigmoid(logits)
        return prob, logits
Example #10
    def _build_cnn(self, feat_x):

        with tf.variable_scope("cnn_global", reuse=True):
            W1 = tf.get_variable(dtype=tf.float32,
                                shape=[self.filter_stride, self.dim_feat_x, 1, self.num_feat_map],
                                name="weight_w1",
                                initializer=tf.random_normal_initializer(mean=0.0, stddev=0.1))
            b1 = tf.get_variable(dtype=tf.float32,
                                 shape=[self.num_feat_map], name="bias_b1", initializer=tf.constant_initializer(1.0))

        x_inputs = tf.reshape(feat_x, [-1, self.window_size, self.dim_feat_x, 1])
        # print x_inputs.get_shape()

        # h_conv_1 size: [-1, dwf, ws, nfm]
        h_conv_1 = tf.nn.relu(self._conv_2d(x_inputs, W1) + b1)
        # print h_conv_1.get_shape()

        # h_max_pool size: [-1, 1,1, nfm]
        h_max_pool = self._max_pool(h_conv_1)
        # print h_max_pool.get_shape()

        # concentrate in one vector
        # sent_vec size: [-1, nfm]
        sent_vec = tf.reshape(h_max_pool, [-1, self.num_feat_map])
        # print sent_vec.get_shape()
        with tf.variable_scope("cnn_global", reuse=True):
            W2 = tf.get_variable(dtype=tf.float32,
                                 shape=[self.num_feat_map, self.output_size], name="weight_w2",
                                 initializer=tf.random_normal_initializer(mean=0.0, stddev=0.1))
            b2 = tf.get_variable(dtype=tf.float32,
                                 shape=[self.output_size], name="bias_b2", initializer=tf.constant_initializer(1.0))

        logits = tf.matmul(sent_vec, W2) + b2

        return logits
Example #11
def generator(X, batch_size=64):
    with tf.variable_scope('generator'):

        K = 256
        L = 128
        M = 64

        W1 = tf.get_variable('G_W1', [100, 7*7*K], initializer=tf.random_normal_initializer(stddev=0.1))
        B1 = tf.get_variable('G_B1', [7*7*K], initializer=tf.constant_initializer())

        W2 = tf.get_variable('G_W2', [4, 4, M, K], initializer=tf.random_normal_initializer(stddev=0.1))
        B2 = tf.get_variable('G_B2', [M], initializer=tf.constant_initializer())

        W3 = tf.get_variable('G_W3', [4, 4, 1, M], initializer=tf.random_normal_initializer(stddev=0.1))
        B3 = tf.get_variable('G_B3', [1], initializer=tf.constant_initializer())

        X = lrelu(tf.matmul(X, W1) + B1)
        X = tf.reshape(X, [batch_size, 7, 7, K])
        deconv1 = deconv(X, W2, B2, shape=[batch_size, 14, 14, M], stride=2, name='deconv1')
        bn1 = tf.contrib.layers.batch_norm(deconv1)
        deconv2 = deconv(tf.nn.dropout(lrelu(bn1), 0.4), W3, B3, shape=[batch_size, 28, 28, 1], stride=2, name='deconv2')

        XX = tf.reshape(deconv2, [-1, 28*28], 'reshape')

        return tf.nn.sigmoid(XX)
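
A sketch of wiring this generator with the convolutional discriminator from Example #9 into the usual GAN losses, assuming hypothetical placeholders z and real_images and the same conv/deconv/lrelu helpers those functions rely on:

# Hypothetical GAN loss wiring; z and real_images are assumptions.
batch_size = 64
z = tf.placeholder(tf.float32, [batch_size, 100], name='z')
real_images = tf.placeholder(tf.float32, [None, 784], name='real_images')

fake_images = generator(z, batch_size=batch_size)
_, d_logits_real = discriminator(real_images)
_, d_logits_fake = discriminator(fake_images, reuse=True)

d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
    logits=d_logits_real, labels=tf.ones_like(d_logits_real)))
d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
    logits=d_logits_fake, labels=tf.zeros_like(d_logits_fake)))
d_loss = d_loss_real + d_loss_fake
g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
    logits=d_logits_fake, labels=tf.ones_like(d_logits_fake)))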
Example #12
def bpr_mf(user_count,item_count,hidden_dim):
    u = tf.placeholder(tf.int32,[None])
    i = tf.placeholder(tf.int32,[None])
    j = tf.placeholder(tf.int32,[None])

    user_emb_w = tf.get_variable("user_emb_w", [user_count + 1, hidden_dim],
                                 initializer=tf.random_normal_initializer(0, 0.1))
    item_emb_w = tf.get_variable("item_emb_w", [item_count + 1, hidden_dim],
                                 initializer=tf.random_normal_initializer(0, 0.1))

    u_emb = tf.nn.embedding_lookup(user_emb_w, u)
    i_emb = tf.nn.embedding_lookup(item_emb_w, i)
    j_emb = tf.nn.embedding_lookup(item_emb_w, j)


    x = tf.reduce_sum(tf.multiply(u_emb,(i_emb-j_emb)),1,keep_dims=True)

    mf_auc = tf.reduce_mean(tf.to_float(x>0))

    l2_norm = tf.add_n([
        tf.reduce_sum(tf.multiply(u_emb, u_emb)),
        tf.reduce_sum(tf.multiply(i_emb, i_emb)),
        tf.reduce_sum(tf.multiply(j_emb, j_emb))
    ])

    regulation_rate = 0.0001
    bprloss = regulation_rate * l2_norm - tf.reduce_mean(tf.log(tf.sigmoid(x)))

    train_op = tf.train.GradientDescentOptimizer(0.01).minimize(bprloss)
    return u, i, j, mf_auc, bprloss, train_op
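
A minimal usage sketch for bpr_mf, assuming user_count, item_count, and NumPy id arrays uid, pos_id, neg_id prepared elsewhere:

# Hypothetical training loop for the BPR-MF graph above.
u, i, j, mf_auc, bprloss, train_op = bpr_mf(user_count, item_count, hidden_dim=20)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        # uid: user ids, pos_id: positively rated items, neg_id: sampled negatives
        _, loss_val, auc = sess.run([train_op, bprloss, mf_auc],
                                    feed_dict={u: uid, i: pos_id, j: neg_id})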
Example #13
def model(hparams, X, past=None, scope='model', reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        results = {}
        batch, sequence = shape_list(X)

        wpe = tf.get_variable('wpe', [hparams.n_ctx, hparams.n_embd],
                             initializer=tf.random_normal_initializer(stddev=0.01))
        wte = tf.get_variable('wte', [hparams.n_vocab, hparams.n_embd],
                             initializer=tf.random_normal_initializer(stddev=0.02))
        past_length = 0 if past is None else tf.shape(past)[-2]
        h = tf.gather(wte, X) + tf.gather(wpe, positions_for(X, past_length))

        # Transformer
        presents = []
        pasts = tf.unstack(past, axis=1) if past is not None else [None] * hparams.n_layer
        assert len(pasts) == hparams.n_layer
        for layer, past in enumerate(pasts):
            h, present = block(h, 'h%d' % layer, past=past, hparams=hparams)
            presents.append(present)
        results['present'] = tf.stack(presents, axis=1)
        h = norm(h, 'ln_f')

        # Language model loss.  Do tokens <n predict token n?
        h_flat = tf.reshape(h, [batch*sequence, hparams.n_embd])
        logits = tf.matmul(h_flat, wte, transpose_b=True)
        logits = tf.reshape(logits, [batch, sequence, hparams.n_vocab])
        results['logits'] = logits
        return results
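
A sketch of the language-model objective hinted at by the comment above ("Do tokens <n predict token n?"), assuming hparams and the integer token tensor X are defined as in the snippet:

# Hypothetical LM loss on top of model(); shift logits and labels by one position.
output = model(hparams, X)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=X[:, 1:], logits=output['logits'][:, :-1]))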
Example #14
def weight(name, shape, init='he', range=None):
    """ Initializes weight.
    :param name: Variable name
    :param shape: Tensor shape
    :param init: Init mode. xavier / normal / uniform / he (default is 'he')
    :param range: Range for the uniform initializer (required when init == 'uniform')
    :return: Variable
    """
    initializer = tf.constant_initializer()
    if init == 'xavier':
        fan_in, fan_out = _get_dims(shape)
        range = math.sqrt(6.0 / (fan_in + fan_out))
        initializer = tf.random_uniform_initializer(-range, range)

    elif init == 'he':
        fan_in, _ = _get_dims(shape)
        std = math.sqrt(2.0 / fan_in)
        initializer = tf.random_normal_initializer(stddev=std)

    elif init == 'normal':
        initializer = tf.random_normal_initializer(stddev=0.1)

    elif init == 'uniform':
        if range is None:
            raise ValueError("range must not be None if uniform init is used.")
        initializer = tf.random_uniform_initializer(-range, range)

    var = tf.get_variable(name, shape, initializer=initializer)
    tf.add_to_collection('l2', tf.nn.l2_loss(var))  # Add L2 Loss
    return var
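
A brief usage sketch for weight(); the layer names and shapes below are purely illustrative:

# Hypothetical calls showing the init modes and the 'l2' collection populated above.
W_h = weight('fc1_w', [256, 128], init='he')        # He init for ReLU layers
W_x = weight('fc2_w', [128, 64], init='xavier')     # Xavier/Glorot uniform
W_u = weight('out_w', [64, 10], init='uniform', range=0.05)
l2_penalty = tf.add_n(tf.get_collection('l2'))      # sum of all collected L2 terms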
def cnn_inference(inputs, input_units, output_units, is_train=True,
                  FLAGS=None):
  """
    Define the CNN model.
    """

  # [BATCH_SIZE, 9] -> [BATCH_SIZE, 3, 3, 1]
  inputs = tf.reshape(inputs, [-1, 3, 3, 1])

  # [BATCH_SIZE, 3, 3, 1] -> [BATCH_SIZE, 3, 3, 8]
  with tf.variable_scope("conv_0"):
    weights = tf.get_variable(
        "weights", [3, 3, 1, 8], initializer=tf.random_normal_initializer())
    bias = tf.get_variable(
        "bias", [8], initializer=tf.random_normal_initializer())

    layer = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding="SAME")
    layer = tf.nn.bias_add(layer, bias)
    layer = tf.nn.relu(layer)

  # [BATCH_SIZE, 3, 3, 8] -> [BATCH_SIZE, 3 * 3 * 8]
  layer = tf.reshape(layer, [-1, 3 * 3 * 8])

  # [BATCH_SIZE, 3 * 3 * 8] -> [BATCH_SIZE, LABEL_SIZE]
  with tf.variable_scope("output_layer"):
    weights = tf.get_variable(
        "weights", [3 * 3 * 8, FLAGS.label_size],
        initializer=tf.random_normal_initializer())
    bias = tf.get_variable(
        "bias", [FLAGS.label_size], initializer=tf.random_normal_initializer())
    layer = tf.add(tf.matmul(layer, weights), bias)

  return layer
  def create_positional_emb_2d(self, targets):
    """Learned 2d positional embedding for images."""
    mesh = targets.mesh

    positional_emb_rows_var = mtf.get_variable(
        mesh, "positional_emb_rows",
        mtf.Shape([self.pos_dim, self.model_dim]),
        initializer=tf.random_normal_initializer(),
        activation_dtype=self.activation_type)
    positional_emb_cols_var = mtf.get_variable(
        mesh, "positional_emb_cols",
        mtf.Shape([self.pos_dim, self.model_dim]),
        initializer=tf.random_normal_initializer(),
        activation_dtype=self.activation_type)

    targets_position_x = mtf.range(mesh, self.rows_dim, dtype=tf.int32)
    targets_position_y = mtf.range(mesh, self.cols_dim, dtype=tf.int32)
    position_x = mtf.broadcast(
        mtf.gather(positional_emb_rows_var, targets_position_x,
                   self.pos_dim),
        mtf.Shape([self.rows_dim, self.cols_dim, self.model_dim]))

    position_y = mtf.broadcast(
        mtf.gather(positional_emb_cols_var, targets_position_y,
                   self.pos_dim),
        mtf.Shape([self.rows_dim, self.cols_dim, self.model_dim]))
    return position_x + position_y
    def __init__(self, sess, n_features, lr=0.01):
        self.sess = sess

        self.s = tf.placeholder(tf.float32, [1, n_features], "state")
        self.v_ = tf.placeholder(tf.float32, [1, 1], "v_next")
        self.r = tf.placeholder(tf.float32, None, 'r')

        with tf.variable_scope('Critic'):
            l1 = tf.layers.dense(
                inputs=self.s,
                units=20,  # number of hidden units
                activation=tf.nn.relu,  # None
                # have to be linear to make sure the convergence of actor.
                # But linear approximator seems hardly learns the correct Q.
                kernel_initializer=tf.random_normal_initializer(0., .1),  # weights
                bias_initializer=tf.constant_initializer(0.1),  # biases
                name='l1'
            )

            self.v = tf.layers.dense(
                inputs=l1,
                units=1,  # output units
                activation=None,
                kernel_initializer=tf.random_normal_initializer(0., .1),  # weights
                bias_initializer=tf.constant_initializer(0.1),  # biases
                name='V'
            )

        with tf.variable_scope('squared_TD_error'):
            self.td_error = self.r + GAMMA * self.v_ - self.v
            self.loss = tf.square(self.td_error)    # TD_error = (r+gamma*V_next) - V_eval
        with tf.variable_scope('train'):
            self.train_op = tf.train.AdamOptimizer(lr).minimize(self.loss)
    def _build_net(self):
        with tf.name_scope('inputs'):
            self.tf_obs = tf.placeholder(tf.float32,[None,self.n_features],name='observation')
            self.tf_acts = tf.placeholder(tf.int32,[None,],name='actions_num')
            self.tf_vt = tf.placeholder(tf.float32,[None,],name='actions_value')

        layer = tf.layers.dense(
            inputs = self.tf_obs,
            units = 10,
            activation= tf.nn.tanh,
            kernel_initializer=tf.random_normal_initializer(mean=0,stddev=0.3),
            bias_initializer= tf.constant_initializer(0.1),
            name='fc1'
        )

        all_act = tf.layers.dense(
            inputs = layer,
            units = self.n_actions,
            activation = None,
            kernel_initializer=tf.random_normal_initializer(mean=0,stddev=0.3),
            bias_initializer = tf.constant_initializer(0.1),
            name='fc2'
        )

        self.all_act_prob = tf.nn.softmax(all_act,name='act_prob')

        with tf.name_scope('loss'):
            #neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.all_act_prob,labels =self.tf_acts)

            neg_log_prob = tf.reduce_sum(-tf.log(self.all_act_prob) * tf.one_hot(indices=self.tf_acts,depth=self.n_actions),axis=1)
            loss = tf.reduce_mean(neg_log_prob * self.tf_vt)


        with tf.name_scope('train'):
            self.train_op = tf.train.AdamOptimizer(self.lr).minimize(loss)
    def __init__(self, sess, n_features, n_actions, lr=0.001):
        self.sess = sess

        self.s = tf.placeholder(tf.float32, [1, n_features], "state")
        self.a = tf.placeholder(tf.int32, None, "act")
        self.td_error = tf.placeholder(tf.float32, None, "td_error")  # TD_error

        with tf.variable_scope('Actor'):
            l1 = tf.layers.dense(
                inputs=self.s,
                units=20,    # number of hidden units
                activation=tf.nn.relu,
                kernel_initializer=tf.random_normal_initializer(0., .1),    # weights
                bias_initializer=tf.constant_initializer(0.1),  # biases
                name='l1'
            )

            self.acts_prob = tf.layers.dense(
                inputs=l1,
                units=n_actions,    # output units
                activation=tf.nn.softmax,   # get action probabilities
                kernel_initializer=tf.random_normal_initializer(0., .1),  # weights
                bias_initializer=tf.constant_initializer(0.1),  # biases
                name='acts_prob'
            )

        with tf.variable_scope('exp_v'):
            log_prob = tf.log(self.acts_prob[0, self.a])
            self.exp_v = tf.reduce_mean(log_prob * self.td_error)  # advantage (TD_error) guided loss

        with tf.variable_scope('train'):
            self.train_op = tf.train.AdamOptimizer(lr).minimize(-self.exp_v)  # minimize(-exp_v) = maximize(exp_v)
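
A sketch of one actor-critic step with the Actor above and a Critic like the ones earlier in this section, assuming instances actor and critic, a session sess, and a transition (s, a, r, s_) from an environment loop:

# Hypothetical single actor-critic update; shapes follow the placeholders above.
import numpy as np

s, s_ = s[np.newaxis, :], s_[np.newaxis, :]            # (n_features,) -> (1, n_features)
v_next = sess.run(critic.v, {critic.s: s_})            # bootstrap value of next state
td_error, _ = sess.run([critic.td_error, critic.train_op],
                       {critic.s: s, critic.v_: v_next, critic.r: r})
_, exp_v = sess.run([actor.train_op, actor.exp_v],
                    {actor.s: s, actor.a: a, actor.td_error: td_error})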
def decoder(input, output_dim, training, stddev=0.02, bias_value=0, reuse=False):
        
    w1 = tf.get_variable("w1", [input.get_shape()[1],1000], initializer=tf.random_normal_initializer(stddev=stddev))
    b1 = tf.get_variable("b1", [1000], initializer=tf.constant_initializer(bias_value))

    w2 = tf.get_variable("w2", [1000,1000], initializer=tf.random_normal_initializer(stddev=stddev))
    b2 = tf.get_variable("b2", [1000], initializer=tf.constant_initializer(bias_value))

    w3 = tf.get_variable("w3", [1000,output_dim], initializer=tf.random_normal_initializer(stddev=stddev))
    b3 = tf.get_variable("b3", [output_dim], initializer=tf.constant_initializer(bias_value))

    fc1 = tf.nn.relu(tf.matmul( input, w1 ) + b1, name='relu1')
    fc2 = tf.nn.relu(tf.matmul( fc1  , w2 ) + b2, name='relu2')
    fc3 = tf.nn.sigmoid(tf.matmul( fc2  , w3 ) + b3 )

    if not reuse:
        tf.summary.histogram('DE/L1/activation', fc1)
        tf.summary.histogram('DE/L1/weight', w1)
        tf.summary.histogram('DE/L1/bias', b1)
        tf.summary.scalar('DE/L1/sparsity', tf.nn.zero_fraction(fc1))

        tf.summary.histogram('DE/L2/activation', fc2)
        tf.summary.histogram('DE/L2/weight', w2)
        tf.summary.histogram('DE/L2/bias', b2)
        tf.summary.scalar('DE/L2/sparsity', tf.nn.zero_fraction(fc2))

        tf.summary.histogram('DE/L3/activation', fc3)
        tf.summary.histogram('DE/L3/weight', w3)
        tf.summary.histogram('DE/L3/bias', b3)
        tf.summary.scalar('DE/L3/sparsity', tf.nn.zero_fraction(fc3))
        
    return fc3, [w1, b1, w2, b2, w3, b3]
Example #21
    def build_graph(self,test_decoder_logits):
        print('starting building graph [sentiment-discriminator]')
        with tf.variable_scope("sentiment") as scope:
            self.inputs = tf.slice(test_decoder_logits,[0,0,0],[self.batch_size,self.max_length,self.vocab_size])
            # variable
            weights = {
                'w2v' : tf.get_variable(initializer = tf.random_uniform_initializer(-0.1, 0.1, dtype=tf.float32),shape = [self.vocab_size, self.embedding_dim], name='w2v'),
                'out_1' : tf.get_variable(initializer = tf.random_normal_initializer(), shape = [self.unit_size*2, 1], name='w_out_1'),
            }
            biases = {
            'out_1' : tf.get_variable(initializer = tf.random_normal_initializer(), shape=[1], name='b_out_1'),
            }
            # structure
            def BiRNN(x):
                x = tf.unstack(x, self.max_length, 1)
                lstm_fw_cell = tf.contrib.rnn.BasicLSTMCell(self.unit_size, forget_bias=1.0)
                lstm_bw_cell = tf.contrib.rnn.BasicLSTMCell(self.unit_size,forget_bias=1.0)
                outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x, dtype = tf.float32 )
                return outputs[-1]

            self.inputs_softmax = tf.nn.softmax(tf.scalar_mul(tf.constant(5.0, shape=[]),self.inputs))
            y_list=[]
            for i in range(self.inputs.get_shape().as_list()[0]):
                y = tf.matmul(self.inputs_softmax[i], weights['w2v'])
                y = tf.reshape(y, [1, self.max_length, self.embedding_dim])
                y_list.append(y)
            embbed_layer = tf.concat(y_list,0)
            layer_1 = BiRNN(embbed_layer)
            pred = tf.matmul(layer_1, weights['out_1']) + biases['out_1'] 
            # get score
            self.score = tf.sigmoid(pred)
Example #22
def discriminator(X, reuse=False):
    with tf.variable_scope('discriminator'):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        J = 784
        K = 128
        L = 1

        W1 = tf.get_variable('D_W1', [J, K],
                             initializer=tf.random_normal_initializer(stddev=xavier_init([J, K])))
        B1 = tf.get_variable('D_B1', [K], initializer=tf.constant_initializer())
        W2 = tf.get_variable('D_W2', [K, L],
                             initializer=tf.random_normal_initializer(stddev=xavier_init([K, L])))
        B2 = tf.get_variable('D_B2', [L], initializer=tf.constant_initializer())

        # summary
        tf.summary.histogram('weight1', W1)
        tf.summary.histogram('weight2', W2)
        tf.summary.histogram('biases1', B1)
        tf.summary.histogram('biases2', B2)

        fc1 = tf.nn.relu((tf.matmul(X, W1) + B1))
        logits = tf.matmul(fc1, W2) + B2
        prob = tf.nn.sigmoid(logits)
        return prob, logits
    def model(self, images, input_size, output_size, isEval=None):

        with tf.variable_scope('nn_hidden1', reuse=isEval):
            #Declaring variables
            weights_h1 = tf.get_variable("weights_h1", [input_size, self.num_hidden1],
                initializer=tf.random_normal_initializer(0.0, 1.0 / math.sqrt(float(input_size)),
                          seed=self.SEED))
            weights_out = tf.get_variable("weights_out", [self.num_hidden1, output_size],
                initializer=tf.random_normal_initializer(0.0, 1.0 / math.sqrt(float(self.num_hidden1)),
                          seed=self.SEED))

            biases_b1 = tf.get_variable("biases_b1", [self.num_hidden1],
                initializer=tf.constant_initializer(0.0))
            biases_out = tf.get_variable("biases_out", [output_size],
                initializer=tf.constant_initializer(0.0))
            
            #Constructing variables, with DropOut
            layer_1 = tf.nn.relu(tf.add(tf.matmul(images, weights_h1), biases_b1))
            layer_1drop = tf.nn.dropout(layer_1, self.keep_prob)
            
            #Evaluating logits
            logits_drop = tf.matmul(layer_1drop, weights_out) + biases_out
            logits=tf.matmul(layer_1, weights_out) + biases_out
            
            reg_linear = tf.nn.l2_loss(weights_out)+tf.nn.l2_loss(weights_h1)

            if isEval:
                return logits
            else:
                regularizers = reg_linear
                return (logits_drop, regularizers)
    def initializeParameters(self, m, n):
        """
        Arguments:
            m -- number of users
            n -- number of items

        Returns:
            parameters -- parameters['b'], global bias, scalar
                          parameters['u'], users bias, shape (m, 1)
                          parameters['d'], item bias, shape (1, n)
                          parameters['P'], users feature matrix, shape (m, K)
                          parameters['Q'], items feature matrix, shape (n, K)        
        """
        k = self.K
        
        parameters = {}
        parameters['b'] = tf.get_variable(name='b', dtype=tf.float64, shape=[],
                                          initializer=tf.zeros_initializer())

        parameters['u'] = tf.get_variable(name='u', dtype=tf.float64, shape=[m, 1],
                                          initializer=tf.zeros_initializer())

        parameters['d'] = tf.get_variable(name='d', dtype=tf.float64, shape=[1, n],
                                          initializer=tf.zeros_initializer())

        parameters['P'] = tf.get_variable(name='P', dtype=tf.float64, shape=[m, k],
                                          initializer=tf.random_normal_initializer())

        parameters['Q'] = tf.get_variable(name='Q', dtype=tf.float64, shape=[n, k],
                                          initializer=tf.random_normal_initializer())

        return parameters
Example #25
    def __init__(self,sess,n_features,gamma = 0.9,lr=0.01):

        self.sess = sess
        self.s = tf.placeholder(tf.float32,[1,n_features],name='state')
        self.v_ = tf.placeholder(tf.float32,[1,1],name='v_next')
        self.r = tf.placeholder(tf.float32,None,name='r')

        with tf.variable_scope('Critic'):
            l1 = tf.layers.dense(
                inputs = self.s,
                units = 20,
                activation = tf.nn.relu,
                kernel_initializer = tf.random_normal_initializer(0,0.1),
                bias_initializer = tf.constant_initializer(0.1),
                name = 'l1'
            )

            self.v = tf.layers.dense(
                inputs = l1,
                units = 1,
                activation = None,
                kernel_initializer=tf.random_normal_initializer(0,0.1),
                bias_initializer = tf.constant_initializer(0.1),
                name = 'V'
            )


        with tf.variable_scope('squared_TD_error'):
            self.td_error  = self.r + gamma * self.v_ - self.v
            self.loss = tf.square(self.td_error)


        with tf.variable_scope('train'):
            self.train_op = tf.train.AdamOptimizer(lr).minimize(self.loss)
Example #26
    def __init__(self, encoder_rnn_output, label_onehot, is_training=True):
        self.encoder_rnn_output = encoder_rnn_output
        self.label_onehot = label_onehot

        self.is_training = is_training


        with tf.variable_scope("encoder_linear1"):
            context_to_hidden_W = tf.get_variable(name="context_to_hidden_W",
                                                  shape=[FLAGS.RNN_SIZE + FLAGS.LABEL_CLASS,
                                                         100],
                                                  dtype=tf.float32,
                                                  initializer=tf.random_normal_initializer(stddev=0.1))

            context_to_hidden_b = tf.get_variable(name="context_to_hidden_b",
                                                  shape=[100],
                                                  dtype=tf.float32)


        with tf.variable_scope("encoder_linear2"):
            context_to_mu_W = tf.get_variable(name="context_to_mu_W",
                                              shape=[100,
                                                     FLAGS.LATENT_VARIABLE_SIZE],
                                              dtype=tf.float32,
                                              initializer=tf.random_normal_initializer(stddev=0.1))

            context_to_mu_b = tf.get_variable(name="context_to_mu_b",
                                              shape=[FLAGS.LATENT_VARIABLE_SIZE],
                                              dtype=tf.float32)

            context_to_logvar_W = tf.get_variable(
                                              name="context_to_logvar_W",
                                              shape=[100,
                                                    FLAGS.LATENT_VARIABLE_SIZE],
                                              dtype=tf.float32,
                                              initializer=tf.random_normal_initializer(stddev=0.1))

            context_to_logvar_b = tf.get_variable(
                                              name="context_to_logvar_b",
                                              shape=[FLAGS.LATENT_VARIABLE_SIZE],
                                              dtype=tf.float32)

        with tf.name_scope("rnn_output_and_label"):
            rnn_output_and_label = tf.concat((encoder_rnn_output, self.label_onehot),
                                             axis=1,
                                             name="concat_encoder_rnn_output_and_label")

        with tf.name_scope("sampler_hiddenstate"):
            h = tf.nn.relu(tf.matmul(rnn_output_and_label, context_to_hidden_W) + context_to_hidden_b)

        with tf.name_scope("mu"):
            self.mu = tf.matmul(h, context_to_mu_W) + context_to_mu_b
        with tf.name_scope("log_var"):
            self.logvar = tf.matmul(h, context_to_logvar_W) + context_to_logvar_b

        with tf.name_scope("z"):
            z = tf.truncated_normal((FLAGS.BATCH_SIZE, FLAGS.LATENT_VARIABLE_SIZE), stddev=1.0)

        with tf.name_scope("latent_variables"):
            self.latent_variables = self.mu + tf.exp(0.5 * self.logvar) * z
Example #27
    def __init__(self,sess,n_features,n_actions,lr=0.001):
        self.sess = sess

        self.s = tf.placeholder(tf.float32,[1,n_features],name='state')
        self.a = tf.placeholder(tf.int32,None,name='act')
        self.td_error = tf.placeholder(tf.float32,None,"td_error")

        with tf.variable_scope('Actor'):
            l1 = tf.layers.dense(
                inputs = self.s,
                units = 20,
                activation = tf.nn.relu,
                kernel_initializer = tf.random_normal_initializer(mean=0,stddev=0.1),
                bias_initializer = tf.constant_initializer(0.1),
                name = 'l1'
            )

            self.acts_prob = tf.layers.dense(
                inputs = l1,
                units = n_actions,
                activation = tf.nn.softmax,
                kernel_initializer = tf.random_normal_initializer(mean=0,stddev=0.1),
                bias_initializer = tf.constant_initializer(0.1),
                name = 'acts_prob'
            )


            with tf.variable_scope('exp_v'):
                log_prob = tf.log(self.acts_prob[0,self.a])
                self.exp_v = tf.reduce_mean(log_prob * self.td_error)


            with tf.variable_scope('train'):
                self.train_op =  tf.train.AdamOptimizer(lr).minimize(-self.exp_v)
Example #28
def poseEvalNetTiny(lin,locs,conf,trainPhase,dropout):

    lin_sz = tf.Tensor.get_shape(lin).as_list()
    lin_numel = reduce(operator.mul, lin_sz[1:], 1)
    lin_re = tf.reshape(lin,[-1,lin_numel])
    lin_re = tf.nn.dropout(lin_re,dropout)
    with tf.variable_scope('lin_fc'):
        weights = tf.get_variable("weights", [lin_numel, conf.nfcfilt],
            initializer=tf.random_normal_initializer(stddev=0.005))
        biases = tf.get_variable("biases", conf.nfcfilt,
            initializer=tf.constant_initializer(0))
        
        lin_fc = tf.nn.relu(batch_norm_2D(tf.matmul(lin_re,weights)+biases,trainPhase))

        
    loc_sz = tf.Tensor.get_shape(locs).as_list()
    loc_numel = reduce(operator.mul, loc_sz[1:], 1)
    loc_re = tf.reshape(locs,[-1,loc_numel])
    with tf.variable_scope('loc_fc'):
        weights = tf.get_variable("weights", [loc_numel, conf.nfcfilt],
            initializer=tf.random_normal_initializer(stddev=0.005))
        biases = tf.get_variable("biases", conf.nfcfilt,
            initializer=tf.constant_initializer(0))
        
        loc_fc = tf.nn.relu(batch_norm_2D(tf.matmul(loc_re,weights)+biases,trainPhase))
        
    joint_fc = tf.concat([lin_fc, loc_fc], 1)
    
    with tf.variable_scope('fc1'):
        weights = tf.get_variable("weights", [conf.nfcfilt*2, conf.nfcfilt],
            initializer=tf.random_normal_initializer(stddev=0.005))
        biases = tf.get_variable("biases", conf.nfcfilt,
            initializer=tf.constant_initializer(0))
        
        joint_fc1 = tf.nn.relu(batch_norm_2D(tf.matmul(joint_fc,weights)+biases,trainPhase))

    with tf.variable_scope('fc2'):
        weights = tf.get_variable("weights", [conf.nfcfilt, conf.nfcfilt],
            initializer=tf.random_normal_initializer(stddev=0.005))
        biases = tf.get_variable("biases", conf.nfcfilt,
            initializer=tf.constant_initializer(0))
        
        joint_fc2 = tf.nn.relu(batch_norm_2D(tf.matmul(joint_fc1,weights)+biases,trainPhase))
        
    with tf.variable_scope('out'):
        weights = tf.get_variable("weights", [conf.nfcfilt, 2],
            initializer=tf.random_normal_initializer(stddev=0.005))
        biases = tf.get_variable("biases", 2,
            initializer=tf.constant_initializer(0))
        
        out = tf.matmul(joint_fc2,weights)+biases
        
    layer_dict = {'lin_fc':lin_fc,
                  'loc_fc':loc_fc,
                  'joint_fc1':joint_fc1,
                  'joint_fc2':joint_fc2,
                  'out':out
                 }
    return out,layer_dict
  def add_model(self, inputs):
    """Creates the RNN LM model.

    In the space provided below, you need to implement the equations for the
    RNN LM model. Note that you may NOT use built in rnn_cell functions from
    tensorflow.

    Hint: Use a zeros tensor of shape (batch_size, hidden_size) as
          initial state for the RNN. Add this to self as instance variable

          self.initial_state
  
          (Don't change variable name)
    Hint: Add the last RNN output to self as instance variable

          self.final_state

          (Don't change variable name)
    Hint: Make sure to apply dropout to the inputs and the outputs.
    Hint: Use a variable scope (e.g. "RNN") to define RNN variables.
    Hint: Perform an explicit for-loop over inputs. You can use
          scope.reuse_variables() to ensure that the weights used at each
          iteration (each time-step) are the same. (Make sure you don't call
          this for iteration 0 though or nothing will be initialized!)
    Hint: Here are the dimensions of the various variables you will need to
          create:
      
          H: (hidden_size, hidden_size) 
          I: (embed_size, hidden_size)
          b_1: (hidden_size,)

    Args:
      inputs: List of length num_steps, each of whose elements should be
              a tensor of shape (batch_size, embed_size).
    Returns:
      outputs: List of length num_steps, each of whose elements should be
               a tensor of shape (batch_size, hidden_size)
    """
    ### YOUR CODE HERE
    self.initial_state = tf.zeros([self.config.batch_size,self.config.hidden_size])
    rnn_outputs = []
    with tf.variable_scope("RNN") as scope:
      rnn_outputs = []
      H = tf.get_variable("H", [self.config.hidden_size, self.config.hidden_size],
                         initializer=tf.random_normal_initializer())
      I = tf.get_variable("I", [self.config.embed_size, self.config.hidden_size],
                         initializer=tf.random_normal_initializer())
      b_1 = tf.Variable(tf.zeros([self.config.hidden_size,]), "b_1")
      # loop over the nums of steps
      state = self.initial_state
      for i in range(len(inputs)):
        scope.reuse_variables()
        tmp = tf.matmul(state,H) + tf.matmul(inputs[i], I) + b_1
        state = tf.nn.dropout(tf.sigmoid(tmp),self.dropout_placeholder)
        rnn_outputs.append(state)
      self.final_state = state
 
    ### END YOUR CODE
    return rnn_outputs
def inference(input):
  weights = tf.get_variable(
      "weights", [784, 10], initializer=tf.random_normal_initializer())
  bias = tf.get_variable(
      "bias", [10], initializer=tf.random_normal_initializer())
  logits = tf.matmul(input, weights) + bias

  return logits
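
A sketch of a training graph around inference(), assuming MNIST-style placeholders and an arbitrary learning rate:

# Hypothetical training setup for the single-layer model above.
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.int64, [None])
logits = inference(x)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))
train_op = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
correct = tf.equal(tf.argmax(logits, 1), y)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))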
Example #31
def conv_fire_module(name,
                     prevLayerOut,
                     prevLayerDim,
                     fireDims,
                     wd=None,
                     **kwargs):
    USE_FP_16 = kwargs.get('usefp16')
    dtype = tf.float16 if USE_FP_16 else tf.float32

    existingParams = kwargs.get('existingParams')

    if (fireDims.get('cnn3x3')):
        cnnName = 'cnn3x3'
        kernelSize = 3
    if (fireDims.get('cnn5x5')):
        cnnName = 'cnn5x5'
        kernelSize = 5
    if (fireDims.get('cnn7x7')):
        cnnName = 'cnn7x7'
        kernelSize = 7

    with tf.variable_scope(name):
        with tf.variable_scope(cnnName) as scope:
            layerName = scope.name.replace("/", "_")
            #kernel = _variable_with_weight_decay('weights',
            #                                     shape=[kernelSize, kernelSize, prevLayerDim, fireDims['cnn3x3']],
            #                                     initializer=existingParams[layerName]['weights'] if (existingParams is not None and
            #                                                                                         layerName in existingParams) else
            #                                                    (tf.contrib.layers.xavier_initializer_conv2d() if kwargs.get('phase')=='train' else
            #                                                     tf.constant_initializer(0.0, dtype=dtype)),
            #                                     dtype=dtype,
            # wd=wd,
            #                                     trainable=kwargs.get('tuneExistingWeights') if (existingParams is not None and
            #                                                                               layerName in existingParams) else True)
            stddev = np.sqrt(2 /
                             np.prod(prevLayerOut.get_shape().as_list()[1:]))
            kernel = _variable_with_weight_decay(
                'weights',
                shape=[
                    kernelSize, kernelSize, prevLayerDim, fireDims[cnnName]
                ],
                initializer=existingParams[layerName]['weights'] if
                (existingParams is not None and layerName in existingParams)
                else (tf.random_normal_initializer(
                    stddev=stddev) if kwargs.get('phase') == 'train' else
                      tf.constant_initializer(0.0, dtype=dtype)),
                dtype=dtype,
                wd=wd,
                trainable=kwargs.get('tuneExistingWeights') if
                (existingParams is not None
                 and layerName in existingParams) else True)
            conv = tf.nn.conv2d(prevLayerOut,
                                kernel, [1, 1, 1, 1],
                                padding='SAME')

            if kwargs.get('weightNorm'):
                # calc weight norm
                conv = batch_norm('weight_norm', conv, dtype)

            if existingParams is not None and layerName in existingParams:
                biases = tf.get_variable(
                    'biases',
                    initializer=existingParams[layerName]['biases'],
                    dtype=dtype)
            else:
                biases = tf.get_variable(
                    'biases', [fireDims[cnnName]],
                    initializer=tf.constant_initializer(0.0),
                    dtype=dtype)

            conv = tf.nn.bias_add(conv, biases)
            convRelu = tf.nn.relu(conv, name=scope.name)
            _activation_summary(convRelu)

        return convRelu, fireDims[cnnName]
Example #32
def fconv_layer(input_data,
                filter_num,
                name,
                is_train=True,
                padding="VALID",
                init_method="xavier",
                bias_term=True,
                is_pretrain=True):
    """
    fully conv layer
    :param input_data: the input data
    :param filter_num: the number of the convolutional kernel
    :param name: name of the layer
    :param is_train: if False, skip this layer, default is True
    :param padding: the padding method, "SAME" | "VALID" (default: "VALID")
    :param init_method: the method of weights initialization (default: xavier)
    :param bias_term: whether the bias term exists or not (default: True)
    :param is_pretrain: whether the parameters are trainable (default: True)
    :return:
        output: a 4-D tensor [batch_size, height, width, channel]
    """
    if is_train is True:
        shape = input_data.get_shape()
        conv_height, conv_width, conv_channel = shape[1].value, shape[
            2].value, shape[3].value

        with tf.variable_scope(name):

            # the method of weights initialization
            if init_method == "xavier":
                initializer = tf.contrib.layers.xavier_initializer()
            elif init_method == "gaussian":
                initializer = tf.random_normal_initializer(stddev=0.01)
            else:
                initializer = tf.truncated_normal_initializer(stddev=0.01)

            weights = tf.get_variable(
                name="weights",
                shape=[conv_height, conv_width, conv_channel, filter_num],
                dtype=tf.float32,
                initializer=initializer,
                trainable=is_pretrain)
            biases = tf.get_variable(name="biases",
                                     shape=[filter_num],
                                     dtype=tf.float32,
                                     initializer=tf.constant_initializer(0.0),
                                     trainable=is_pretrain)
            feature_map = tf.nn.conv2d(input=input_data,
                                       filter=weights,
                                       strides=[1, 1, 1, 1],
                                       padding=padding,
                                       name="conv")
            # biases term
            if bias_term is True:
                output = tf.nn.bias_add(value=feature_map,
                                        bias=biases,
                                        name="biases_add")
            else:
                output = feature_map
    else:
        output = input_data

    # info show
    shape = output.get_shape()
    print("name: %s, shape: (%d, %d, %d)" %
          (name, shape[1], shape[2], shape[3]))

    return output
def generator(z_sample, isTrainable=True, reuse=False, name='theta_generator'):
    with tf.variable_scope(name) as scope:
        #decoder_activations = {};
        if reuse:
            scope.reuse_variables()
        #print('z_sample.shape: ',z_sample.shape);
        z_sample = tf.layers.dense(
            z_sample,
            4 * 4 * 1024,
            activation=None,
            trainable=isTrainable,
            reuse=reuse,
            name='dec_dense_fc_first_layer',
            kernel_initializer=tf.truncated_normal_initializer(stddev=stddev))
        z_sample = tf.layers.batch_normalization(z_sample,
                                                 training=isTrainable,
                                                 reuse=reuse,
                                                 name='bn_0')
        z_sample = tf.nn.relu(z_sample)
        z_sample = tf.reshape(z_sample, [-1, 4, 4, 1024])
        #8x8x512

        deconv1 = tf.layers.conv2d_transpose(
            z_sample,
            kernel_initializer=tf.random_normal_initializer(stddev=stddev),
            filters=512,
            kernel_size=[3, 3],
            padding='SAME',
            activation=None,
            strides=(2, 2),
            name='dec_deconv1_layer',
            trainable=isTrainable,
            reuse=reuse)
        # 16x16
        deconv1 = tf.layers.batch_normalization(deconv1,
                                                training=isTrainable,
                                                reuse=reuse,
                                                name='bn_1')
        deconv1 = tf.nn.relu(deconv1, name='relu_deconv_1')

        # #16x16x256
        deconv2 = tf.layers.conv2d_transpose(
            deconv1,
            kernel_initializer=tf.truncated_normal_initializer(stddev=stddev),
            filters=256,
            kernel_size=[5, 5],
            padding='SAME',
            activation=None,
            strides=(2, 2),
            name='dec_deconv2_layer',
            trainable=isTrainable,
            reuse=reuse)
        # 16x16
        deconv2 = tf.layers.batch_normalization(deconv2,
                                                training=isTrainable,
                                                reuse=reuse,
                                                name='bn_2')
        deconv2 = tf.nn.relu(deconv2, name='relu_deconv_2')

        #32x32x128
        deconv3 = tf.layers.conv2d_transpose(
            deconv2,
            kernel_initializer=tf.truncated_normal_initializer(stddev=stddev),
            filters=128,
            kernel_size=[5, 5],
            padding='SAME',
            activation=None,
            strides=(2, 2),
            name='dec_deconv3_layer',
            trainable=isTrainable,
            reuse=reuse)
        # 16x16
        deconv3 = tf.layers.batch_normalization(deconv3,
                                                training=isTrainable,
                                                reuse=reuse,
                                                name='bn_3')
        deconv3 = tf.nn.relu(deconv3, name='relu_deconv_3')

        deconv4 = tf.layers.conv2d_transpose(
            deconv3,
            kernel_initializer=tf.truncated_normal_initializer(stddev=stddev),
            filters=64,
            kernel_size=[5, 5],
            padding='SAME',
            activation=None,
            strides=(2, 2),
            name='dec_deconv4_layer',
            trainable=isTrainable,
            reuse=reuse)
        # 16x16
        deconv4 = tf.layers.batch_normalization(deconv4,
                                                training=isTrainable,
                                                reuse=reuse,
                                                name='bn_4')
        deconv4 = tf.nn.relu(deconv4, name='relu_deconv_4')

        #64x64x64
        deconv5 = tf.layers.conv2d_transpose(
            deconv4,
            kernel_initializer=tf.truncated_normal_initializer(stddev=stddev),
            filters=3,
            kernel_size=[5, 5],
            padding='SAME',
            activation=None,
            strides=(1, 1),
            name='dec_deconv5_layer',
            trainable=isTrainable,
            reuse=reuse)
        # 16x16
        #deconv4 = tf.layers.dropout(deconv4,rate=keep_prob,training=True);
        deconv5 = tf.nn.tanh(deconv5)
        #64x64x3

        deconv_5_reshaped = tf.reshape(
            deconv5, [-1, img_height, img_width, num_channels])
        return deconv_5_reshaped
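
As a quick check of the shape arithmetic annotated above, the self-contained sketch below reproduces the same stride-2 upsampling path with tf.layers.conv2d_transpose. It assumes the decoder receives a 4x4 feature map (implied by the 8x8x512 annotation after the first block) and produces a 64x64x3 image; the placeholder depth and all names are illustrative, not part of the original model.

import tensorflow as tf

z_demo = tf.placeholder(tf.float32, [None, 4, 4, 1024], name='z_demo')
net = z_demo
for i, f in enumerate([512, 256, 128, 64]):
    # each stride-2 transposed conv doubles the spatial size: 4 -> 8 -> 16 -> 32 -> 64
    net = tf.layers.conv2d_transpose(net, f, 5, strides=2, padding='SAME',
                                     name='demo_deconv%d' % i)
img_demo = tf.nn.tanh(tf.layers.conv2d_transpose(net, 3, 5, strides=1, padding='SAME',
                                                 name='demo_out'))
print(img_demo.get_shape())  # (?, 64, 64, 3)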
Beispiel #34
0
def main(args):

    rmf_path = os.path.join(args.data_path, RATING_MATRIX_FILE)
    train_file = os.path.join(args.data_path, TRAIN_FILE)
    test_file = os.path.join(args.data_path, TEST_FILE)

    print(rmf_path)
    print(train_file)
    print(test_file)

    rm = Reader.get_rating_matrix(rmf_path)
    train_data = Reader(train_file, rm, args.batch_size, args.chunk_size,
                        args.num_ratings)
    test_data = Reader(test_file, rm, args.batch_size, args.chunk_size,
                       args.num_ratings)
    num_users = train_data.num_users
    num_items = train_data.num_items
    epochs = args.num_epochs

    print(rm.shape)
    print("Number of users: ", num_users)
    print("Number of items: ", num_items)

    if args.chunk_size == 0:
        cs = train_data.max_seq_len
    else:
        cs = min(args.chunk_size, train_data.max_seq_len)

    settings = {
        "batch_size": args.batch_size,
        "chunk_size": cs,
        "lr": args.learning_rate,
        "rho": args.decay,
        "num_users": num_users,
        "num_items": num_items,
        "num_units": args.num_units,
        "k": args.num_ratings,
        "cell_act": args.cell_activation,
        #"debug": True,
    }

    print("Model configuration:\n", settings)

    train_errors = list()
    test_errors = list()
    _train_rmse = list()
    _test_rmse = list()

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_normal_initializer(mean=0,
                                                   stddev=1 /
                                                   sqrt(args.num_units))
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            train_model = RNADECF(is_training=True, **settings)
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            test_model = RNADECF(is_training=False, **settings)

        # Initializing all model weights
        tf.global_variables_initializer().run()

        with open("log.txt", "w+", 1) as log:  # line-buffered so epoch logs are flushed promptly
            log.write("Rating matrix file path : [{}]\n".format(rmf_path))
            log.write("Training data file : [{}]\n".format(train_file))
            log.write("Testing data file : [{}]\n".format(test_file))
            log.write("Model configuration:\n{}\n".format(settings))

            for i in range(1, epochs + 1):
                train_err, test_err, train_rmse, test_rmse = run_epoch(
                    train_model, test_model, session, train_data, test_data,
                    log)
                log.write("Epoch {}: Training error {}\n".format(i, train_err))
                log.write("Epoch {}: Test error {}\n".format(i, test_err))
                log.write("Epoch {}: Training RMSE {}\n".format(i, train_rmse))
                log.write("Epoch {}: Test RMSE {}\n".format(i, test_rmse))
                print("Epoch {}: Training error {}\n".format(i, train_err))
                print("Epoch {}: Test error {}\n".format(i, test_err))
                print("Epoch {}: Training RMSE {}\n".format(i, train_rmse))
                print("Epoch {}: Test RMSE {}\n".format(i, test_rmse))
                train_errors.append(train_err)
                test_errors.append(test_err)
                _train_rmse.append(train_rmse)
                _test_rmse.append(test_rmse)

            x = range(epochs)
            f, axarr = plt.subplots(2, sharex=True)
            axarr[0].plot(x, train_errors)
            axarr[0].plot(x, test_errors)
            axarr[0].scatter(x, train_errors)
            axarr[0].scatter(x, test_errors)
            axarr[0].set_title('Cross-entropy loss')
            axarr[1].plot(x, _train_rmse)
            axarr[1].plot(x, _test_rmse)
            axarr[1].scatter(x, _train_rmse)
            axarr[1].scatter(x, _test_rmse)
            axarr[1].set_title('RMSE')
            plt.savefig("results.png")
tf.reset_default_graph()

ad = [ 8.32329655,  8.32329655,  8.32329845,  8.32329845,  8.32329845,  8.32329845,
  8.32329845,  8.32329941,  8.28163242,  8.32329845,  8.32329845,  8.32329655,
  8.32329941, 8.32329655,  8.32329845 , 8.32329655 , 8.32329845 , 8.32329845,
  8.32329655,  8.3231554,   8.32329655,  8.32329845,  8.32325554,  8.32329845,
  8.32329655,  8.28935242,  8.32329655, 8.3232975  , 8.32329941 , 8.32329845,
  8.32329845,  8.32329082,  8.32329845,  8.32329655,  8.32329845,  8.32329845,
  8.32329845,  8.32329845,  8.32329655,  8.3232975 ,  8.32329655,  8.32317448,
  8.32329655,  8.3231802 ,  8.32329845,  8.27982044,  8.32329655,  8.32329845,
  8.32329845,  8.32329845,  8.3232975 ,  8.32329845,  8.3232975 ,  8.3232975,
  8.32329845,  8.3232975 ,  8.27784348,  8.32329845,  8.32308769, 8.32329845,
  8.32314777,  8.30504799,  8.32329845, 8.3232975 ]
  
a = tf.Variable(ad)
b = tf.get_variable('b', shape = (2,3,5), initializer = tf.random_normal_initializer() )


batch_start = tf.cast(tf.ones_like(b[:, 0:1, 0:1]), dtype = tf.int64)
batch_start = tf.multiply(batch_start, 2)
batch_start = tf.squeeze(batch_start, axis = [-1])
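
# Shape walk-through: b has shape (2, 3, 5), so b[:, 0:1, 0:1] is (2, 1, 1);
# tf.ones_like keeps that shape, the cast makes it int64, the multiply fills it
# with 2s, and tf.squeeze(axis=[-1]) drops the trailing axis, leaving batch_start
# as an int64 tensor of shape (2, 1) filled with 2s.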

sess = tf.Session()
sess.run(tf.global_variables_initializer())

print(sess.run(a))
print(sess.run(b))
print()
print(sess.run(batch_start))
print()
Beispiel #36
0
from __future__ import print_function, division, absolute_import, unicode_literals
import tensorflow.contrib as tf_contrib
from tensorflow.contrib.layers.python.layers import layers

import os
import shutil
import numpy as np
import logging
from collections import OrderedDict
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score
import tensorflow as tf
from ops import *
from nets.tf_unet.layers import *

logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
weight_init = tf.random_normal_initializer(mean=0.0, stddev=0.1)


def Unet(x,
         labels,
         keep_prob=1.0,
         channels=3,
         n_class=2,
         num_layers=5,
         features_root=64,
         filter_size=3,
         pool_size=2,
         summaries=True,
         trainable=True,
         reuse=False,
         scope='dis'):
Beispiel #37
0
    vgg.build(model_input)

# define the new fc layers here
with tf.name_scope('face_weights'):
    #	w1 = tf.get_variable("w1",[vgg_out_dim,fc2_num_weights],initializer = tf.random_normal_initializer(stddev = 1e-4))
    #	b1 = tf.get_variable("b1",initializer = tf.zeros([fc2_num_weights]))
    #
    #	w2 = tf.get_variable("w2",[fc2_num_weights,fin_out_dim],initializer = tf.random_normal_initializer(stddev = 1e-4))
    #	b2 = tf.get_variable("b2",initializer = tf.zeros([fin_out_dim]))

    vgg_oup = vgg.fc6
    fc1 = tf.layers.dropout(tf.layers.dense(
        vgg_oup,
        fc2_num_weights,
        activation=tf.nn.relu,
        kernel_initializer=tf.random_normal_initializer(stddev=1e-4),
        name='fc1'),
                            rate=0.2)
    fin_out = tf.layers.dense(
        tf.layers.batch_normalization(fc1),
        fin_out_dim,
        activation=tf.nn.relu,
        kernel_initializer=tf.random_normal_initializer(stddev=1e-4),
        name='model_output')

    # rest of the computation graph
    # first fc
    #	fc2 = tf.nn.relu(tf.nn.dropout(tf.nn.xw_plus_b(vgg_oup,w1,b1),0.8))
    #	# second (and last) fc
    #	fin_out = tf.nn.relu(tf.nn.xw_plus_b(fc2,w2,b2),name = 'model_output')
    # train op
Beispiel #38
0
def layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes):
    """
    Create the layers for a fully convolutional network.  Build skip-layers using the vgg layers.
    :param vgg_layer3_out: TF Tensor for VGG Layer 3 output
    :param vgg_layer4_out: TF Tensor for VGG Layer 4 output
    :param vgg_layer7_out: TF Tensor for VGG Layer 7 output
    :param num_classes: Number of classes to classify
    :return: The Tensor for the last layer of output
    """
    # TODO: Implement function
    #conv 1x1 of layer7
    layer7_conv1x1 = tf.layers.conv2d(
        vgg_layer7_out,
        num_classes,
        1,
        padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    #upsampling
    output1 = tf.layers.conv2d_transpose(
        layer7_conv1x1,
        num_classes,
        4,
        strides=(2, 2),
        padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    #conv 1x1 of layer4
    layer4_conv1x1 = tf.layers.conv2d(
        vgg_layer4_out,
        num_classes,
        1,
        padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    #Skip connection
    skip1 = tf.add(output1, layer4_conv1x1)
    #upsampling
    output2 = tf.layers.conv2d_transpose(
        skip1,
        num_classes,
        4,
        strides=(2, 2),
        padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    #conv 1x1 of layer3
    layer3_conv1x1 = tf.layers.conv2d(
        vgg_layer3_out,
        num_classes,
        1,
        padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))
    #Skip connection
    skip2 = tf.add(output2, layer3_conv1x1)
    #Upsampling
    final_output = tf.layers.conv2d_transpose(
        skip2,
        num_classes,
        16,
        strides=(8, 8),
        padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3))

    return final_output
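
A hedged shape check for the FCN head above: the placeholders stand in for the real VGG activations, and the spatial sizes assume a 160x576 input with VGG16's 8x/16x/32x strides for layers 3, 4, and 7 (an assumption, not something stated in the snippet).

import tensorflow as tf

vgg3 = tf.placeholder(tf.float32, [None, 20, 72, 256])   # 160/8,  576/8
vgg4 = tf.placeholder(tf.float32, [None, 10, 36, 512])   # 160/16, 576/16
vgg7 = tf.placeholder(tf.float32, [None, 5, 18, 4096])   # 160/32, 576/32
fcn_out = layers(vgg3, vgg4, vgg7, num_classes=2)
print(fcn_out.get_shape())  # (?, 160, 576, 2): back to the input resolution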
WARM_SETP = int(1.0 / 8.0 * SAVE_WEIGHTS_INTE)

# -------------------------------------------- Data_preprocess_config
DATASET_NAME = 'coco'  # 'pascal', 'coco'
PIXEL_MEAN = [123.68, 116.779, 103.939]  # R, G, B. In tf, channel is RGB. In openCV, channel is BGR
PIXEL_MEAN_ = [0.485, 0.456, 0.406]
PIXEL_STD = [0.229, 0.224, 0.225]  # R, G, B. In tf, channel is RGB. In openCV, channel is BGR
IMG_SHORT_SIDE_LEN = 600
IMG_MAX_LENGTH = 1000
CLASS_NUM = 80

# --------------------------------------------- Network_config
SUBNETS_WEIGHTS_INITIALIZER = tf.random_normal_initializer(mean=0.0,
                                                           stddev=0.01,
                                                           seed=None)
SUBNETS_BIAS_INITIALIZER = tf.constant_initializer(value=0.0)
PROBABILITY = 0.01
FINAL_CONV_BIAS_INITIALIZER = tf.constant_initializer(
    value=-math.log((1.0 - PROBABILITY) / PROBABILITY))
WEIGHT_DECAY = 1e-4

# ---------------------------------------------Anchor config
LEVEL = ['P3', 'P4', 'P5', 'P6', 'P7']
BASE_ANCHOR_SIZE_LIST = [32, 64, 128, 256, 512]
ANCHOR_STRIDE = [8, 16, 32, 64, 128]
ANCHOR_SCALES = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
ANCHOR_RATIOS = [0.5, 1.0, 2.0]
ANCHOR_SCALE_FACTORS = None
USE_CENTER_OFFSET = True
Beispiel #40
0
    def __init__(self,
                 level_sizes,
                 init_lr_range=(1e-6, 1e-2),
                 learnable_decay=True,
                 dynamic_output_scale=True,
                 use_attention=False,
                 use_log_objective=True,
                 num_gradient_scales=4,
                 zero_init_lr_weights=True,
                 use_log_means_squared=True,
                 use_relative_lr=True,
                 use_extreme_indicator=False,
                 max_log_lr=33,
                 obj_train_max_multiplier=-1,
                 use_problem_lr_mean=False,
                 use_gradient_shortcut=False,
                 use_lr_shortcut=False,
                 use_grad_products=False,
                 use_multiple_scale_decays=False,
                 learnable_inp_decay=True,
                 learnable_rnn_init=True,
                 random_seed=None,
                 **kwargs):
        """Initializes the RNN per-parameter optimizer.

    The hierarchy consists of up to three levels:
    Level 0: per parameter RNN
    Level 1: per tensor RNN
    Level 2: global RNN

    Args:
      level_sizes: list or tuple with 1, 2, or 3 integers, the number of units
          in each RNN in the hierarchy (level0, level1, level2).
          length 1: only coordinatewise rnn's will be used
          length 2: coordinatewise and tensor-level rnn's will be used
          length 3: a single global-level rnn will be used in addition to
             coordinatewise and tensor-level
      init_lr_range: the range in which to initialize the learning rates
      learnable_decay: whether to learn weights that dynamically modulate the
          input scale via RMS style decay
      dynamic_output_scale: whether to learn weights that dynamically modulate
          the output scale
      use_attention: whether to use attention to train the optimizer
      use_log_objective: whether to train on the log of the objective
      num_gradient_scales: the number of scales to use for gradient history
      zero_init_lr_weights: whether to initialize the lr weights to zero
      use_log_means_squared: whether to track the log of the means_squared,
          used as a measure of signal vs. noise in gradient.
      use_relative_lr: whether to use the relative learning rate as an
          input during training (requires learnable_decay=True)
      use_extreme_indicator: whether to use the extreme indicator for learning
          rates as an input during training (requires learnable_decay=True)
      max_log_lr: the maximum log learning rate allowed during train or test
      obj_train_max_multiplier: max objective increase during a training run
      use_problem_lr_mean: whether to use the mean over all learning rates in
          the problem when calculating the relative learning rate as opposed to
          the per-tensor mean
      use_gradient_shortcut: Whether to add a learned affine projection of the
          gradient to the update delta in addition to the gradient function
          computed by the RNN
      use_lr_shortcut: Whether to add as input the difference between the log lr
          and the desired log lr (1e-3)
      use_grad_products: Whether to use gradient products in the rnn input.
          Only applicable if num_gradient_scales > 1
      use_multiple_scale_decays: Whether to use multiple scales for the scale
          decay, as with input decay
      learnable_inp_decay: Whether to learn the input decay weights and bias.
      learnable_rnn_init: Whether to learn the RNN state initialization.
      random_seed: Random seed for random variable initializers. (Default: None)
      **kwargs: args passed to TrainableOptimizer's constructor

    Raises:
      ValueError: If level_sizes is not a length 1, 2, or 3 list.
      ValueError: If there are any non-integer sizes in level_sizes.
      ValueError: If the init lr range is not of length 2.
      ValueError: If the init lr range is not a valid range (min > max).
    """
        if len(level_sizes) not in [1, 2, 3]:
            raise ValueError(
                "HierarchicalRNN only supports 1, 2, or 3 levels in the "
                "hierarchy, but {} were requested.".format(len(level_sizes)))
        if any(not isinstance(level, int) for level in level_sizes):
            raise ValueError(
                "Level sizes must be integer values, were {}".format(
                    level_sizes))
        if len(init_lr_range) != 2:
            raise ValueError("Initial LR range must be len 2, was {}".format(
                len(init_lr_range)))
        if init_lr_range[0] > init_lr_range[1]:
            raise ValueError("Initial LR range min is greater than max.")

        self.learnable_decay = learnable_decay
        self.dynamic_output_scale = dynamic_output_scale
        self.use_attention = use_attention
        self.use_log_objective = use_log_objective
        self.num_gradient_scales = num_gradient_scales
        self.zero_init_lr_weights = zero_init_lr_weights
        self.use_log_means_squared = use_log_means_squared
        self.use_relative_lr = use_relative_lr
        self.use_extreme_indicator = use_extreme_indicator
        self.max_log_lr = max_log_lr
        self.use_problem_lr_mean = use_problem_lr_mean
        self.use_gradient_shortcut = use_gradient_shortcut
        self.use_lr_shortcut = use_lr_shortcut
        self.use_grad_products = use_grad_products
        self.use_multiple_scale_decays = use_multiple_scale_decays
        self.learnable_inp_decay = learnable_inp_decay
        self.learnable_rnn_init = learnable_rnn_init

        self.random_seed = random_seed

        self.num_layers = len(level_sizes)
        self.init_lr_range = init_lr_range

        self.reuse_vars = None
        self.reuse_global_state = None
        self.cells = []
        self.init_vectors = []

        with tf.variable_scope(opt.OPTIMIZER_SCOPE):

            self._initialize_rnn_cells(level_sizes)

            # get the cell size for the per-parameter RNN (Level 0)
            cell_size = level_sizes[0]

            # Random normal initialization scaled by the output size. This is the
            # scale for the RNN *readouts*. RNN internal weight scale is set in the
            # BiasGRUCell call.
            scale_factor = FLAGS.hrnn_rnn_readout_scale / math.sqrt(cell_size)
            scaled_init = tf.random_normal_initializer(0.,
                                                       scale_factor,
                                                       seed=self.random_seed)

            # weights for projecting the hidden state to a parameter update
            self.update_weights = tf.get_variable("update_weights",
                                                  shape=(cell_size, 1),
                                                  initializer=scaled_init)

            if self.use_attention:
                # weights for projecting the hidden state to the location at which the
                # gradient is attended
                self.attention_weights = tf.get_variable(
                    "attention_weights",
                    initializer=self.update_weights.initialized_value())

            # weights for projecting the hidden state to the RMS decay term
            self._initialize_scale_decay((cell_size, 1), scaled_init)
            self._initialize_input_decay((cell_size, 1), scaled_init)

            self._initialize_lr((cell_size, 1), scaled_init)

        state_keys = [
            "parameter", "layer", "scl_decay", "inp_decay", "true_param"
        ]

        if self.dynamic_output_scale:
            state_keys.append("log_learning_rate")

        for i in range(self.num_gradient_scales):
            state_keys.append("grad_accum{}".format(i + 1))
            state_keys.append("ms{}".format(i + 1))

        super(HierarchicalRNN,
              self).__init__("hRNN",
                             state_keys,
                             use_attention=use_attention,
                             use_log_objective=use_log_objective,
                             obj_train_max_multiplier=obj_train_max_multiplier,
                             **kwargs)
with tf.variable_scope('conv1') as scope:
    # first, reshape the image to [BATCH_SIZE, 28, 28, 1] to make it work with tf.nn.conv2d
    # use the dynamic dimension -1
    images = tf.reshape(X, [-1, 28, 28, 1], name="input")

    # create kernel variable of dimension [5, 5, 1, 32]
    # use tf.truncated_normal_initializer()
    kernel = tf.get_variable("kernel",
                             shape=[5, 5, 1, 32],
                             initializer=tf.truncated_normal_initializer())

    # create biases variable of dimension [32]
    # use tf.random_normal_initializer()
    biases = tf.get_variable("biases",
                             shape=[32],
                             initializer=tf.random_normal_initializer())

    # apply tf.nn.conv2d. strides [1, 1, 1, 1], padding is 'SAME'
    conv1 = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding="SAME")

    # apply relu on the sum of convolution output and biases
    relu1 = tf.nn.relu(conv1 + biases)

    # output is of dimension BATCH_SIZE x 28 x 28 x 32

with tf.variable_scope('pool1') as scope:
    # apply max pool with k-size [1, 2, 2, 1], and strides [1, 2, 2, 1], padding 'SAME'
    pool1 = tf.nn.max_pool(relu1, [1, 2, 2, 1], [1, 2, 2, 1], padding="SAME")

    # output is of dimension BATCH_SIZE x 14 x 14 x 32
Beispiel #42
0
def batchnorm(axis=-1, momentum=0.99, epsilon=0.001):
    initializer = tf.random_normal_initializer(0, 0.02)
    return tf.keras.layers.BatchNormalization(axis,
                                              momentum=momentum,
                                              epsilon=epsilon,
                                              gamma_initializer=initializer)
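
A hypothetical usage of the helper above on an NHWC feature map; the input shape, filter count, and layer order are illustrative, not taken from the original model.

demo_in = tf.keras.Input(shape=(64, 64, 3))
demo_conv = tf.keras.layers.Conv2D(32, 4, strides=2, padding='same')(demo_in)
demo_bn = batchnorm()(demo_conv)  # BatchNormalization over the channel axis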
def layers(vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes):
    """
    Create the layers for a fully convolutional network.  Build skip-layers using the vgg layers.
    :param vgg_layer3_out: TF Tensor for VGG Layer 3 output
    :param vgg_layer4_out: TF Tensor for VGG Layer 4 output
    :param vgg_layer7_out: TF Tensor for VGG Layer 7 output
    :param num_classes: Number of classes to classify
    :return: The Tensor for the last layer of output
    """
    # TODO: Implement function

    # FCN Layer 1
    fcn_layer1_out = tf.layers.conv2d(vgg_layer7_out,
                                      num_classes,
                                      1,
                                      strides=1,
                                      padding='same',
                                      name="fcn_layer1_out")

    # FCN Layer 2
    fcn_layer2_trans = tf.layers.conv2d_transpose(
        fcn_layer1_out,
        num_classes,
        4,
        strides=2,
        padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3),
        name="fcn_layer2_trans")
    fcn_layer2_1x1 = tf.layers.conv2d(vgg_layer4_out,
                                      num_classes,
                                      1,
                                      strides=1,
                                      padding='same',
                                      name="fcn_layer2_1x1")
    fcn_layer2_skip = tf.add(fcn_layer2_trans,
                             fcn_layer2_1x1,
                             name='fcn_layer2_skip')

    # FCN Layer 3
    fcn_layer3_trans = tf.layers.conv2d_transpose(
        fcn_layer2_skip,
        num_classes,
        4,
        strides=2,
        padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3),
        name="fcn_layer3_trans")
    fcn_layer3_1x1 = tf.layers.conv2d(vgg_layer3_out,
                                      num_classes,
                                      1,
                                      strides=1,
                                      padding='same',
                                      name="fcn_layer3_1x1")
    fcn_layer4_skip = tf.add(fcn_layer3_trans,
                             fcn_layer3_1x1,
                             name='fcn_layer4_skip')

    # FCN final layer.
    fcn_final = tf.layers.conv2d_transpose(
        fcn_layer4_skip,
        num_classes,
        16,
        strides=8,
        padding='same',
        kernel_initializer=tf.random_normal_initializer(stddev=0.01),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3),
        name='fcn_final')
    return fcn_final
Beispiel #44
0
    def _build_word_char_embeddings(self):
        '''
        options contains key 'char_cnn': {

        'n_characters': 262,

        # includes the start / end characters
        'max_characters_per_token': 50,

        'filters': [
            [1, 32],
            [2, 32],
            [3, 64],
            [4, 128],
            [5, 256],
            [6, 512],
            [7, 512]
        ],
        'activation': 'tanh',

        # for the character embedding
        'embedding': {'dim': 16}

        # for highway layers
        # if omitted, then no highway layers
        'n_highway': 2,
        }
        '''
        batch_size = self.options['batch_size']
        unroll_steps = self.options['unroll_steps']
        projection_dim = self.options['lstm']['projection_dim']

        cnn_options = self.options['char_cnn']
        filters = cnn_options['filters']
        n_filters = sum(f[1] for f in filters)
        max_chars = cnn_options['max_characters_per_token']
        char_embed_dim = cnn_options['embedding']['dim']
        n_chars = cnn_options['n_characters']
        if n_chars != 261:
            raise InvalidNumberOfCharacters(
                "Set n_characters=261 for training see the README.md")
        if cnn_options['activation'] == 'tanh':
            activation = tf.nn.tanh
        elif cnn_options['activation'] == 'relu':
            activation = tf.nn.relu
        else:
            raise ValueError("char_cnn activation must be 'tanh' or 'relu', "
                             "got {}".format(cnn_options['activation']))

        # the input character ids
        self.tokens_characters = tf.placeholder(DTYPE_INT,
                                                shape=(batch_size,
                                                       unroll_steps,
                                                       max_chars),
                                                name='tokens_characters')
        # the character embeddings
        with tf.device("/cpu:0"):
            self.embedding_weights = tf.get_variable(
                "char_embed", [n_chars, char_embed_dim],
                dtype=DTYPE,
                initializer=tf.random_uniform_initializer(-1.0, 1.0))
            # shape (batch_size, unroll_steps, max_chars, embed_dim)
            self.char_embedding = tf.nn.embedding_lookup(
                self.embedding_weights, self.tokens_characters)

            if self.bidirectional:
                self.tokens_characters_reverse = tf.placeholder(
                    DTYPE_INT,
                    shape=(batch_size, unroll_steps, max_chars),
                    name='tokens_characters_reverse')
                self.char_embedding_reverse = tf.nn.embedding_lookup(
                    self.embedding_weights, self.tokens_characters_reverse)

        # the convolutions
        def make_convolutions(inp, reuse):
            with tf.variable_scope('CNN', reuse=reuse) as scope:
                convolutions = []
                for i, (width, num) in enumerate(filters):
                    if cnn_options['activation'] == 'relu':
                        # He initialization for ReLU activation
                        # with char embeddings init between -1 and 1
                        #w_init = tf.random_normal_initializer(
                        #    mean=0.0,
                        #    stddev=np.sqrt(2.0 / (width * char_embed_dim))
                        #)

                        # Kim et al 2015, +/- 0.05
                        w_init = tf.random_uniform_initializer(minval=-0.05,
                                                               maxval=0.05)
                    elif cnn_options['activation'] == 'tanh':
                        # glorot init
                        w_init = tf.random_normal_initializer(
                            mean=0.0,
                            stddev=np.sqrt(1.0 / (width * char_embed_dim)))
                    w = tf.get_variable("W_cnn_%s" % i,
                                        [1, width, char_embed_dim, num],
                                        initializer=w_init,
                                        dtype=DTYPE)
                    b = tf.get_variable(
                        "b_cnn_%s" % i, [num],
                        dtype=DTYPE,
                        initializer=tf.constant_initializer(0.0))

                    conv = tf.nn.conv2d(
                        inp, w, strides=[1, 1, 1, 1], padding="VALID") + b
                    # now max pool
                    conv = tf.nn.max_pool(conv,
                                          [1, 1, max_chars - width + 1, 1],
                                          [1, 1, 1, 1], 'VALID')

                    # activation
                    conv = activation(conv)
                    conv = tf.squeeze(conv, squeeze_dims=[2])

                    convolutions.append(conv)

            return tf.concat(convolutions, 2)

        # for first model, this is False, for others it's True
        reuse = tf.get_variable_scope().reuse
        embedding = make_convolutions(self.char_embedding, reuse)

        self.token_embedding_layers = [embedding]

        if self.bidirectional:
            # re-use the CNN weights from forward pass
            embedding_reverse = make_convolutions(self.char_embedding_reverse,
                                                  True)

        # for highway and projection layers:
        #   reshape from (batch_size, n_tokens, dim) to
        n_highway = cnn_options.get('n_highway')
        use_highway = n_highway is not None and n_highway > 0
        use_proj = n_filters != projection_dim

        if use_highway or use_proj:
            embedding = tf.reshape(embedding, [-1, n_filters])
            if self.bidirectional:
                embedding_reverse = tf.reshape(embedding_reverse,
                                               [-1, n_filters])

        # set up weights for projection
        if use_proj:
            assert n_filters > projection_dim
            with tf.variable_scope('CNN_proj') as scope:
                W_proj_cnn = tf.get_variable(
                    "W_proj", [n_filters, projection_dim],
                    initializer=tf.random_normal_initializer(
                        mean=0.0, stddev=np.sqrt(1.0 / n_filters)),
                    dtype=DTYPE)
                b_proj_cnn = tf.get_variable(
                    "b_proj", [projection_dim],
                    initializer=tf.constant_initializer(0.0),
                    dtype=DTYPE)

        # apply highway layers
        def high(x, ww_carry, bb_carry, ww_tr, bb_tr):
            carry_gate = tf.nn.sigmoid(tf.matmul(x, ww_carry) + bb_carry)
            transform_gate = tf.nn.relu(tf.matmul(x, ww_tr) + bb_tr)
            return carry_gate * transform_gate + (1.0 - carry_gate) * x

        if use_highway:
            highway_dim = n_filters

            for i in range(n_highway):
                with tf.variable_scope('CNN_high_%s' % i) as scope:
                    W_carry = tf.get_variable(
                        'W_carry',
                        [highway_dim, highway_dim],
                        # glorot init
                        initializer=tf.random_normal_initializer(
                            mean=0.0, stddev=np.sqrt(1.0 / highway_dim)),
                        dtype=DTYPE)
                    b_carry = tf.get_variable(
                        'b_carry', [highway_dim],
                        initializer=tf.constant_initializer(-2.0),
                        dtype=DTYPE)
                    W_transform = tf.get_variable(
                        'W_transform', [highway_dim, highway_dim],
                        initializer=tf.random_normal_initializer(
                            mean=0.0, stddev=np.sqrt(1.0 / highway_dim)),
                        dtype=DTYPE)
                    b_transform = tf.get_variable(
                        'b_transform', [highway_dim],
                        initializer=tf.constant_initializer(0.0),
                        dtype=DTYPE)

                embedding = high(embedding, W_carry, b_carry, W_transform,
                                 b_transform)
                if self.bidirectional:
                    embedding_reverse = high(embedding_reverse, W_carry,
                                             b_carry, W_transform, b_transform)
                self.token_embedding_layers.append(
                    tf.reshape(embedding,
                               [batch_size, unroll_steps, highway_dim]))

        # finally project down to projection dim if needed
        if use_proj:
            embedding = tf.matmul(embedding, W_proj_cnn) + b_proj_cnn
            if self.bidirectional:
                embedding_reverse = tf.matmul(embedding_reverse, W_proj_cnn) \
                    + b_proj_cnn
            self.token_embedding_layers.append(
                tf.reshape(embedding,
                           [batch_size, unroll_steps, projection_dim]))

        # reshape back to (batch_size, tokens, dim)
        if use_highway or use_proj:
            shp = [batch_size, unroll_steps, projection_dim]
            embedding = tf.reshape(embedding, shp)
            if self.bidirectional:
                embedding_reverse = tf.reshape(embedding_reverse, shp)

        # at last assign attributes for remainder of the model
        self.embedding = embedding
        if self.bidirectional:
            self.embedding_reverse = embedding_reverse
Beispiel #45
0
import tensorflow as tf

with tf.Session() as sess:
    init1 = tf.random_uniform_initializer(-1.0, 1.0)
    var1 = tf.get_variable("var1", shape=[2, 4], initializer=init1)
    var1_ = tf.Variable(tf.random_uniform([2, 4], -1.0, 1.0))

    init2 = tf.constant_initializer(1.0, dtype=tf.float32)
    var2 = tf.get_variable("var2", shape=[2, 4], initializer=init2)
    var2_ = tf.Variable(tf.constant(1.0, shape=[2, 4], dtype=tf.float32))

    init3 = tf.random_normal_initializer(0.0, 1.0, dtype=tf.float32)
    var3 = tf.get_variable("var3", shape=[2, 4], initializer=init3)
    var3_ = tf.Variable(tf.random_normal([2, 4], 0.0, 1.0, dtype=tf.float32))

    init4 = tf.truncated_normal_initializer(0.0, 1.0, dtype=tf.float32)
    var4 = tf.get_variable("var4", shape=[2, 4], initializer=init4)
    var4_ = tf.Variable(tf.truncated_normal([2, 4], 0.0, 1.0,
                                            dtype=tf.float32))

    init5 = tf.zeros_initializer(dtype=tf.float32)
    var5 = tf.get_variable("var5", shape=[2, 4], initializer=init5)
    var5_ = tf.Variable(tf.zeros(shape=[2, 4], dtype=tf.float32))

    init6 = tf.ones_initializer(dtype=tf.float32)
    var6 = tf.get_variable("var6", shape=[2, 4], initializer=init6)
    var6_ = tf.Variable(tf.ones(shape=[2, 4], dtype=tf.float32))

    init7 = tf.uniform_unit_scaling_initializer(dtype=tf.float32)
    var7 = tf.get_variable("var7", shape=[2, 4], initializer=init7)
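
    # To materialize and compare each paired form above, initialize and fetch the
    # variables (a minimal continuation sketch; the random pairs yield different
    # samples, only their distributions match):
    sess.run(tf.global_variables_initializer())
    print(sess.run(var2), sess.run(var2_))   # both all-ones
    print(sess.run(var5), sess.run(var5_))   # both all-zeros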
Beispiel #46
0
def batchnorm(inputs):
    return tf.layers.batch_normalization(inputs, axis=3, epsilon=1e-5, momentum=0.1, training=True, gamma_initializer=tf.random_normal_initializer(1.0, 0.02))
    def inference(self, mode, inputs):
        is_training = mode == 'TRAIN'

        ###decode your inputs
        [image, im_info, gt_boxes] = inputs

        image.set_shape([None, None, None, 3])
        im_info.set_shape([None, cfg.nr_info_dim])
        if mode == 'TRAIN':
            gt_boxes.set_shape([None, None, 5])
        ##end of decode

        num_anchors = len(cfg.anchor_scales) * len(cfg.anchor_ratios)
        bottleneck = resnet_v1.bottleneck

        blocks = [
            resnet_utils.Block('block1', bottleneck,
                               [(144, 24, 2, 1)] + [(144, 24, 1, 1)] * 3),
            resnet_utils.Block('block2', bottleneck,
                               [(288, 144, 2, 1)] + [(288, 144, 1, 1)] * 7),
            resnet_utils.Block('block3', bottleneck,
                               [(576, 288, 1, 1)] + [(576, 288, 1, 1)] * 3),
        ]


        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            with tf.variable_scope('resnet_v1_xception', 'resnet_v1_xception'):
                net = resnet_utils.conv2d_same(
                    image, 24, 3, stride=2, scope='conv1')  # the atrous conv rate argument must be removed here
                net = slim.max_pool2d(
                    net, [3, 3], stride=2, padding='SAME', scope='pool1')
            net, _ = resnet_v1.resnet_v1(
                net, blocks[0:1], global_pool=False, include_root_block=False,
                scope='resnet_v1_xception')
                
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv3, _ = resnet_v1.resnet_v1(
                net, blocks[1:2], global_pool=False, include_root_block=False,
                scope='resnet_v1_xception')

        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv4, _ = resnet_v1.resnet_v1(
                net_conv3, blocks[2:3], global_pool=False,
                include_root_block=False, scope='resnet_v1_xception')


        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

        with tf.variable_scope(
                'resnet_v1_xception', 'resnet_v1_xception',
                regularizer=tf.contrib.layers.l2_regularizer(
                    cfg.weight_decay)):
            # rpn
            rpn = slim.conv2d(
                net_conv3, 256, [3, 3], trainable=is_training,
                weights_initializer=initializer, activation_fn=nn_ops.relu,
                scope="rpn_conv/3x3")
            rpn_cls_score = slim.conv2d(
                rpn, num_anchors * 2, [1, 1], trainable=is_training,
                weights_initializer=initializer, padding='VALID',
                activation_fn=None, scope='rpn_cls_score')
            rpn_bbox_pred = slim.conv2d(
                rpn, num_anchors * 4, [1, 1], trainable=is_training,
                weights_initializer=initializer, padding='VALID',
                activation_fn=None, scope='rpn_bbox_pred')

            # generate anchor
            height = tf.cast(tf.shape(rpn)[1], tf.float32)
            width = tf.cast(tf.shape(rpn)[2], tf.float32)
            anchors = generate_anchors_opr(
                height, width, cfg.stride[0], cfg.anchor_scales,
                cfg.anchor_ratios)
            # change it so that the score has 2 as its channel size
            rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_cls_prob = tf.nn.softmax(rpn_cls_prob, name='rpn_cls_prob')
            rpn_cls_prob = tf.reshape(rpn_cls_prob, tf.shape(rpn_cls_score))

            rois, roi_scores,rois_before_nms = proposal_opr(
                rpn_cls_prob, rpn_bbox_pred, im_info, mode, cfg.stride,
                anchors, num_anchors, is_tfchannel=True, is_tfnms=False)

            if is_training:
                with tf.variable_scope('anchor') as scope:
                    rpn_labels, rpn_bbox_targets = \
                        tf.py_func(
                            anchor_target_layer,
                            [gt_boxes, im_info, cfg.stride, anchors,
                             num_anchors],
                            [tf.float32, tf.float32])
                    rpn_labels = tf.to_int32(rpn_labels, name="to_int32")

                with tf.control_dependencies([rpn_labels]):
                    with tf.variable_scope('rpn_rois') as scope:
                        rois, labels, bbox_targets = \
                            tf.py_func(
                                proposal_target_layer,
                                [rois, gt_boxes, im_info],
                                [tf.float32, tf.float32, tf.float32])
                        labels = tf.to_int32(labels, name="to_int32")


        with tf.variable_scope(
                'resnet_v1_xception', 'resnet_v1_xception',
                regularizer=tf.contrib.layers.l2_regularizer(
                    cfg.weight_decay)):

            ps_chl = 7 * 7 * 10
            ps_fm = rfcn_plus_plus_opr.global_context_module(
                 net_conv4, prefix='conv_new_1',
#                ks=15, chl_mid=256, chl_out=ps_chl)
                 ks=15, chl_mid=64, chl_out=ps_chl)
            ps_fm = nn_ops.relu(ps_fm)

            [psroipooled_rois, _, _] =  psalign_pooling_op.psalign_pool(
                ps_fm, rois, group_size=7,
                sample_height=2, sample_width=2, spatial_scale=1.0/16.0)

            #[psroipooled_rois, _] = psroi_pooling_op.psroi_pool(
            #    ps_fm, rois, group_size=7, spatial_scale=1.0 / 16.0)
            psroipooled_rois = slim.flatten(psroipooled_rois)
            ps_fc_1 = slim.fully_connected(
                psroipooled_rois, 2048, weights_initializer=initializer,
                activation_fn=nn_ops.relu, trainable=is_training, scope='ps_fc_1')
            cls_score = slim.fully_connected(
                ps_fc_1, cfg.num_classes, weights_initializer=initializer,
                activation_fn=None, trainable=is_training, scope='cls_fc')
            bbox_pred = slim.fully_connected(
                ps_fc_1, 4 * cfg.num_classes, weights_initializer=initializer_bbox,
                activation_fn=None, trainable=is_training, scope='bbox_fc')

            cls_prob = loss_opr.softmax_layer(cls_score, "cls_prob")

            #conv_new_1 = slim.conv2d(
            #    net_conv5, 1024, [1, 1], trainable=is_training,
            #    weights_initializer=initializer, activation_fn=nn_ops.relu,
            #    scope="conv_new_1")
            #rfcn_cls = slim.conv2d(
            #    conv_new_1, 7 * 7 * cfg.num_classes, [1, 1],
            #    trainable=is_training, weights_initializer=initializer,
            #    activation_fn=None, scope="rfcn_cls")
            #rfcn_bbox = slim.conv2d(
            #    conv_new_1, 7 * 7 * 4, [1, 1], trainable=is_training,
            #    weights_initializer=initializer,
            #    activation_fn=None, scope="rfcn_bbox")

            #[psroipooled_cls_rois, _] = psroi_pooling_op.psroi_pool(
            #    rfcn_cls, rois, group_size=7, spatial_scale=1.0 / 16.0)
            #[psroipooled_loc_rois, _] = psroi_pooling_op.psroi_pool(
            #    rfcn_bbox, rois, group_size=7, spatial_scale=1.0 / 16.0)

            #cls_score = tf.reduce_mean(psroipooled_cls_rois, axis=[1, 2])
            #bbox_pred = tf.reduce_mean(psroipooled_loc_rois, axis=[1, 2])
            #cls_prob = loss_opr.softmax_layer(cls_score, "cls_prob")
            # cls_prob = tf.nn.softmax(cls_score, name="cls_prob")
            #bbox_pred = tf.tile(bbox_pred, [1, cfg.num_classes])

        if not is_training:
            stds = np.tile(
                np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (cfg.num_classes))
            means = np.tile(
                np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (cfg.num_classes))
            bbox_pred *= stds
            bbox_pred += means

            ##############add prediction#####################
            tf.add_to_collection("rpn_cls_score", rpn_cls_score)
            tf.add_to_collection("rpn_cls_prob", rpn_cls_prob)
            tf.add_to_collection("rpn_bbox_pred", rpn_bbox_pred)
            tf.add_to_collection("cls_score", cls_score)
            tf.add_to_collection("cls_prob", cls_prob)
            tf.add_to_collection("bbox_pred", bbox_pred)
            tf.add_to_collection("rois", rois)

            tf.add_to_collection("rois_before_nms", rois_before_nms)
            tf.add_to_collection("roi_scores", roi_scores)

        else:
            #--------------------  rpn loss ---------------------------------#
            from detection_opr.utils import loss_opr_without_box_weight
            rpn_loss_box = loss_opr_without_box_weight.smooth_l1_loss_rpn(
                tf.reshape(rpn_bbox_pred, [-1, 4]),
                tf.reshape(rpn_bbox_targets, [-1, 4]),
                tf.reshape(rpn_labels, [-1]), sigma=cfg.simga_rpn)

            rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
            rpn_label = tf.reshape(rpn_labels, [-1])
            rpn_select = tf.where(tf.not_equal(rpn_label, -1))
            rpn_cls_score = tf.reshape(
                tf.gather(rpn_cls_score, rpn_select), [-1, 2])
            rpn_label = tf.reshape(tf.gather(rpn_label, rpn_select), [-1])
            rpn_cross_entropy = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=rpn_cls_score, labels=rpn_label))

            #-------------------- rcnn loss  --------------------------------#
            label = tf.reshape(labels, [-1])
            cross_entropy, loss_box = loss_opr_without_box_weight.sum_ohem_loss(
                tf.reshape(cls_score, [-1, cfg.num_classes]), label,
                bbox_pred, bbox_targets, cfg.TRAIN.nr_ohem_sampling,
                cfg.num_classes)
            loss_box *= 2

            #--------------------add to colloection ------------------------#
            tf.add_to_collection('loss_cls', cross_entropy)
            tf.add_to_collection('loss_box', loss_box)
            tf.add_to_collection('rpn_loss_cls', rpn_cross_entropy)
            tf.add_to_collection('rpn_loss_box', rpn_loss_box)
            loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box
            tf.add_to_collection('losses', loss)
            return loss
Beispiel #48
0
def discrim_conv(batch_input, out_channels, stride):
    padded_input = tf.pad(batch_input, [[0, 0], [1, 1], [1, 1], [0, 0]], mode="CONSTANT")
    return tf.layers.conv2d(padded_input, out_channels, kernel_size=4, strides=(stride, stride), padding="valid", kernel_initializer=tf.random_normal_initializer(0, 0.02))
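
A hedged sketch of stacking discrim_conv into a PatchGAN-style discriminator head; the input size, channel widths, and leaky-ReLU slope are illustrative assumptions, not taken from the original model.

disc_in = tf.placeholder(tf.float32, [None, 256, 256, 6])  # e.g. input and target images concatenated
h = disc_in
for channels in [64, 128, 256]:
    # stride-2 valid conv on the padded input halves the spatial size each step
    h = tf.nn.leaky_relu(discrim_conv(h, channels, stride=2), alpha=0.2)
patch_logits = discrim_conv(h, 1, stride=1)  # per-patch real/fake scores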
Beispiel #49
0
    def __init__(
            self,
            prev_layer,
            decay=0.9,
            epsilon=0.00001,
            act=None,
            is_train=False,
            beta_init=tf.zeros_initializer,
            gamma_init=tf.random_normal_initializer(mean=1.0, stddev=0.002),
            moving_mean_init=tf.zeros_initializer(),
            name='batchnorm_layer',
    ):
        super(BatchNormLayer, self).__init__(prev_layer=prev_layer, act=act, name=name)

        # logging.info(
        #     "BatchNormLayer %s: decay: %f epsilon: %f act: %s is_train: %s" %
        #     (self.name, decay, epsilon, self.act.__name__ if self.act is not None else 'No Activation', is_train)
        # )

        x_shape = self.inputs.get_shape()
        params_shape = x_shape[-1:]

        with tf.variable_scope(name):
            axis = list(range(len(x_shape) - 1))
            # 1. beta, gamma
            variables = []

            if beta_init:

                if beta_init == tf.zeros_initializer:
                    beta_init = beta_init()

                beta = tf.get_variable(
                    'beta', shape=params_shape, initializer=beta_init, dtype=LayersConfig.tf_dtype, trainable=is_train
                )

                variables.append(beta)

            else:
                beta = None

            if gamma_init:
                gamma = tf.get_variable(
                    'gamma',
                    shape=params_shape,
                    initializer=gamma_init,
                    dtype=LayersConfig.tf_dtype,
                    trainable=is_train,
                )
                variables.append(gamma)
            else:
                gamma = None

            # 2.

            moving_mean = tf.get_variable(
                'moving_mean', params_shape, initializer=moving_mean_init, dtype=LayersConfig.tf_dtype, trainable=False
            )

            moving_variance = tf.get_variable(
                'moving_variance',
                params_shape,
                initializer=tf.constant_initializer(1.),
                dtype=LayersConfig.tf_dtype,
                trainable=False,
            )

            # 3.
            # These ops will only be performed when training.
            mean, variance = tf.nn.moments(self.inputs, axis)

            update_moving_mean = moving_averages.assign_moving_average(
                moving_mean, mean, decay, zero_debias=False
            )  # if zero_debias=True, has bias

            update_moving_variance = moving_averages.assign_moving_average(
                moving_variance, variance, decay, zero_debias=False
            )  # if zero_debias=True, has bias

            def mean_var_with_update():
                with tf.control_dependencies([update_moving_mean, update_moving_variance]):
                    return tf.identity(mean), tf.identity(variance)

            if is_train:
                mean, var = mean_var_with_update()
            else:
                mean, var = moving_mean, moving_variance

            self.outputs = self._apply_activation(
                tf.nn.batch_normalization(self.inputs, mean, var, beta, gamma, epsilon)
            )

            variables.extend([moving_mean, moving_variance])

        self._add_layers(self.outputs)
        self._add_params(variables)
# Add tensorflow's default dropout. Can not simply add dropout to all hidden units in a LSTM, as they have relationships with each other
lstm_drop =  tf.contrib.rnn.DropoutWrapper(lstm, input_p, output_p)

hyp = tf.placeholder(tf.float32, [N, l_h, D], 'hypothesis')
evi = tf.placeholder(tf.float32, [N, l_e, D], 'evidence')

# lstm_size: the size of the gates in the LSTM, as in the first LSTM layer's initialization.
# The LSTM used for looking backwards through the sentences, similar to lstm.
lstm_back = tf.contrib.rnn.BasicLSTMCell(lstm_size)

# A dropout wrapper for lstm_back, like lstm_drop.
lstm_drop_back = tf.contrib.rnn.DropoutWrapper(lstm_back, input_p, output_p)

# Initial values for the fully connected layer's weights.
fc_initializer = tf.random_normal_initializer(stddev=0.1)
fc_weight = tf.get_variable('fc_weight', [2*hidden_size, 3],
                            initializer=fc_initializer)
fc_bias = tf.get_variable('bias', [3])


# x: the inputs to the bidirectional_rnn
x = tf.concat([hyp, evi], 1)
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2]) # (Le+Lh), N, d
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, vector_size]) # (Le+Lh)*N, d
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(x, l_seq,)

rnn_outputs, _, _ = tf.contrib.rnn.static_bidirectional_rnn(lstm, lstm_back, x, dtype=tf.float32)
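
One plausible way to consume the bidirectional outputs with the fully connected parameters defined above (a sketch of the usual final step, not necessarily the original continuation):

classification_scores = tf.matmul(rnn_outputs[-1], fc_weight) + fc_bias  # shape (N, 3)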
Beispiel #51
0
    def _build_net(self):
        # ------------------ all inputs ------------------------
        self.s = tf.placeholder(tf.float32, [None, self.n_features],
                                name='s')  # input State
        self.s_ = tf.placeholder(tf.float32, [None, self.n_features],
                                 name='s_')  # input Next State
        self.r = tf.placeholder(tf.float32, [
            None,
        ], name='r')  # input Reward
        self.a = tf.placeholder(tf.int32, [
            None,
        ], name='a')  # input Action

        w_initializer, b_initializer = tf.random_normal_initializer(
            0., 0.3), tf.constant_initializer(0.1)

        # ------------------ build evaluate_net ------------------
        with tf.variable_scope('eval_net'):
            e1 = tf.layers.dense(self.s,
                                 20,
                                 tf.nn.relu,
                                 kernel_initializer=w_initializer,
                                 bias_initializer=b_initializer,
                                 name='e1')
            self.q_eval = tf.layers.dense(e1,
                                          self.n_actions,
                                          kernel_initializer=w_initializer,
                                          bias_initializer=b_initializer,
                                          name='q')

        # ------------------ build target_net ------------------
        with tf.variable_scope('target_net'):
            t1 = tf.layers.dense(self.s_,
                                 20,
                                 tf.nn.relu,
                                 kernel_initializer=w_initializer,
                                 bias_initializer=b_initializer,
                                 name='t1')
            self.q_next = tf.layers.dense(t1,
                                          self.n_actions,
                                          kernel_initializer=w_initializer,
                                          bias_initializer=b_initializer,
                                          name='t2')

        with tf.variable_scope('q_target'):
            q_target = self.r + self.gamma * tf.reduce_max(
                self.q_next, axis=1, name='Qmax_s_')  # shape=(None, )
            self.q_target = tf.stop_gradient(q_target)
        with tf.variable_scope('q_eval'):
            a_indices = tf.stack(
                [tf.range(tf.shape(self.a)[0], dtype=tf.int32), self.a],
                axis=1)
            self.q_eval_wrt_a = tf.gather_nd(
                params=self.q_eval, indices=a_indices)  # shape=(None, )
        with tf.variable_scope('loss'):
            self.loss = tf.reduce_mean(
                tf.squared_difference(self.q_target,
                                      self.q_eval_wrt_a,
                                      name='TD_error'))
        with tf.variable_scope('train'):
            self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(
                self.loss)
Beispiel #52
0
 def feed_neural_work(self):
     with tf.name_scope('regression'):
         #W = tf.Variable(tf.zeros(shape = [(self.total_embedding_dim - self.filter_sizes[0] + 1) * self.num_filters * 2,2]),name = 'W')
         # modified
         # self.represent=tf.concat([self.represent_imag,self.represent_real],1)
         # self.real_kernel,self.imag_kernel=self.set_weight(664,2)
         # cat_kernels_4_real = tf.concat([self.real_kernel, -self.imag_kernel],axis=-1)
         # cat_kernels_4_imag = tf.concat([self.imag_kernel, self.real_kernel],axis=-1)
         # cat_kernels_4_complex = tf.concat([cat_kernels_4_real, cat_kernels_4_imag],axis=0)
         # self.full_join_real_1=tf.matmul(self.represent,cat_kernels_4_complex)
         # modified
         # previous version
         # print(self.full_join_real_1)
         # exit()
         # self.full_join_real_1=tf.matmul(self.represent_real,self.real_kernel_1)-tf.matmul(self.represent_imag,self.imag_kernel_1)
         # self.full_join_imag_1=tf.matmul(self.represent_real,self.imag_kernel_1)+tf.matmul(self.represent_imag,self.real_kernel_1)
         # self.real_kernel_2,self.imag_kernel_2=self.set_weight(1,1)
         # # cat_kernels_4_real = tf.concat([self.real_kernel, -self.imag_kernel],axis=-1)
         # # cat_kernels_4_imag = tf.concat([self.imag_kernel, self.real_kernel],axis=-1)
         # # cat_kernels_4_complex = tf.concat([cat_kernels_4_real, cat_kernels_4_imag],axis=0)
         # self.full_join_real_2=tf.matmul(self.full_join_real_1,self.real_kernel_2)-tf.matmul(self.full_join_imag_1,self.imag_kernel_2)
         # self.full_join_imag_2=tf.matmul(self.full_join_real_1,self.imag_kernel_2)+tf.matmul(self.full_join_imag_1,self.real_kernel_2)
         # b = tf.get_variable('b_hidden', shape=[2],initializer = tf.random_normal_initializer())
         # self.logits=tf.concat([self.full_join_real_2,self.full_join_imag_2],1)+b
         # self.logits = tf.nn.xw_plus_b(self.full_join_real_1, W, b, name = "scores")
         # self.concat_out = tf.matmul(self.represent, cat_kernels_4_complex)
         # previous version
         regularizer = tf.contrib.layers.l2_regularizer(self.l2_reg_lambda)
         W = tf.get_variable(
             "W_hidden",
             #shape=[102,self.hidden_num],
             shape=[3 * self.num_filters + 2, self.hidden_num],
             initializer=tf.contrib.layers.xavier_initializer(),
             regularizer=regularizer)
         b = tf.get_variable('b_hidden',
                             shape=[self.hidden_num],
                             initializer=tf.random_normal_initializer(),
                             regularizer=regularizer)
         self.para.append(W)
         self.para.append(b)
         self.hidden_output = tf.nn.tanh(
             tf.nn.xw_plus_b(self.represent, W, b, name="hidden_output"))
         #self.hidden_output=tf.nn.dropout(self.hidden_output, self.dropout_keep_prob, name="hidden_output_drop")
         W = tf.get_variable(
             "W_output",
             shape=[self.hidden_num, 2],
             initializer=tf.contrib.layers.xavier_initializer(),
             regularizer=regularizer)
         b = tf.get_variable('b_output',
                             shape=[2],
                             initializer=tf.random_normal_initializer(),
                             regularizer=regularizer)
         self.para.append(W)
         self.para.append(b)
         self.logits = tf.nn.xw_plus_b(self.hidden_output,
                                       W,
                                       b,
                                       name="scores")
         print(self.logits)
         self.scores = tf.nn.softmax(self.logits)
         self.predictions = tf.argmax(self.scores, 1, name="predictions")
Beispiel #53
0
    def _build_net(self):
        # ------------------ build evaluate_net ------------------
        self.s = tf.placeholder(tf.float32, [None, self.n_features],
                                name='s')  # input
        self.q_target = tf.placeholder(tf.float32, [None, self.n_actions],
                                       name='Q_target')  # for calculating loss
        with tf.variable_scope('eval_net'):
            # c_names(collections_names) are the collections to store variables
            c_names, n_l1, w_initializer, b_initializer = \
                ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES], 10, \
                tf.random_normal_initializer(0., 0.3), tf.constant_initializer(0.1)  # config of layers

            # first layer. collections is used later when assign to target net
            with tf.variable_scope('l1'):
                w1 = tf.get_variable('w1', [self.n_features, n_l1],
                                     initializer=w_initializer,
                                     collections=c_names)
                b1 = tf.get_variable('b1', [1, n_l1],
                                     initializer=b_initializer,
                                     collections=c_names)
                l1 = tf.nn.relu(tf.matmul(self.s, w1) + b1)

            # second layer. collections is used later when assign to target net
            with tf.variable_scope('l2'):
                w2 = tf.get_variable('w2', [n_l1, self.n_actions],
                                     initializer=w_initializer,
                                     collections=c_names)
                b2 = tf.get_variable('b2', [1, self.n_actions],
                                     initializer=b_initializer,
                                     collections=c_names)
                self.q_eval = tf.matmul(l1, w2) + b2

        with tf.variable_scope('loss'):
            self.loss = tf.reduce_mean(
                tf.squared_difference(self.q_target, self.q_eval))
        with tf.variable_scope('train'):
            self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(
                self.loss)

        # ------------------ build target_net ------------------
        self.s_ = tf.placeholder(tf.float32, [None, self.n_features],
                                 name='s_')  # input
        with tf.variable_scope('target_net'):
            # c_names(collections_names) are the collections to store variables
            c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]

            # first layer. collections is used later when assign to target net
            with tf.variable_scope('l1'):
                w1 = tf.get_variable('w1', [self.n_features, n_l1],
                                     initializer=w_initializer,
                                     collections=c_names)
                b1 = tf.get_variable('b1', [1, n_l1],
                                     initializer=b_initializer,
                                     collections=c_names)
                l1 = tf.nn.relu(tf.matmul(self.s_, w1) + b1)

            # second layer. collections is used later when assign to target net
            with tf.variable_scope('l2'):
                w2 = tf.get_variable('w2', [n_l1, self.n_actions],
                                     initializer=w_initializer,
                                     collections=c_names)
                b2 = tf.get_variable('b2', [1, self.n_actions],
                                     initializer=b_initializer,
                                     collections=c_names)
                self.q_next = tf.matmul(l1, w2) + b2
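
# A hedged sketch (not taken from the original class) of what the two variable
# collections above are typically used for: a hard update that copies the
# eval-net parameters into the target net. The surrounding class most likely
# builds an op along these lines and runs it every few training steps.
def build_target_replacement_op():
    t_params = tf.get_collection('target_net_params')
    e_params = tf.get_collection('eval_net_params')
    # One tf.assign per (target, eval) pair; sess.run() the returned list to sync.
    return [tf.assign(t, e) for t, e in zip(t_params, e_params)]
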
def CNN():

	# g = tf.get_default_graph()
	# with g.gradient_override_map({'Relu': 'GuidedRelu'}):
	
	if init_method == 0:
		initializer_wb= tf.random_normal_initializer()
	elif init_method == 1:
		initializer_wb= tf.contrib.layers.xavier_initializer()
	elif init_method == 2:
		initializer_wb= tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG")		

	conv_layer_1 = tf.layers.conv2d( inputs=shaper, 
									 filters=64,
									 kernel_size=[3, 3],
									 padding="same",
									 strides=1,
									 activation=tf.nn.relu,
									 kernel_initializer=initializer_wb,
									 name = "conv1")
	pool_layer_1 = tf.layers.max_pooling2d(inputs=conv_layer_1, pool_size=[2, 2], strides=2)




	conv_layer_2 = tf.layers.conv2d( inputs=pool_layer_1,
									  filters=128,
									  kernel_size=[3, 3],
									  padding="same",
									  activation=tf.nn.relu,
									  kernel_initializer=initializer_wb)
	pool_layer_2 = tf.layers.max_pooling2d(inputs=conv_layer_2, pool_size=[2, 2], strides=2)




	conv_layer_3 = tf.layers.conv2d( inputs=pool_layer_2,
									  filters=256,
									  kernel_size=[3, 3],
									  padding="same",
									  activation=tf.nn.relu,
									 kernel_initializer=initializer_wb)
	


	conv_layer_4 = tf.layers.conv2d( inputs=conv_layer_3,
							  filters=256,
							  kernel_size=[3, 3],
							  padding="same",
							  activation=tf.nn.relu,
							  kernel_initializer=initializer_wb)
	pool_layer_3 = tf.layers.max_pooling2d(inputs=conv_layer_4, pool_size=[2, 2], strides=2)

	grads_40 = [tf.gradients(conv_layer_4[0,i,0,:], X_ph) for i in np.arange(5)]
	grads_41 = [tf.gradients(conv_layer_4[0,i,1,:], X_ph) for i in np.arange(5)]

	flattened_layer = tf.reshape(pool_layer_3, [-1, 3 * 3 * 256])


	dense_layer_1 = tf.layers.dense(inputs=flattened_layer, units=1024, activation=tf.nn.relu,kernel_initializer=initializer_wb)
	dense_layer_2 = tf.layers.dense(inputs=dense_layer_1, units=1024, activation=tf.nn.relu,kernel_initializer=initializer_wb)



	#dropped = tf.nn.dropout(dense_layer_2, dropout)
	#tf.contrib.layers.batch_norm(dropped,center=True, scale=True, is_training = training_phase,)


	logit_layer = tf.layers.dense(inputs=dense_layer_2, units=k)
# output = tf.nn.softmax(logits, name="softmax_tensor")
# classes = tf.argmax(input=output, axis=1)
	return logit_layer, grads_40, grads_41
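
# Hedged usage sketch for CNN(): it assumes the module-level names the function
# relies on (shaper, X_ph, k, init_method) are already defined in the original
# script; the label placeholder y_ph and the Adam optimizer below are
# illustrative choices, not part of the original code.
logit_layer, grads_40, grads_41 = CNN()
y_ph = tf.placeholder(tf.int64, [None], name="labels")
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_ph, logits=logit_layer))
train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)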
Beispiel #55
0
def conv_layer(input_data,
               height,
               width,
               x_stride,
               y_stride,
               filter_num,
               name,
               activation_method="relu",
               alpha=0.2,
               padding="VALID",
               atrous=1,
               init_method="xavier",
               bias_term=True,
               is_pretrain=True):
    """
    convolutional layer
    :param input_data: the input data tensor [batch_size, height, width, channels]
    :param height: the height of the convolutional kernel
    :param width: the width of the convolutional kernel
    :param x_stride: stride in X axis
    :param y_stride: stride in Y axis
    :param filter_num: the number of convolutional kernels
    :param name: the name of the layer
    :param activation_method: the type of activation function (default: relu)
    :param alpha: leaky relu alpha (default: 0.2)
    :param padding: the padding method, "SAME" | "VALID" (default: "VALID")
    :param atrous: the dilation rate, if atrous == 1, conv, if atrous > 1, dilated conv (default: 1)
    :param init_method: the method of weights initialization (default: xavier)
    :param bias_term: whether the bias term exists or not (default: True)
    :param is_pretrain: whether the parameters are trainable (default: True)

    :return:
        output: a 4-D tensor [number, height, width, channel]
    """
    channel = input_data.get_shape()[-1]

    # the method of weights initialization
    if init_method == "xavier":
        initializer = tf.contrib.layers.xavier_initializer()
    elif init_method == "gaussian":
        initializer = tf.random_normal_initializer(stddev=0.01)
    else:
        initializer = tf.truncated_normal_initializer(stddev=0.01)

    # the initialization of the weights and biases
    with tf.variable_scope(name):
        weights = tf.get_variable(name="weights",
                                  shape=[height, width, channel, filter_num],
                                  dtype=tf.float32,
                                  initializer=initializer,
                                  trainable=is_pretrain)
        biases = tf.get_variable(name="biases",
                                 shape=[filter_num],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.0),
                                 trainable=is_pretrain)

        # the method of convolution
        if atrous == 1:
            feature_map = tf.nn.conv2d(input=input_data,
                                       filter=weights,
                                       strides=[1, x_stride, y_stride, 1],
                                       padding=padding,
                                       name="conv")
        else:
            feature_map = tf.nn.atrous_conv2d(value=input_data,
                                              filters=weights,
                                              rate=atrous,
                                              padding=padding,
                                              name="atrous_conv")
        # biases term
        if bias_term is True:
            output = tf.nn.bias_add(value=feature_map,
                                    bias=biases,
                                    name="biases_add")
        else:
            output = feature_map

        # info show
        shape = output.get_shape()
        print("name: %s, shape: (%d, %d, %d), activation: %s" %
              (name, shape[1], shape[2], shape[3], activation_method))

        # activation
        output = activation_layer(output, activation_method, alpha)

        return output
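
# Hedged usage sketch for conv_layer(): stacking two layers on an image batch.
# It assumes activation_layer(), which conv_layer() calls, is defined elsewhere
# in the same module, as the original code implies.
images = tf.placeholder(tf.float32, [None, 256, 256, 1], name="images")
conv1 = conv_layer(images, 3, 3, 1, 1, 32, "conv1")
conv2 = conv_layer(conv1, 3, 3, 2, 2, 64, "conv2",
                   init_method="gaussian", padding="SAME")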
Beispiel #56
0
import tensorflow as tf

from alphai_rickandmorty_oracle.architecture.abstract import AbstractGanArchitecture

import alphai_rickandmorty_oracle.tflib as lib
import alphai_rickandmorty_oracle.tflib.ops.linear
import alphai_rickandmorty_oracle.tflib.ops.conv2d
import alphai_rickandmorty_oracle.tflib.ops.deconv2d

INIT_KERNEL = tf.random_normal_initializer(mean=0.0, stddev=0.02)
OUTPUT_DIM = 784  # Number of pixels in MNIST (28*28)
DIM = 64
Z_DIM = 128
DISC_FILTER_SIZE = 5


def leaky_relu(x, alpha=0.1, name=None):
    if name:
        with tf.variable_scope(name):
            return _impl_leaky_relu(x, alpha)
    else:
        return _impl_leaky_relu(x, alpha)


def _impl_leaky_relu(x, alpha):
    return tf.nn.relu(x) - (alpha * tf.nn.relu(-x))


class BrainwavesGanArchitecture(AbstractGanArchitecture):
    def __init__(self, output_dimensions, plot_dimensions):
        super().__init__(output_dimensions, plot_dimensions)
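
# Hedged sketch (not the original Rick-and-Morty architecture, which presumably
# builds its layers with the imported tflib ops): it only shows how the
# module-level constants and the leaky_relu helper above might be used.
x = tf.placeholder(tf.float32, [None, OUTPUT_DIM], name="flat_images")
hidden = tf.layers.dense(x, DIM, kernel_initializer=INIT_KERNEL, name="demo_fc")
hidden = leaky_relu(hidden, alpha=0.1, name="demo_lrelu")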
Beispiel #57
0
def fc_layer(input_data,
             output_dim,
             name,
             activation_method="relu",
             alpha=None,
             init_method="xavier",
             is_train=True):
    """
    fully-connected layer
    :param input_data: the input data
    :param output_dim: the dimension of the output data
    :param name: name of the layer
    :param activation_method: the type of activation function
    :param alpha: leaky relu alpha
    :param init_method: the method of weights initialization (default: xavier)
    :param is_train: if False, skip this layer, default is True
    :return:
        output: a 2-D tensor [batch_size, channel]
    """
    if is_train is True:
        shape = input_data.get_shape()
        if len(shape) == 4:
            input_dim = shape[1].value * shape[2].value * shape[3].value
        else:
            input_dim = shape[-1].value

        flat_input_data = tf.reshape(input_data, [-1, input_dim])
        if init_method is None:
            output = input_data
        else:
            with tf.variable_scope(name):
                # the method of weights initialization
                if init_method == "xavier":
                    initializer = tf.contrib.layers.xavier_initializer()
                elif init_method == "gaussian":
                    initializer = tf.random_normal_initializer(stddev=0.01)
                else:
                    initializer = tf.truncated_normal_initializer(stddev=0.01)

                weights = tf.get_variable(name="weight",
                                          shape=[input_dim, output_dim],
                                          dtype=tf.float32,
                                          initializer=initializer)

                biases = tf.get_variable(
                    name="biases",
                    shape=[output_dim],
                    dtype=tf.float32,
                    initializer=tf.constant_initializer(0.0))

                output = tf.nn.bias_add(value=tf.matmul(
                    flat_input_data, weights),
                                        bias=biases,
                                        name="fc_bias_add")

                output = activation_layer(input_data=output,
                                          activation_method=activation_method,
                                          alpha=alpha)

                print("name: %s, shape: %d -> %d, activation:%s, alpha = %r" %
                      (name, input_dim, output_dim, activation_method, alpha))
    else:
        output = input_data

    return output
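
# Hedged usage sketch for fc_layer(): a 4-D feature map is flattened internally,
# so the layer can follow a convolutional block directly. As with conv_layer(),
# activation_layer() is assumed to be defined elsewhere in the same module.
features = tf.placeholder(tf.float32, [None, 8, 8, 64], name="feature_maps")
fc1 = fc_layer(features, 512, "fc1")
fc2 = fc_layer(fc1, 2, "fc2")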
Beispiel #58
0
    def generator(z, batch_size, mode='train'):
        with tf.variable_scope("generator") as scope:
            if mode=='train':
                trainable = True
                pass
            elif mode=='sampler':
                trainable = False
                scope.reuse_variables()
            else:
                assert 0, 'Unknown mode for generator.'

            s_h, s_w = dcgan.output_height, dcgan.output_width
            s_h2, s_w2 = cs.conv_out_size_same(s_h, 2),  cs.conv_out_size_same(s_w, 2)
            s_h4, s_w4 = cs.conv_out_size_same(s_h2, 2),  cs.conv_out_size_same(s_w2, 2)
            s_h8, s_w8 = cs.conv_out_size_same(s_h4, 2),  cs.conv_out_size_same(s_w4, 2)
            s_h16, s_w16 = cs.conv_out_size_same(s_h8, 2),  cs.conv_out_size_same(s_w8, 2)
            s_h32, s_w32 = cs.conv_out_size_same(s_h16, 2),  cs.conv_out_size_same(s_w16, 2)
            s_h64, s_w64 = cs.conv_out_size_same(s_h32, 2),  cs.conv_out_size_same(s_w32, 2)

            # assert s_h16*s_w16*self.gf_dim*8==z.shape[1],str(s_h16*s_w16*self.gf_dim*8)+' != '+str(z.shape[1])

            # project `z` and reshape
            dcgan.z_ = tf.layers.dense(z, dcgan.gf_dim * 8 * s_h64 * s_w64,
                                       kernel_initializer=tf.random_normal_initializer(stddev=0.02),
                                       bias_initializer=tf.constant_initializer(0.01),
                                       use_bias=True, activation=None, name='g_h0_lin')
            with tf.variable_scope('g_h0_lin', reuse=True):
                dcgan.h0_w = tf.get_variable('kernel')
                dcgan.h0_b = tf.get_variable('bias')
            dcgan.h0 = tf.reshape(dcgan.z_, [batch_size, s_h64, s_w64, dcgan.gf_dim * 8])
            h0 = tf.contrib.layers.batch_norm(dcgan.h0,decay=0.9,updates_collections=None,
                            epsilon=1e-5,scale=True,is_training=trainable,scope='g_bn0')
            h0 = tf.nn.relu(h0)

            dcgan.h1, dcgan.h1_w, dcgan.h1_b = cs.deconv2d(h0, [batch_size, s_h32, s_w32, dcgan.gf_dim * 8], 
                                                    name='g_h1', with_w=True)
            h1 = tf.contrib.layers.batch_norm(dcgan.h1,decay=0.9,updates_collections=None,
                            epsilon=1e-5,scale=True,is_training=trainable,scope='g_bn1')
            h1 = tf.nn.relu(h1) 

            h2, dcgan.h2_w, dcgan.h2_b = cs.deconv2d(h1, [batch_size, s_h16, s_w16, dcgan.gf_dim * 4],
                                                     name='g_h2', with_w=True)
            h2 = tf.contrib.layers.batch_norm(h2,decay=0.9,updates_collections=None,
                            epsilon=1e-5,scale=True,is_training=trainable,scope='g_bn2')
            h2 = tf.nn.relu(h2) 

            h3, dcgan.h3_w, dcgan.h3_b = cs.deconv2d(h2, [batch_size, s_h8, s_w8, dcgan.gf_dim * 4],
                                                     name='g_h3', with_w=True)
            h3 = tf.contrib.layers.batch_norm(h3,decay=0.9,updates_collections=None,
                            epsilon=1e-5,scale=True,is_training=trainable,scope='g_bn3')
            h3 = tf.nn.relu(h3) 
            
            h4, dcgan.h4_w, dcgan.h4_b = cs.deconv2d(h3, [batch_size, s_h4, s_w4, dcgan.gf_dim * 2],
                                                     name='g_h4', with_w=True)
            h4 = tf.contrib.layers.batch_norm(h4,decay=0.9,updates_collections=None,
                            epsilon=1e-5,scale=True,is_training=trainable,scope='g_bn4')
            h4 = tf.nn.relu(h4) 
            
            h5, dcgan.h5_w, dcgan.h5_b = cs.deconv2d(h4, [batch_size, s_h2, s_w2, dcgan.gf_dim * 2],
                                                     name='g_h5', with_w=True)
            h5 = tf.contrib.layers.batch_norm(h5,decay=0.9,updates_collections=None,
                            epsilon=1e-5,scale=True,is_training=trainable,scope='g_bn5')
            h5 = tf.nn.relu(h5) 
            
            h6 = tf.layers.conv2d(inputs=h5, filters=dcgan.gf_dim * 2, kernel_size=[5,5], 
                     strides=(1,1),padding='same',
                     activation=None, name='d_h6_conv')
            h6 = tf.contrib.layers.batch_norm(h6,decay=0.9,updates_collections=None,
                            epsilon=1e-5,scale=True,is_training=trainable,scope='d_bn6')           
            
            h7, dcgan.h7_w, dcgan.h7_b = cs.deconv2d(h6, [batch_size, s_h, s_w, dcgan.c_dim],
                                                     name='g_h7', with_w=True)

            return tf.nn.tanh(h7)
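
# Hedged usage sketch: generator() builds the training graph first and then
# reuses the same variables for sampling (mode='sampler' also runs batch norm in
# inference mode). The noise placeholder below is illustrative; dcgan, cs and the
# z dimensionality come from the enclosing script.
z = tf.placeholder(tf.float32, [None, 100], name="z")
g_train = generator(z, batch_size=64, mode='train')
g_sample = generator(z, batch_size=64, mode='sampler')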
Beispiel #59
0
    def _build_loss(self, lstm_outputs):
        '''
        Create:
            self.total_loss: total loss op for training
            self.softmax_W, softmax_b: the softmax variables
            self.next_token_id / _reverse: placeholders for gold input

        '''
        batch_size = self.options['batch_size']
        unroll_steps = self.options['unroll_steps']

        n_tokens_vocab = self.options['n_tokens_vocab']

        # DEFINE next_token_id and *_reverse placeholders for the gold input
        def _get_next_token_placeholders(suffix):
            name = 'next_token_id' + suffix
            id_placeholder = tf.placeholder(DTYPE_INT,
                                            shape=(batch_size, unroll_steps),
                                            name=name)
            return id_placeholder

        # get the window and weight placeholders
        self.next_token_id = _get_next_token_placeholders('')
        if self.bidirectional:
            self.next_token_id_reverse = _get_next_token_placeholders(
                '_reverse')

        # DEFINE THE SOFTMAX VARIABLES
        # get the dimension of the softmax weights
        # softmax dimension is the size of the output projection_dim
        softmax_dim = self.options['lstm']['projection_dim']

        # the output softmax variables -- they are shared if bidirectional
        if self.share_embedding_softmax:
            # softmax_W is just the embedding layer
            self.softmax_W = self.embedding_weights

        with tf.variable_scope('softmax'), tf.device('/cpu:0'):
            # Glorot init: std = 1.0 / sqrt(fan_in)
            softmax_init = tf.random_normal_initializer(
                0.0, 1.0 / np.sqrt(softmax_dim))
            if not self.share_embedding_softmax:
                self.softmax_W = tf.get_variable('W',
                                                 [n_tokens_vocab, softmax_dim],
                                                 dtype=DTYPE,
                                                 initializer=softmax_init)
            self.softmax_b = tf.get_variable(
                'b', [n_tokens_vocab],
                dtype=DTYPE,
                initializer=tf.constant_initializer(0.0))

        # now calculate losses
        # loss for each direction of the LSTM
        self.individual_losses = []

        if self.bidirectional:
            next_ids = [self.next_token_id, self.next_token_id_reverse]
        else:
            next_ids = [self.next_token_id]
        # If bidirectional, there are two id placeholders: one forward, the other backward
        for id_placeholder, lstm_output_flat in zip(next_ids, lstm_outputs):
            # flatten the LSTM output and next token id gold to shape:
            # (batch_size * unroll_steps, softmax_dim)
            # Flatten and reshape the token_id placeholders
            next_token_id_flat = tf.reshape(id_placeholder, [-1, 1])

            with tf.control_dependencies([lstm_output_flat]):
                if self.is_training and self.sample_softmax:
                    losses = tf.nn.sampled_softmax_loss(
                        self.softmax_W,
                        self.softmax_b,
                        next_token_id_flat,
                        lstm_output_flat,
                        self.options['n_negative_samples_batch'],
                        self.options['n_tokens_vocab'],
                        num_true=1)

                else:
                    # get the full softmax loss
                    output_scores = tf.matmul(
                        lstm_output_flat, tf.transpose(
                            self.softmax_W)) + self.softmax_b
                    # NOTE: tf.nn.sparse_softmax_cross_entropy_with_logits
                    #   expects unnormalized output since it performs the
                    #   softmax internally
                    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=output_scores,
                        labels=tf.squeeze(next_token_id_flat,
                                          squeeze_dims=[1]))

            self.individual_losses.append(tf.reduce_mean(losses))

        # now make the total loss -- it's the mean of the individual losses
        if self.bidirectional:
            self.total_loss = 0.5 * (self.individual_losses[0] +
                                     self.individual_losses[1])
        else:
            self.total_loss = self.individual_losses[0]
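
# Hedged note: total_loss above is an average per-token negative log-likelihood
# in nats, so it is commonly reported as perplexity. A tiny self-contained helper:
import numpy as np

def perplexity_from_loss(mean_nll):
    """Convert an average per-token cross-entropy (in nats) into perplexity."""
    return np.exp(mean_nll)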
Beispiel #60
0
def dmnrun(fulldata, queask):
    # Loading saved meta graph
    sess = tf.Session()
    saver = tf.train.import_meta_graph("C:/Users/Mark/PycharmProjects/DMNTrain/weights/model.meta")
    saver.restore(sess, tf.train.latest_checkpoint('C:/Users/Mark/PycharmProjects/DMNTrain/weights'))
    tf.reset_default_graph()

    def wideArray(x, weight):
        wide = np.zeros([len(x), weight])
        for i in range(0, len(x)):
            for j in range(0, len(x[i])):
                wide[i][j] = x[i][j]
        return wide

    def octalConv(x):
        ans = []
        rows = []
        words = []
        for line in x.split(' '):
            for word in line:
                number = ord(word)               # `word` here is a single character
                convNum = oct(number)
                convNum = int(convNum[2:])       # octal digits reread as a decimal int
                ans.append(convNum)

            rows.append(ans)
            ans = []
            words.append(line)
        ans = wideArray(rows, 50)
        return ans, words
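    # Illustrative example of the encoding above: octalConv("hi there") returns a
    # 2x50 array whose rows start [150, 151, 0, ...] and [164, 150, 145, 162, 145,
    # 0, ...] (each character's code point written in octal and reread as a
    # decimal integer), together with the token list ['hi', 'there'].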

    def contextualize(data, quest):
        """
        Read in the input and question and build context sets.
        Output is a list of data points, each of which is a 7-element tuple containing:
            The sentences in the context in vectorized form.
            The sentences in the context as a list of string tokens.
            The question in vectorized form.
            The question as a list of string tokens.
            The answer in vectorized form.
            The answer as a list of string tokens.
            A list of numbers for supporting statements, which is currently unused.
        """
        output = []
        context = []
        for entry in data:
            # Turn input into a word vector
            # TODO: Change to Octal Decimal encoding
            context.append(octalConv(entry[:-1]))
        # Wrap up object so DMN can use it
        comp_context = tuple(zip(*context))
        output.append(comp_context +
                      octalConv(quest) +
                      octalConv('Nothing') +
                      (0,))
        return output

    test_data = contextualize(fulldata, queask)

    final_train_data = []

    def finalize(data):
        """
        Prepares data generated by contextualize() for use in the network.
        """
        final_data = []
        for cqas in data:
            contextvs, contextws, qvs, qws, avs, aws, spt = cqas

            lspt = [spt]

            lengths = itertools.accumulate(len(cvec) for cvec in contextvs)
            context_vec = np.concatenate(contextvs)
            context_words = sum(contextws, [])

            # Location markers for the beginnings of new sentences.
            sentence_ends = np.array(list(lengths))
            final_data.append((context_vec, sentence_ends, qvs, lspt, context_words, cqas, avs, aws))
        return np.array(final_data)

    final_test_data = finalize(test_data)

    tf.reset_default_graph()

    # Hyperparameters

    # The number of dimensions used to store data passed between recurrent layers in the network.
    recurrent_cell_size = 128

    # The number of dimensions in our word vectorizations.
    D = 50

    # How quickly the network learns. Too high, and we may run into numeric instability
    # or other issues.
    learning_rate = 0.005

    # Dropout probabilities. For a description of dropout and what these probabilities are,
    # see Entailment with TensorFlow.
    input_p, output_p = 0.5, 0.5

    # How many questions we train on at a time.
    batch_size = 128

    # Number of passes in episodic memory. We'll get to this later.
    passes = 4

    # Feed Forward layer sizes: the number of dimensions used to store data passed from feed-forward layers.
    ff_hidden_size = 256

    weight_decay = 0.00000001
    # The strength of our regularization. Increase to encourage sparsity in episodic memory,
    # but makes training slower. Don't make this larger than learning_rate.

    training_iterations_count = 400000
    # How many questions the network trains on each time it is trained.
    # Some questions are counted multiple times.

    display_step = 1
    # How many iterations of training occur before each validation check.

    # Input Module

    # Context: A [batch_size, maximum_context_length, word_vectorization_dimensions] tensor
    # that contains all the context information.
    context = tf.placeholder(tf.float32, [None, None, D], "context")
    context_placeholder = context  # I use context as a variable name later on

    # input_sentence_endings: A [batch_size, maximum_sentence_count, 2] tensor that
    # contains the locations of the ends of sentences.
    input_sentence_endings = tf.placeholder(tf.int32, [None, None, 2], "sentence")

    # recurrent_cell_size: the number of hidden units in recurrent layers.
    input_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

    # input_p: The probability of maintaining a specific hidden input unit.
    # Likewise, output_p is the probability of maintaining a specific hidden output unit.
    gru_drop = tf.contrib.rnn.DropoutWrapper(input_gru, input_p, output_p)

    # dynamic_rnn also returns the final internal state. We don't need that, and can
    # ignore the corresponding output (_).
    input_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, context, dtype=tf.float32, scope="input_module")

    # cs: the facts gathered from the context.
    cs = tf.gather_nd(input_module_outputs, input_sentence_endings)
    # to use every word as a fact, useful for tasks with one-sentence contexts
    s = input_module_outputs

    # Question Module

    # query: A [batch_size, maximum_question_length, word_vectorization_dimensions] tensor
    #  that contains all of the questions.

    query = tf.placeholder(tf.float32, [None, None, D], "query")

    # input_query_lengths: A [batch_size, 2] tensor that contains question length information.
    # input_query_lengths[:,1] has the actual lengths; input_query_lengths[:,0] is a simple range()
    # so that it plays nice with gather_nd.
    input_query_lengths = tf.placeholder(tf.int32, [None, 2], "query_lengths")

    question_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, query, dtype=tf.float32,
                                                   scope=tf.VariableScope(True, "input_module"))

    # q: the question states. A [batch_size, recurrent_cell_size] tensor.
    q = tf.gather_nd(question_module_outputs, input_query_lengths)

    # Episodic Memory

    # make sure the current memory (i.e. the question vector) is broadcasted along the facts dimension
    size = tf.stack([tf.constant(1), tf.shape(cs)[1], tf.constant(1)])
    re_q = tf.tile(tf.reshape(q, [-1, 1, recurrent_cell_size]), size)

    # Final output for attention, needs to be 1 in order to create a mask
    output_size = 1

    # Weights and biases
    attend_init = tf.random_normal_initializer(stddev=0.1)
    w_1 = tf.get_variable("attend_w1", [1, recurrent_cell_size * 7, recurrent_cell_size],
                          tf.float32, initializer=attend_init)
    w_2 = tf.get_variable("attend_w2", [1, recurrent_cell_size, output_size],
                          tf.float32, initializer=attend_init)

    b_1 = tf.get_variable("attend_b1", [1, recurrent_cell_size],
                          tf.float32, initializer=attend_init)
    b_2 = tf.get_variable("attend_b2", [1, output_size],
                          tf.float32, initializer=attend_init)

    # Regulate all the weights and biases
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_1))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_1))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_2))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_2))

    def attention(c, mem, existing_facts):
        """
        Custom attention mechanism.
        c: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor
            that contains all the facts from the contexts.
        mem: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor that
            contains the current memory. It should be the same memory for all facts for accurate results.
        existing_facts: A [batch_size, maximum_sentence_count, 1] tensor that
            acts as a binary mask for which facts exist and which do not.

        """
        with tf.variable_scope("attending") as scope:
            # attending: The metrics by which we decide what to attend to.
            attending = tf.concat([c, mem, re_q, c * re_q, c * mem, (c - re_q) ** 2, (c - mem) ** 2], 2)

            # m1: First layer of multiplied weights for the feed-forward network.
            #     We tile the weights in order to manually broadcast, since tf.matmul does not
            #     automatically broadcast batch matrix multiplication as of TensorFlow 1.2.
            m1 = tf.matmul(attending * existing_facts,
                           tf.tile(w_1, tf.stack([tf.shape(attending)[0], 1, 1]))) * existing_facts
            # bias_1: A masked version of the first feed-forward layer's bias
            #     over only existing facts.

            bias_1 = b_1 * existing_facts

            # tnhan: First nonlinearity. In the original paper, this is a tanh nonlinearity;
            #        choosing relu was a design choice intended to avoid issues with
            #        low gradient magnitude when the tanh returned values close to 1 or -1.
            tnhan = tf.nn.relu(m1 + bias_1)

            # m2: Second layer of multiplied weights for the feed-forward network.
            #     Still tiling weights for the same reason described in m1's comments.
            m2 = tf.matmul(tnhan, tf.tile(w_2, tf.stack([tf.shape(attending)[0], 1, 1])))

            # bias_2: A masked version of the second feed-forward layer's bias.
            bias_2 = b_2 * existing_facts

            # norm_m2: A normalized version of the second layer of weights, which is used
            #     to help make sure the softmax nonlinearity doesn't saturate.
            norm_m2 = tf.nn.l2_normalize(m2 + bias_2, -1)

            # softmaxable: A hack in order to use sparse_softmax on an otherwise dense tensor.
            #     We make norm_m2 a sparse tensor, then make it dense again after the operation.
            softmax_idx = tf.where(tf.not_equal(norm_m2, 0))[:, :-1]
            softmax_gather = tf.gather_nd(norm_m2[..., 0], softmax_idx)
            softmax_shape = tf.shape(norm_m2, out_type=tf.int64)[:-1]
            softmaxable = tf.SparseTensor(softmax_idx, softmax_gather, softmax_shape)
            return tf.expand_dims(tf.sparse_tensor_to_dense(tf.sparse_softmax(softmaxable)), -1)

    # facts_0s: a [batch_size, max_facts_length, 1] tensor
    #     whose values are 1 if the corresponding fact exists and 0 if not.
    facts_0s = tf.cast(tf.count_nonzero(input_sentence_endings[:, :, -1:], -1, keepdims=True), tf.float32)

    with tf.variable_scope("Episodes") as scope:
        attention_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

        # memory: A list of all tensors that are the (current or past) memory state
        #   of the attention mechanism.
        memory = [q]

        # attends: A list of all tensors that represent what the network attends to.
        attends = []
        for a in range(passes):
            # attention mask
            attend_to = attention(cs, tf.tile(tf.reshape(memory[-1], [-1, 1, recurrent_cell_size]), size),
                                  facts_0s)

            # Inverse attention mask, for what's retained in the state.
            retain = 1 - attend_to

            # GRU pass over the facts, according to the attention mask.
            while_valid_index = (lambda state, index: index < tf.shape(cs)[1])
            update_state = (lambda state, index: (attend_to[:, index, :] *
                                                  attention_gru(cs[:, index, :], state)[0] +
                                                  retain[:, index, :] * state))
            # start loop with most recent memory and at the first index
            memory.append(tuple(tf.while_loop(while_valid_index,
                                              (lambda state, index: (update_state(state, index), index + 1)),
                                              loop_vars=[memory[-1], 0]))[0])

            attends.append(attend_to)

            # Reuse variables so the GRU pass uses the same variables every pass.
            scope.reuse_variables()

    # Answer Module

    # a0: Final memory state. (Input to answer module)
    a0 = tf.concat([memory[-1], q], -1)

    # fc_init: Initializer for the final fully connected layer's weights.
    fc_init = tf.random_normal_initializer(stddev=0.1)

    with tf.variable_scope("answer"):
        # w_answer: The final fully connected layer's weights.
        w_answer = tf.get_variable("weight", [recurrent_cell_size * 2, D],
                                   tf.float32, initializer=fc_init)
        # Regulate the fully connected layer's weights
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             tf.nn.l2_loss(w_answer))

        # The regressed word. This isn't an actual word yet;
        #    we still have to find the closest match.
        logit = tf.expand_dims(tf.matmul(a0, w_answer), 1)

        # Make a mask over which words exist.
        with tf.variable_scope("ending"):
            all_ends = tf.reshape(input_sentence_endings, [-1, 2])
            range_ends = tf.range(tf.shape(all_ends)[0])
            ends_indices = tf.stack([all_ends[:, 0], range_ends], axis=1)
            ind = tf.reduce_max(tf.scatter_nd(ends_indices, all_ends[:, 1],
                                              [tf.shape(q)[0], tf.shape(all_ends)[0]]),
                                axis=-1)
            range_ind = tf.range(tf.shape(ind)[0])
            mask_ends = tf.cast(tf.scatter_nd(tf.stack([ind, range_ind], axis=1),
                                              tf.ones_like(range_ind), [tf.reduce_max(ind) + 1,
                                                                        tf.shape(ind)[0]]), bool)
            # A bit of a trick. With the locations of the ends of the mask (the last periods in
            #  each of the contexts) as 1 and the rest as 0, we can scan with exclusive or
            #  (starting from all 1). For each context in the batch, this will result in 1s
            #  up until the marker (the location of that last period) and 0s afterwards.
            mask = tf.scan(tf.logical_xor, mask_ends, tf.ones_like(range_ind, dtype=bool))
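            # Worked example of the xor-scan above (illustrative): if one context's
            # last period sits at word index 2 and the mask is padded to length 5,
            # that column of mask_ends is [0, 0, 1, 0, 0]; scanning logical_xor down
            # from an initial True yields [1, 1, 0, 0, 0], i.e. True for the words
            # before that final period and False from the marker onwards. This is
            # the word-existence mask multiplied into `context` just below.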

        # We score each possible word inversely with their Euclidean distance to the regressed word.
        #  The highest score (lowest distance) will correspond to the selected word.
        logits = -tf.reduce_sum(tf.square(context * tf.transpose(tf.expand_dims(
            tf.cast(mask, tf.float32), -1), [1, 0, 2]) - logit), axis=-1, name='logits')

    # Training

    # gold_standard: The real answers.
    gold_standard = tf.placeholder(tf.float32, [None, 1, D], "answer")
    with tf.variable_scope('accuracy'):
        eq = tf.equal(context, gold_standard)
        corrbool = tf.reduce_all(eq, -1, name='corrbool')
        logloc = tf.reduce_max(logits, -1, keepdims=True)
        # locs: A boolean tensor that indicates where the score
        #  matches the minimum score. This happens on multiple dimensions,
        #  so in the off chance there's one or two indexes that match
        #  we make sure it matches in all indexes.
        locs = tf.equal(logits, logloc)

        # correctsbool: A boolean tensor that indicates for which
        #   words in the context the score always matches the minimum score.
        correctsbool = tf.reduce_any(tf.logical_and(locs, corrbool), -1)
        # corrects: A tensor that is simply correctsbool cast to floats.
        corrects = tf.where(correctsbool, tf.ones_like(correctsbool, dtype=tf.float32),
                            tf.zeros_like(correctsbool, dtype=tf.float32))

        # corr: corrects, but for the right answer instead of our selected answer.
        corr = tf.where(corrbool, tf.ones_like(corrbool, dtype=tf.float32),
                        tf.zeros_like(corrbool, dtype=tf.float32))
    with tf.variable_scope("loss"):
        # Use sigmoid cross entropy as the base loss,
        #  with our distances as the relative probabilities. There are
        #  multiple correct labels, for each location of the answer word within the context.
        loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.nn.l2_normalize(logits, -1),
                                                       labels=corr)

        # Add regularization losses, weighted by weight_decay.
        total_loss = tf.reduce_mean(loss) + weight_decay * tf.add_n(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    # TensorFlow's default implementation of the Adam optimizer works. We can adjust more than
    #  just the learning rate, but it's not necessary to find a very good optimum.
    optimizer = tf.train.AdamOptimizer(learning_rate)

    # Once we have an optimizer, we ask it to minimize the loss
    #   in order to work towards the proper training.
    opt_op = optimizer.minimize(total_loss)

    # Initialize variables
    init = tf.global_variables_initializer()

    # Launch the TensorFlow session
    sess = tf.Session()
    sess.run(init)

    def prep_batch(batch_data, more_data=False):
        """
            Prepare all the preprocessing that needs to be done on a batch-by-batch basis.
        """
        context_vec, sentence_ends, questionvs, spt, context_words, cqas, answervs, _ = zip(*batch_data)
        ends = list(sentence_ends)
        maxend = max(map(len, ends))
        aends = np.zeros((len(ends), maxend))
        for index, i in enumerate(ends):
            for indexj, x in enumerate(i):
                aends[index, indexj] = x - 1
        new_ends = np.zeros(aends.shape + (2,))

        for index, x in np.ndenumerate(aends):
            new_ends[index + (0,)] = index[0]
            new_ends[index + (1,)] = x

        contexts = list(context_vec)
        max_context_length = max([len(x) for x in contexts])
        contextsize = list(np.array(contexts[0]).shape)
        contextsize[0] = max_context_length
        final_contexts = np.zeros([len(contexts)] + contextsize)

        contexts = [np.array(x) for x in contexts]
        for i, context in enumerate(contexts):
            final_contexts[i, 0:len(context), :] = context
        max_query_length = max(len(x) for x in questionvs)
        querysize = list(np.array(questionvs[0]).shape)
        querysize[:1] = [len(questionvs), max_query_length]
        queries = np.zeros(querysize)
        querylengths = np.array(list(zip(range(len(questionvs)), [len(q) - 1 for q in questionvs])))
        questions = [np.array(q) for q in questionvs]
        for i, question in enumerate(questions):
            queries[i, 0:len(question), :] = question
        data = {context_placeholder: final_contexts, input_sentence_endings: new_ends,
                query: queries, input_query_lengths: querylengths, gold_standard: answervs}
        return (data, context_words, cqas) if more_data else data

    # Use TQDM if installed
    tqdm_installed = False

    # Prepare validation set
    batch = np.random.randint(final_test_data.shape[0], size=batch_size * 10)
    batch_data = final_test_data[batch]

    validation_set, val_context_words, val_cqas = prep_batch(batch_data, True)

    holder = [corrbool, locs, total_loss, logits, facts_0s, w_1] + attends + [query, cs, question_module_outputs]
    
    print('Starting session')
    start_time = time.time()
    ancr = sess.run([corrbool, locs, total_loss, logits, facts_0s, w_1] + attends +
                    [query, cs, question_module_outputs], feed_dict=validation_set)
    elapsed_time = time.time() - start_time
    print(elapsed_time)
    a = ancr[0]
    n = ancr[1]
    cr = ancr[2]
    attenders = np.array(ancr[6:-3])
    faq = np.sum(ancr[4], axis=(-1, -2))  # Number of facts in each context

    limit = 1

    # Locations of responses within contexts
    indices = np.argmax(n, axis=1)

    # Locations of actual answers within contexts
    indicesc = np.argmax(a, axis=1)
    response = ""

    ans = 0
    inp = ''

    for i, e, cw, cqa in list(zip(indices, indicesc, val_context_words, val_cqas))[:limit]:
        ccc = " ".join(cw)
        print("TEXT: ", ccc)
        inp = ccc
        print("QUESTION: ", " ".join(cqa[3]))
        print("RESPONSE: ", cw[i], ["Correct", "Incorrect"][i != e])
        ans = i
        print("EXPECTED: ", cw[e])
        print()
    # For safety, return this if nothing is found
    sess.close()
    
    print('--')
    tot_index = 0
    for line in fulldata:
        tot_index = tot_index + len(line)
        if tot_index >= ans:
            return line
    return response