Example #1
    def generator(self, z, y=None):
        if y is not None:
            yb = tf.reshape(y, [-1, 1, 1, self.y_dim])
            z = tf.concat(1, [z, y])

            h0 = tf.nn.relu(bn0(linear(z, self.gfc_dim)))
            h0 = tf.concat(1, [h0, y])

            h1 = tf.nn.relu(bn1(linear(h0, self.gf_dim*2*7*7)))
            h1 = tf.reshape(h1, [-1, 7, 7, self.gf_dim * 2])
            h1 = conv_cond_concat(h1, yb)

            h2 = tf.nn.relu(bn2(deconv2d(h1, self.gf_dim, name='h2')))
            h2 = conv_cond_concat(h2, yb)
            return tf.nn.sigmoid(deconv2d(h2, self.c_dim, name='h3'))
        else:
            h0 = tf.nn.relu(bn0(linear(z, self.gf_dim*8*4*4)))
            h0 = tf.reshape(h0, [-1, 4, 4, self.gf_dim * 8])

            h1 = deconv2d(h0, self.gf_dim*4, name='h1')
            h1 = tf.nn.relu(bn1(h1))

            h2 = deconv2d(h1, self.gf_dim*2, name='h2')
            h2 = tf.nn.relu(bn2(h2))

            h3 = deconv2d(h2, self.gf_dim*1, name='h3')
            h3 = tf.nn.relu(bn3(h3))

            h4 = deconv2d(h3, 3, name='h4')
            return tf.nn.tanh(h4)
Example #2
def alphgo(_x, _weights, _biases, _dropout):
    _x = tf.reshape(_x, [-1, 19, 19, 1])

    # convolution layer
    conv1 = tf.nn.relu(conv2d(_x, _weights["conv1"]) + _biases["conv1"])
    pool1 = max_pool(conv1, k=2)
    norm1 = norm(pool1, lsize=4)
    norm1 = tf.nn.dropout(norm1, _dropout)

    # conv1 image summary
    tf.image_summary("conv1", conv1)

    conv2 = tf.nn.relu(conv2d(norm1, _weights["conv2"]) + _biases["conv2"])
    pool2 = max_pool(conv2, k=2)
    norm2 = norm(pool2, lsize=4)
    norm2 = tf.nn.dropout(norm2, _dropout)

    conv3 = tf.nn.relu(conv2d(norm2, _weights["conv3"]) + _biases["conv3"])
    pool3 = max_pool(conv3, k=2)
    norm3 = norm(pool3, lsize=4)
    norm3 = tf.nn.dropout(norm3, _dropout)

    # fully connect layer
    dense1 = tf.reshape(norm3, [-1, 4 * 4 * 1024])
    dense1 = tf.nn.relu(tf.matmul(dense1, _weights["d1"]) + _biases["d1"])
    dense2 = tf.nn.relu(tf.matmul(dense1, _weights["d2"]) + _biases["d2"])

    out = tf.matmul(dense2, _weights["out"]) + _biases["out"]
    return out
Example #3
def act_mrelu(net, mrelu):
    """Check this works
    """
    net2 = mrelu["mult"] * (mrelu["addi"] + net)
    net2 = -tf.nn.relu(net2)
    out_1 = tf.math.reduce_sum(net2)
    out = net - tf.nn.relu(out_1)
    return out
Example #4
@tf.custom_gradient
def tf_ReLU_lin_grad(input_tensor):
    y = tf.nn.relu(input_tensor)

    def grad(dy):
        return tf.identity(dy)

    return y, grad
Example #5
    def apply_nonlin(self, x):
        if self.nonlin_type == 'lrelu':
            return tf.nn.leaky_relu(x, alpha=0.01)
        elif self.nonlin_type == 'tanh':
            return tf.tanh(x)
        else:
            raise NotImplementedError(self.nonlin_type)
Example #6
def dynamicRNN(x, seqlen):
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Permuting batch_size and n_steps
    #x = tf.transpose(x, [1, 0, 2])
    # Reshaping to (n_steps*batch_size, n_input)
    #x = tf.reshape(x, [-1, maximum_words_in_sentences])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(1, maximum_words_in_sentences, x)
    x = [tf.squeeze(x_, [1]) for x_ in x]
    # Define a lstm cell with tensorflow
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1)

    tm_cell = tf.nn.rnn_cell.DropoutWrapper(
               lstm_cell, output_keep_prob= 0.25)
    # cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * 2)
    # calculation. TODO:implement 2-layer
    outputs, states = tf.nn.rnn(tm_cell, x, dtype=tf.float32,
                                sequence_length=seqlen)
    #batch_size_=outputs[0].get_shape()[0]
    #batch_size_=tf.cast(batch_size_, int)
    #outputs=tf.reduce_mean(outputs,0)
    outputs=tf.split(1, batch_size, outputs)
    outputs=[tf.reshape(output,[-1,n_hidden]) for output in outputs]
    #print outputs[0].get_shape()
    for i in range(batch_size):
        outputs[i]=tf.nn.xw_plus_b(outputs[i],weights['out'],biases['out'],name="linear")
        outputs[i]=tf.nn.relu(outputs[i])
    outputs=tf.pack(outputs)#the [1] with length of batch_size becomes [0] now
    outputs=tf.reduce_mean(outputs,1)#change to 1 accordingly
    return outputs
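A small NumPy sketch (toy shapes, not taken from this example) of the reshaping described in the comments above: a batch-major (batch_size, n_steps, n_input) array becomes a list of n_steps arrays of shape (batch_size, n_input), which is what the old list-based `tf.nn.rnn` interface expects.

import numpy as np

batch_size, n_steps, n_input = 4, 6, 8          # toy sizes
x = np.zeros((batch_size, n_steps, n_input))
x_t = np.transpose(x, (1, 0, 2))                # (n_steps, batch_size, n_input)
x_steps = [x_t[t] for t in range(n_steps)]      # list of (batch_size, n_input) arrays
assert len(x_steps) == n_steps and x_steps[0].shape == (batch_size, n_input)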
Example #7
    def __init__(self, max_words, num_classes, vocab_size, 
            embedding_size, num_hidden):

        # input, output, dropout placeholders
        self.text = tf.placeholder(tf.int32, [None, max_words], name="input_text")
        self.extra = tf.placeholder(tf.int32, [None, max_words], name="input_extra")
        self.output = tf.placeholder(tf.float32, [None, num_classes], name="output_y")
        self.sequence_lengths = tf.placeholder(tf.int32, [None], name="sequence_lengths")
        self.dropout_prob = tf.placeholder(tf.float32, name="dropout_probability")

        # Word embedding layer
        with tf.device("/cpu:0"), tf.name_scope("word_embedding"):
            embedding_matrix = tf.Variable(
                    tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), # random numbers between -1 and 1
                    name="embedding_matrix")
            self.lookup = tf.nn.embedding_lookup(embedding_matrix, self.text)

        # GRU
        with tf.name_scope("GRU"):
            output, state = rnn.dynamic_rnn(
                    rnn_cell.GRUCell(num_hidden),
                    self.lookup,
                    dtype=tf.float32,
                    sequence_length=self.sequence_lengths)
            output = tf.transpose(output, [1, 0, 2])
            self.gru = tf.gather(output, int(output.get_shape()[0]) - 1)

        # Add dropout
        with tf.name_scope("dropout"):
            self.dropout = tf.nn.dropout(self.gru, self.dropout_prob)

        # add in extra data and ReLU layer
        with tf.name_scope("extra_data"):
            combined = tf.concat(1, [self.dropout, tf.cast(self.extra, tf.float32)])
            weights_e = tf.Variable(tf.truncated_normal([num_hidden + max_words, num_hidden], stddev=0.1), name="weights_extra")
            biases_e = tf.Variable(tf.constant(0.1, shape=[num_hidden]), name="biases_extra")
            processed = tf.nn.relu(tf.matmul(combined, weights_e) + biases_e)

        # Final output
        with tf.name_scope("output"):
            weights = tf.Variable(tf.truncated_normal([num_hidden, num_classes], stddev=0.1), name="weights")
            biases = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="biases")
            unscaled = tf.matmul(processed, weights) + biases
            self.scores = tf.nn.softmax(unscaled, name="scores")
            self.predictions = tf.argmax(self.scores, dimension=1, name="predictions")

        # calculate loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(unscaled, self.output)
            self.loss = tf.reduce_mean(losses)

        # calculate accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.output, 1))
            self.accuracy = 100 * tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
Example #8
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = tf.cast(self.iterations, tf.float32) + 1.
        beta_1 = self.beta_1
        beta_2 = self.beta_2
        beta_1_t = tf.pow(beta_1, t)
        beta_2_t = tf.pow(beta_2, t)
        rho_inf = 2. / (1. - beta_2) - 1.
        rho_t = rho_inf - 2. * t * beta_2_t / (1. - beta_2_t)
        r_t = K.sqrt(
            K.relu(rho_t - 4.) * (rho_t - 2.) * rho_inf /
            (K.relu(rho_inf - 4.) * (rho_inf - 2.) * rho_t))
        flag = tf.cast(rho_t > 4., tf.float32)

        ms = [K.zeros(K.int_shape(p)) for p in params]
        vs = [K.zeros(K.int_shape(p)) for p in params]

        self.weights = [self.iterations] + ms + vs
        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = beta_1 * m + (1. - beta_1) * g
            v_t = beta_2 * v + (1. - beta_2) * tf.square(g)

            m_hat_t = m_t / (1. - beta_1_t)
            v_hat_t = K.sqrt(v_t / (1. - beta_2_t))
            new_p = p - lr * (r_t /
                              (v_hat_t + self.epsilon) + flag - 1.) * m_hat_t

            if getattr(p, "constraint", None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
        return self.updates
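The r_t term above is the RAdam variance rectification; a quick NumPy check of the formulas for one illustrative choice of beta_2 and step t (the values below are not from the snippet):

import numpy as np

beta_2, t = 0.999, 100.0
rho_inf = 2. / (1. - beta_2) - 1.
beta_2_t = beta_2 ** t
rho_t = rho_inf - 2. * t * beta_2_t / (1. - beta_2_t)
r_t = np.sqrt(max(rho_t - 4., 0.) * (rho_t - 2.) * rho_inf /
              (max(rho_inf - 4., 0.) * (rho_inf - 2.) * rho_t))
# r_t is below 1 for small t and approaches 1 as t grows.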
Example #9
def build_layers(s, c_names, n_l1, w_initializer, b_initializer):
    with tf.variable_scope('l1'):
        # [1,n_features]*[n_features,n_l1]
        w1 = tf.get_variable(
            'w1', [self.n_features, n_l1], initializer=w_initializer, collections=c_names)
        b1 = tf.get_variable(
            'b1', [1, n_l1], initializer=b_initializer, collections=c_names)
        l1 = tf.nn.relu(tf.matmul(s, w1) + b1)
    with tf.variable_scope('l2'):
        w2 = tf.get_variable(
            'w2', [n_l1, self.n_actions], initializer=w_initializer, collections=c_names)
        b2 = tf.get_variable(
            'b2', [1, self.n_actions], initializer=b_initializer, collections=c_names)
        out = tf.matmul(l1, w2) + b2
    return out
Example #10
    def build(self, rgb, train_mode=False):
        '''
            Build VGG16.
        :param rgb: input, a batch of 224x224 RGB images
        :param train_mode: flag; if True (training), dropout is enabled
        '''
        self.conv1_1 = self.conv_layer(rgb, "conv1_1", 3, 64)
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2", 64, 64)
        self.pool1 = self.max_pool(self.conv1_2, "pool1")

        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1", 64, 128)
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2", 128, 128)
        self.pool2 = self.max_pool(self.conv2_2, "pool2")

        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1", 128, 256)
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2", 256, 256)
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3", 256, 256)
        self.pool3 = self.max_pool(self.conv3_3, "pool3")

        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1", 256, 512)
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2", 512, 512)
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3", 512, 512)
        self.pool4 = self.max_pool(self.conv4_3, "pool4")

        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1", 512, 512)
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2", 512, 512)
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3", 512, 512)
        self.pool5 = self.max_pool(self.conv5_3, "pool5")

        self.fc6 = self.fc_layer(self.pool5, "fc6", 25088,
                                 4096)  #25088 = ((224//(2**5))**2)*512
        self.relu6 = tf.nn.relu(self.fc6)
        if train_mode:
            self.relu6 = tf.nn.dropout(self.relu6, self.dropout)

        self.fc7 = self.fc_layer(self.relu6, "fc7", 4096, 4096)
        self.relu7 = tf.nn.relu(self.fc7)
        if train_mode:
            self.relu7 = tf.nn.dropout(self.relu7, self.dropout)

        self.fc8 = self.fc_layer(self.relu7, "fc8", 4096, 1000)

        self.prob = tf.nn.softmax(self.fc8, name="prob")
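The 25088 quoted in the fc6 comment can be checked directly: five 2x2 max-pool layers shrink a 224x224 input to 7x7, and the last conv block has 512 channels.

spatial = 224 // (2 ** 5)          # 7
assert spatial * spatial * 512 == 25088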
Example #11
def forward_prop(x, params):
    w1 = params["w1"]
    w2 = params["w2"]
    z1 = tf.nn.conv2d(x, w1, [1, 1, 1, 1], padding="SAME")
    a1 = tf.nn.relu(z1)
    p1 = tf.nn.max_pool(a1,
                        ksize=[1, 8, 8, 1],
                        strides=[1, 8, 8, 1],
                        padding="SAME")
    z2 = tf.nn.conv2d(p1, w2, [1, 1, 1, 1], padding="SAME")
    a2 = tf.nn.relu(z2)
    p2 = tf.nn.max_pool(a2,
                        ksize=[1, 4, 4, 1],
                        strides=[1, 4, 4, 1],
                        padding="SAME")
    p2 = tf.contrib.layers.flatten(p2)
    z3 = tf.contrib.layers.fully_connected(p2,
                                           num_outputs=6,
                                           activation_fn=None)
    return z3
Example #12
    def build_loss(self, seqs_repr, data_ops):
        """Convert per-location real-valued predictions to a loss."""
        # targets
        tstart = self.batch_buffer // self.target_pool
        tend = (self.batch_length - self.batch_buffer) // self.target_pool

        targets = data_ops['label']
        targets = tf.identity(targets[:, tstart:tend, :], name='targets_op')

        # work-around for specifying my own predictions
        self.preds_adhoc = tf.placeholder(tf.float32,
                                          shape=seqs_repr.shape,
                                          name='preds-adhoc')

        # choose link
        if self.link in ['identity', 'linear']:
            self.preds_op = tf.identity(seqs_repr, name='preds')

        elif self.link == 'relu':
            self.preds_op = tf.nn.relu(seqs_repr, name='preds')

        elif self.link == 'exp':
            self.preds_op = tf.exp(tf.clip_by_value(seqs_repr, -50, 50),
                                   name='preds')

        elif self.link == 'exp_linear':
            self.preds_op = tf.where(seqs_repr > 0,
                                     seqs_repr + 1,
                                     tf.exp(
                                         tf.clip_by_value(seqs_repr, -50, 50)),
                                     name='preds')

        elif self.link == 'softplus':
            self.preds_op = tf.nn.softplus(seqs_repr, name='preds')

        elif self.link == 'softmax':
            # performed in the loss function, but saving probabilities
            self.preds_prob = tf.nn.softmax(seqs_repr, name='preds')

        else:
            print('Unknown link function %s' % self.link, file=sys.stderr)
            exit(1)

        # clip
        if self.target_clip is not None:
            self.preds_op = tf.clip_by_value(self.preds_op, 0,
                                             self.target_clip)
            targets = tf.clip_by_value(targets, 0, self.target_clip)

        # sqrt
        if self.target_sqrt:
            self.preds_op = tf.sqrt(self.preds_op)
            targets = tf.sqrt(targets)

        loss_op = None
        loss_adhoc = None
        # choose loss
        if self.loss == 'gaussian':
            loss_op = tf.squared_difference(self.preds_op, targets)
            loss_adhoc = tf.squared_difference(self.preds_adhoc, targets)

        elif self.loss == 'poisson':
            loss_op = tf.nn.log_poisson_loss(targets,
                                             tf.log(self.preds_op),
                                             compute_full_loss=True)
            loss_adhoc = tf.nn.log_poisson_loss(targets,
                                                tf.log(self.preds_adhoc),
                                                compute_full_loss=True)

        elif self.loss == 'negative_binomial':
            # define overdispersion alphas
            self.alphas = tf.get_variable(
                'alphas',
                shape=[self.num_targets],
                initializer=tf.constant_initializer(-5),
                dtype=tf.float32)
            self.alphas = tf.nn.softplus(tf.clip_by_value(
                self.alphas, -50, 50))
            tf.summary.histogram('alphas', self.alphas)
            for ti in np.linspace(0, self.num_targets - 1, 10).astype('int'):
                tf.summary.scalar('alpha_t%d' % ti, self.alphas[ti])

            # compute w/ inverse
            k = 1. / self.alphas

            # expand k
            k_expand = tf.tile(k, [self.batch_size * seq_length])
            k_expand = tf.reshape(
                k_expand, (self.batch_size, seq_length, self.num_targets))

            # expand lgamma(k)
            lgk_expand = tf.tile(tf.lgamma(k), [self.batch_size * seq_length])
            lgk_expand = tf.reshape(
                lgk_expand, (self.batch_size, seq_length, self.num_targets))

            # construct loss
            loss1 = targets * tf.log(self.preds_op /
                                     (self.preds_op + k_expand))
            loss2 = k_expand * tf.log(k_expand / (self.preds_op + k_expand))
            loss3 = tf.lgamma(targets + k_expand) - lgk_expand
            loss_op = -(loss1 + loss2 + loss3)

            # adhoc
            loss1 = targets * tf.log(self.preds_adhoc /
                                     (self.preds_adhoc + k_expand))
            loss2 = k_expand * tf.log(k_expand / (self.preds_adhoc + k_expand))
            loss_adhoc = -(loss1 + loss2 + loss3)

        elif self.loss == 'negative_binomial_hilbe':
            # define overdispersion alphas
            self.alphas = tf.get_variable(
                'alphas',
                shape=[self.num_targets],
                initializer=tf.constant_initializer(-5),
                dtype=tf.float32)
            self.alphas = tf.exp(tf.clip_by_value(self.alphas, -50, 50))

            # expand
            alphas_expand = tf.tile(self.alphas,
                                    [self.batch_size * seq_length])
            alphas_expand = tf.reshape(
                alphas_expand, (self.batch_size, seq_length, self.num_targets))

            # construct loss
            loss1 = targets * tf.log(self.preds_op)
            loss2 = (alphas_expand * targets + 1) / alphas_expand
            loss3 = tf.log(alphas_expand * self.preds_op + 1)
            loss_op = -loss1 + loss2 * loss3

            # adhoc
            loss1 = targets * tf.log(self.preds_adhoc)
            loss3 = tf.log(alphas_expand * self.preds_adhoc + 1)
            loss_adhoc = -loss1 + loss2 * loss3

        elif self.loss == 'gamma':
            # jchan document
            loss_op = targets / self.preds_op + tf.log(self.preds_op)
            loss_adhoc = targets / self.preds_adhoc + tf.log(self.preds_adhoc)

        elif self.loss == 'cross_entropy':
            loss_op = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=(targets - 1), logits=self.preds_op)
            loss_adhoc = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=(targets - 1), logits=self.preds_adhoc)

        else:
            print('Cannot identify loss function %s' % self.loss)
            exit(1)

        # set NaN's to zero
        # loss_op = tf.boolean_mask(loss_op, tf.logical_not(self.targets_na[:,tstart:tend]))

        # reduce losses by batch and position
        loss_op = tf.reduce_mean(loss_op, axis=[0, 1], name='target_loss')
        loss_op = tf.check_numerics(loss_op, 'Invalid loss', name='loss_check')

        loss_adhoc = tf.reduce_mean(loss_adhoc,
                                    axis=[0, 1],
                                    name='target_loss_adhoc')
        tf.summary.histogram('target_loss', loss_op)
        for ti in np.linspace(0, self.num_targets - 1, 10).astype('int'):
            tf.summary.scalar('loss_t%d' % ti, loss_op[ti])
        self.target_losses = loss_op
        self.target_losses_adhoc = loss_adhoc

        # define target sigmas
        """
        self.target_sigmas = tf.get_variable('target_sigmas',
        shape=[self.num_targets], initializer=tf.constant_initializer(2),
        dtype=tf.float32)
        self.target_sigmas =
        tf.nn.softplus(tf.clip_by_value(self.target_sigmas,-50,50))
        tf.summary.histogram('target_sigmas', self.target_sigmas)
        for ti in np.linspace(0,self.num_targets-1,10).astype('int'):
            tf.summary.scalar('sigma_t%d'%ti, self.target_sigmas[ti])
        # self.target_sigmas = tf.ones(self.num_targets) / 2.
        """

        # dot losses target sigmas
        # loss_op = loss_op / (2*self.target_sigmas)
        # loss_adhoc = loss_adhoc / (2*self.target_sigmas)

        # fully reduce
        loss_op = tf.reduce_mean(loss_op, name='loss')
        loss_adhoc = tf.reduce_mean(loss_adhoc, name='loss_adhoc')

        # add extraneous terms
        loss_op += self.weights_regularizers  # + tf.reduce_mean(tf.log(self.target_sigmas))
        loss_adhoc += self.weights_regularizers  # + tf.reduce_mean(tf.log(self.target_sigmas))

        # track
        tf.summary.scalar('loss', loss_op)
        self.targets_op = targets
        return loss_op, loss_adhoc
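The 'exp_linear' link used above is piecewise (identity plus one on the positive side, exponential on the negative side), which keeps predictions positive while staying roughly linear for large inputs; a NumPy sketch of the same mapping:

import numpy as np

def exp_linear(x):
    # x + 1 where x > 0, exp(x) elsewhere; the clipping mirrors the TF version.
    x = np.clip(x, -50, 50)
    return np.where(x > 0, x + 1, np.exp(x))

print(exp_linear(np.array([-2.0, 0.0, 3.0])))   # [0.135..., 1.0, 4.0]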
Example #13
  def get_train_examples(self, data_dir):
    """See base class."""
    
    examples = []
    train_df = []
    
    with ZipFile('sampleDir.zip', 'r') as zipObj:
        zipObj.extractall()
    
    train_df = pd.read_json("simplified-nq-train.jsonl", orient = 'records', lines = True)
    print('Our dataset has {} rows and {} columns'.format(train_df.shape[0], train_df.shape[1]))
    gc.collect()
    
    for i_main, row in train_df.iterrows():
    
        document_text = row['document_text'].split()
        question_text = row['question_text']
        
        for candidate_no, long_answer_candidate in enumerate(row['long_answer_candidates']):
            
            target_conv3 = [0] * FLAGS.cont_len
            target_conv6 = [0] * FLAGS.cont_len
            target_present = [0] * FLAGS.cont_len
            
            q_mask = [1] * FLAGS.ques_len
            c_mask = [1] * FLAGS.cont_len

            long_ans_start_tok = long_answer_candidate['start_token']
            long_ans_end_tok = long_answer_candidate['end_token']
            long_cand_length = long_ans_end_tok - long_ans_start_tok
            
            if long_cand_length > FLAGS.cont_len:
                long_sentence = " ".join(document_text[long_ans_start_tok:long_ans_start_tok + FLAGS.cont_len])
            else:
                long_sentence = " ".join(document_text[long_ans_start_tok:long_ans_end_tok])
                for i in range(long_cand_length+1, FLAGS.cont_len):
                    c_mask[i] = 0
            
            if long_ans_start_tok == row['annotations'][0]['long_answer']['start_token'] and \
                len(row['annotations'][0]['short_answers']) > 0:

                #print("this is correct long answer")

                short_answer_start_token = row['annotations'][0]['short_answers'][0]['start_token']
                short_answer_end_token = row['annotations'][0]['short_answers'][0]['end_token']
                short_start_idx = short_answer_start_token-long_ans_start_tok
                short_end_idx = short_answer_end_token-long_ans_start_tok
                
                if short_end_idx < FLAGS.cont_len:
                    target_conv3[short_start_idx] = 1
                    target_conv6[short_end_idx] = 1

                    for i in range(short_start_idx,short_end_idx):
                        target_present[i] = 1
                else:
                    smth = "short answer beyond maximum len"
             
            ques_length = len(question_text.split())                                          
            if ques_length < FLAGS.ques_len:
                for i in range(ques_length+1,FLAGS.ques_len):
                    q_mask[i] = 0                                 
                                                       
            guid = "train-%d" % (i_main)
            text_a = tokenization.convert_to_unicode(long_sentence)
            text_b = tokenization.convert_to_unicode(question_text)
            target_conv3 = tokenization.convert_to_unicode(target_conv3)
            target_conv6 = tokenization.convert_to_unicode(target_conv6)
            target_present = tokenization.convert_to_unicode(target_present)
            q_mask = tokenization.convert_to_unicode(q_mask)
            c_mask = tokenization.convert_to_unicode(c_mask)

            examples.append(InputExample(guid=guid, text_a=text_a,\
                                         text_b=text_b, target_conv3=target_conv3, target_conv6=target_conv6,
                                         target_present=target_present, q_mask=q_mask, c_mask=c_mask))

    return examples        

  def get_labels(self):
    """See base class."""
    return ["target_conv3", "target_conv6", "target_present"]

def convert_single_example(ex_index, example, label_list, max_seq_length,
                           tokenizer):
  """Converts a single `InputExample` into a single `InputFeatures`."""

  if isinstance(example, PaddingInputExample):
    return InputFeatures(
        input_ids=[0] * max_seq_length,
        input_mask=[0] * max_seq_length,
        segment_ids=[0] * max_seq_length,
        target_conv3 = [0]*FLAGS.cont_len,
        target_conv6 = [0]*FLAGS.cont_len,
        target_present = [0]*FLAGS.cont_len,
        q_mask = [0]*FLAGS.ques_len,
        c_mask = [0]*FLAGS.cont_len,
        is_real_example=False)

  label_map = {}
  for (i, label) in enumerate(label_list):
    label_map[label] = i

  tokens_a = tokenizer.tokenize(example.text_a)
                                             
  #We need exact length to later build the BIDAF
  tokens_a = tokens_a[0:FLAGS.cont_len]
                                                       
  tokens_b = None
  if example.text_b:
    tokens_b = tokenizer.tokenize(example.text_b)
    #We need exact length to later build the BIDAF
    tokens_b = tokens_b[0:FLAGS.cont_len]                                              
                    
  if tokens_b:
    # Modifies `tokens_a` and `tokens_b` in place so that the total
    # length is less than the specified length.
    # Account for [CLS], [SEP], [SEP] with "- 3"
    _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
  else:
    # Account for [CLS] and [SEP] with "- 2"
    if len(tokens_a) > max_seq_length - 2:
      tokens_a = tokens_a[0:(max_seq_length - 2)]

  # The convention in BERT is:
  # (a) For sequence pairs:
  #  tokens:   [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
  #  type_ids: 0     0  0    0    0     0       0 0     1  1  1  1   1 1
  # (b) For single sequences:
  #  tokens:   [CLS] the dog is hairy . [SEP]
  #  type_ids: 0     0   0   0  0     0 0
  #
  # Where "type_ids" are used to indicate whether this is the first
  # sequence or the second sequence. The embedding vectors for `type=0` and
  # `type=1` were learned during pre-training and are added to the wordpiece
  # embedding vector (and position vector). This is not *strictly* necessary
  # since the [SEP] token unambiguously separates the sequences, but it makes
  # it easier for the model to learn the concept of sequences.
  #
  # For classification tasks, the first vector (corresponding to [CLS]) is
  # used as the "sentence vector". Note that this only makes sense because
  # the entire model is fine-tuned.
  tokens = []
  segment_ids = []
  tokens.append("[CLS]")
  segment_ids.append(0)
  for token in tokens_a:
    tokens.append(token)
    segment_ids.append(0)
  tokens.append("[SEP]")
  segment_ids.append(0)

  if tokens_b:
    for token in tokens_b:
      tokens.append(token)
      segment_ids.append(1)
    tokens.append("[SEP]")
    segment_ids.append(1)

  input_ids = tokenizer.convert_tokens_to_ids(tokens)

  # The mask has 1 for real tokens and 0 for padding tokens. Only real
  # tokens are attended to.
  input_mask = [1] * len(input_ids)

  # Zero-pad up to the sequence length.
  while len(input_ids) < max_seq_length:
    input_ids.append(0)
    input_mask.append(0)
    segment_ids.append(0)

  assert len(input_ids) == max_seq_length
  assert len(input_mask) == max_seq_length
  assert len(segment_ids) == max_seq_length

  #The following lines are just renames, kept for convention
  label_conv3 = example.target_conv3
  label_conv6 = example.target_conv6
  label_present = example.target_present
  c_mask = example.c_mask
  q_mask = example.q_mask                                                       
                                                                                                              
  if ex_index < 5:
    tf.logging.info("*** Example ***")
    tf.logging.info("guid: %s" % (example.guid))
    tf.logging.info("tokens: %s" % " ".join(
        [tokenization.printable_text(x) for x in tokens]))
    tf.logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
    tf.logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
    tf.logging.info("segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
    tf.logging.info("label_start: (id = %d)" % (label_conv3))
    tf.logging.info("label_end: (id = %d)" % (label_conv6))
    tf.logging.info("label_present: (id = %d)" % (label_present))

  feature = InputFeatures(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids,
      target_conv3 = label_conv3,
      target_conv6 = label_conv6,
      target_present = label_present,
      c_mask = c_mask,
      q_mask = q_mask,
      is_real_example=True)
  return feature
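
# Illustration only (not part of the pipeline): the [CLS]/[SEP]/segment_id
# layout described in the comment block inside convert_single_example, shown
# on a toy, already-tokenized sentence pair.
toy_tokens_a = ["is", "this", "jack", "##son", "##ville", "?"]
toy_tokens_b = ["no", "it", "is", "not", "."]
toy_tokens = ["[CLS]"] + toy_tokens_a + ["[SEP]"] + toy_tokens_b + ["[SEP]"]
toy_segment_ids = [0] * (len(toy_tokens_a) + 2) + [1] * (len(toy_tokens_b) + 1)
assert len(toy_tokens) == len(toy_segment_ids)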


def file_based_convert_examples_to_features(
    examples, label_list, max_seq_length, tokenizer, output_file):
  """Convert a set of `InputExample`s to a TFRecord file."""

  writer = tf.python_io.TFRecordWriter(output_file)

  for (ex_index, example) in enumerate(examples):
    if ex_index % 10000 == 0:
      tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

    feature = convert_single_example(ex_index, example, label_list,
                                     max_seq_length, tokenizer)

    def create_int_feature(values):
      f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
      return f

    features = collections.OrderedDict()
    features["input_ids"] = create_int_feature(feature.input_ids)
    features["input_mask"] = create_int_feature(feature.input_mask)
    features["segment_ids"] = create_int_feature(feature.segment_ids)
    features["target_start_ids"] = create_int_feature([feature.target_conv3])
    features["target_end_ids"] = create_int_feature([feature.target_conv6])
    features["target_present_ids"] = create_int_feature([feature.target_present])
    features["q_mask"] = create_int_feature([feature.q_mask]) 
    features["c_mask"] = create_int_festure([feature.c_mask])                                                  
    features["is_real_example"] = create_int_feature(
        [int(feature.is_real_example)])

    tf_example = tf.train.Example(features=tf.train.Features(feature=features))
    writer.write(tf_example.SerializeToString())
  writer.close()


def file_based_input_fn_builder(input_file, seq_length, is_training,
                                drop_remainder):
  """Creates an `input_fn` closure to be passed to TPUEstimator."""

  name_to_features = {
      "input_ids": tf.FixedLenFeature([seq_length], tf.int64),
      "input_mask": tf.FixedLenFeature([seq_length], tf.int64),
      "segment_ids": tf.FixedLenFeature([seq_length], tf.int64),
      "target_start_ids": tf.FixedLenFeature([], tf.int64),
      "target_end_ids": tf.FixedLenFeature([], tf.int64),
      "target_present_ids": tf.FixedLenFeature([], tf.int64),
      "c_mask": tf.FixedLenFeature([], tf.int64),
      "q_mask": tf.FixedLenFeature([], tf.int64),
      "is_real_example": tf.FixedLenFeature([], tf.int64),
  }

  def _decode_record(record, name_to_features):
    """Decodes a record to a TensorFlow example."""
    example = tf.parse_single_example(record, name_to_features)

    # tf.Example only supports tf.int64, but the TPU only supports tf.int32.
    # So cast all int64 to int32.
    for name in list(example.keys()):
      t = example[name]
      if t.dtype == tf.int64:
        t = tf.to_int32(t)
      example[name] = t

    return example

  def input_fn(params):
    """The actual input function."""
    batch_size = params["batch_size"]

    # For training, we want a lot of parallel reading and shuffling.
    # For eval, we want no shuffling and parallel reading doesn't matter.
    d = tf.data.TFRecordDataset(input_file)
    if is_training:
      d = d.repeat()
      d = d.shuffle(buffer_size=100)

    d = d.apply(
        tf.contrib.data.map_and_batch(
            lambda record: _decode_record(record, name_to_features),
            batch_size=batch_size,
            drop_remainder=drop_remainder))

    return d

  return input_fn
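
# Hypothetical usage sketch of file_based_input_fn_builder; the file name,
# sequence length and batch size below are placeholders, not values from
# this script.
example_input_fn = file_based_input_fn_builder(
    input_file="train.tf_record",
    seq_length=384,
    is_training=True,
    drop_remainder=True)
example_dataset = example_input_fn({"batch_size": 8})  # tf.data.Dataset of feature dicts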


def _truncate_seq_pair(tokens_a, tokens_b, max_length):
  """Truncates a sequence pair in place to the maximum length."""

  # This is a simple heuristic which will always truncate the longer sequence
  # one token at a time. This makes more sense than truncating an equal percent
  # of tokens from each, since if one sequence is very short then each token
  # that's truncated likely contains more information than a longer sequence.
  while True:
    total_length = len(tokens_a) + len(tokens_b)
    if total_length <= max_length:
      break
    if len(tokens_a) > len(tokens_b):
      tokens_a.pop()
    else:
      tokens_b.pop()
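
# Usage sketch: _truncate_seq_pair mutates its arguments in place, always
# popping from whichever list is currently longer (the lists below are toy data).
demo_a = ["tok"] * 7
demo_b = ["tok"] * 3
_truncate_seq_pair(demo_a, demo_b, max_length=8)
assert (len(demo_a), len(demo_b)) == (5, 3)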
                                                       
def masked_softmax(logits, mask, dim):
    """
    Takes masked softmax over given dimension of logits. Discards padded entries with e^(-inf).
    Inputs:
      logits: Numpy array. We want to take softmax over dimension dim.
      mask: Numpy array of same shape as logits.
        Has 1s where there's real data in logits, 0 where there's padding
      dim: int. dimension over which to take softmax
    Returns:
      masked_logits: Numpy array same shape as logits.
        This is the same as logits, but with 1e30 subtracted
        (i.e. very large negative number) in the padding locations.
      prob_dist: Numpy array same shape as logits.
        The result of taking softmax over masked_logits in given dimension.
        Should be 0 in padding locations.
        Should sum to 1 over given dimension.
    """
    exp_mask = (1 - tf.cast(mask, 'float')) * (-1e30) # -large where there's padding, 0 elsewhere
    masked_logits = tf.add(logits, exp_mask) # where there's padding, set logits to -large
    prob_dist = tf.nn.softmax(masked_logits, dim)
    return masked_logits, prob_dist                                                       
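
# NumPy illustration of the masking idea used in masked_softmax above: adding
# a very large negative number where mask == 0 drives those probabilities to ~0.
import numpy as np
_logits = np.array([[1.0, 2.0, 3.0]])
_mask = np.array([[1, 1, 0]])                      # last position is padding
_masked = _logits + (1 - _mask) * (-1e30)
_e = np.exp(_masked - _masked.max(axis=1, keepdims=True))
_probs = _e / _e.sum(axis=1, keepdims=True)        # ~[[0.27, 0.73, 0.0]]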
                                                    
def cnn_output_width(input_width, kernel_size, padding_amount, strides):
    return (input_width - kernel_size + 2*padding_amount) // strides + 1
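
# Worked example: with the default cont_len of 350 used in create_model below,
# a width-3 kernel, no padding and stride 1 gives the widths quoted in the
# conv1/conv2 shape comments there.
assert cnn_output_width(350, 3, 0, 1) == 348
assert cnn_output_width(348, 3, 0, 1) == 346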
                                                       
def deconv_output_shape(input_batch_size, input_size_w, output_channel_size, padding):
    output_size_h = 1
    stride = 2
    filter_size_w = 2
    if padding == 'VALID':
        output_size_w = (input_size_w - 1)*stride + filter_size_w
    elif padding == 'SAME':
        output_size_w = (input_size_w - 1)*stride + 1
    else:
        raise ValueError("unknown padding")
    output_shape = tf.stack([input_batch_size, 
                                output_size_h, output_size_w, 
                                output_channel_size])                                               
    return output_shape
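
# With the stride of 2 and filter width 2 hard-coded above, 'VALID' padding
# roughly doubles the width: (83 - 1) * 2 + 2 == 166, the conv6_up width
# quoted in create_model. The batch size and channel count here are examples.
_up_shape = deconv_output_shape(input_batch_size=8, input_size_w=83,
                                output_channel_size=256, padding='VALID')
# `_up_shape` is a rank-1 tensor holding [8, 1, 166, 256].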
                                                       
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 target_conv3, target_conv6, target_present, q_mask, c_mask, num_labels, use_one_hot_embeddings):
  """Creates a classification model."""
  model = modeling.BertModel(
      config=bert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)

  # In the demo, we are doing a simple classification task on the entire
  # segment.
  #
  # If you want to use the token-level output, use model.get_sequence_output()
  # instead.
  output_layer = model.get_sequence_output()
  
  output_layer_shape = modeling.get_shape_list(output_layer, expected_rank=3)                                                     
  
  batch_size = output_layer_shape[0]
  seq_length = output_layer_shape[1]
  hidden_size = output_layer_shape[2]
                                                    
  hidden_size = output_layer.shape[-1].value                                              

  SW_weights = tf.get_variable(
      "similarity_weights", [1, 3*hidden_size],
      initializer=tf.contrib.layers.xavier_initializer())
                                                       
  c = output_layer[:,1:FLAGS.cont_len+1,:] #do not count the [CLS]
  q = output_layer[:,FLAGS.cont_len+2:-2,:] #do not count the [SEP] and [SEP]
                                                                                                            
  # Hidden size = 2h by convention                     
                                                       
  c_expand = tf.expand_dims(c,2)  #[B,N,1,2h]
  q_expand = tf.expand_dims(q,1)  #[B,1,M,2h]
  c_pointWise_q = c_expand * q_expand  #[B,N,M,2h]                                                     
                                                       
  c_input = tf.tile(c_expand, [1, 1, tf.shape(q)[1], 1]) #fill in to get same dims
  q_input = tf.tile(q_expand, [1, tf.shape(c)[1], 1, 1])
                                                     
  concat_input = tf.concat([c_input, q_input, c_pointWise_q], -1) # [B,N,M,6h]

  similarity = tf.reduce_sum(concat_input * SW_weights, axis=3)  #[B,N,M]
                                           
  # q_mask shape [B,M]
  # c_mask shape [B,N]                                                     
  similarity_mask = tf.expand_dims(q_mask, 1) # [B, 1, M]
                                                       
  similarity_mask = tf.tile(similarity_mask, [1,tf.shape(c)[1],1]) # [B, N, M]
                                                       
  _, c2q_dist = masked_softmax(similarity, similarity_mask, 2) # shape (B, N, M). take softmax over q
                                                       
  c2q = tf.matmul(c2q_dist, q) # shape (B, N, 2h)
                                                       
  S_max = tf.reduce_max(similarity, axis=2) # shape (B, N) ; reminder N = cont_len
                                                       
  _, c_dash_dist = masked_softmax(S_max, c_mask, 1) # distribution of shape (B, N)
                                                       
  c_dash_dist_expand = tf.expand_dims(c_dash_dist, 1) # shape (B, 1, N)
                                                       
  c_dash = tf.matmul(c_dash_dist_expand, c) # shape (B, 1, 2h)
                                                       
  c_c2q = c * c2q # shape (B, N, 2h)
  
  c_dash =  tf.tile(c_dash, [1,tf.shape(c)[1],1]) # [B, N, 2h]                                                    
                                                       
  c_c_dash = c * c_dash # shape (B, N, 2h)
                                                       
  output = tf.concat([c2q, c_c2q, c_c_dash], axis=2) # (B, N, 2h * 3)                                                      
                                                       
  output = tf.nn.dropout(output, 0.9)
                                                                                                            
  blended_reps = tf.concat([c, output], axis=2)  # (B, N, 8h)

  ### ADD MODELING LAYER .. but first add some more data                                                    
  
  pooled_output = model.get_pooled_output()  # Shape (B, 2h)

  pooled_exp = tf.expand_dims(pooled_output, 1) # shape (B, 1, 2h)                                                           
                                                       
  pooled_tile = tf.tile(pooled_exp, [1, FLAGS.cont_len, 1]) # shape (B, cont_len, 2h)
                                                                                                                                                                                                                  
  model_input = tf.concat([blended_reps, pooled_tile], 2) # shape (B, cont_len, 10h)
  
  # we will go two different routes. targets_conv will come from convolution layers and target_present from lstm..
  # the following is route 1:                                                     
                                                       
  fw_cell = tf.nn.rnn_cell.BasicLSTMCell(256)
  bw_cell = tf.nn.rnn_cell.BasicLSTMCell(256)
  rnn_outputs, rnn_state = tf.nn.bidirectional_dynamic_rnn(cell_fw=fw_cell, cell_bw=bw_cell,
                                                           inputs=model_input,
                                                           sequence_length=tf.fill([batch_size], FLAGS.cont_len),
                                                           dtype=tf.float32)
  
  rnn_outputs = tf.concat(rnn_outputs, 2) # Shape (B, cont_len, 256*2)
  rnn_outputs = tf.nn.relu(rnn_outputs)
                                                       
  # Now copying from run_nq.py                                                       
  rnn_output_weights = tf.get_variable(
                "rnn_output_w", [1, 512],
                  initializer=tf.truncated_normal_initializer(stddev=0.02))
  rnn_output_bias = tf.get_variable(
                  "rnn_output_b", [1], initializer=tf.zeros_initializer())

  rnn_outputs = tf.reshape(rnn_outputs, [batch_size*FLAGS.cont_len, 512])  # shape [B*N, 256*2]
                                
  rnn_logits = tf.matmul(rnn_outputs, rnn_output_weights, transpose_b=True) # shape [B*N, 1]
  
  rnn_logits = tf.nn.bias_add(rnn_logits, rnn_output_bias) # shape [B*N, 1]
                                                       
  rnn_logits = tf.reshape(rnn_logits, [batch_size, FLAGS.cont_len, 1]) #shape [B, N, 1]
                                                       
  rnn_logits = tf.squeeze(rnn_logits, axis=2) #shape [B, N]
  
  rnn_preds = tf.sigmoid(rnn_logits)                                                     
                                                       
  rnn_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.cast(target_present, tf.float32), logits=rnn_logits)
                                                       
  # Now Route 2: Convolutions 
  # Expand dims to make it a 3D for the convolution:
  conv_input = tf.expand_dims(model_input, axis=1)  # Change the shape to [B, 1, cont_len, 5*emb_size]                                                     
  
  #U-NET downladder filters                                                                                                         
  filter1 = tf.get_variable("conv1_filter", shape=[1, 3, hidden_size*5, 64]) # [h, w, in_size, out_size]
  filter2 = tf.get_variable("conv2_filter", shape=[1, 3, 64, 64])
  filter3 = tf.get_variable("conv3_filter", shape=[1, 3, 64, 128])
  filter4 = tf.get_variable("conv4_filter", shape=[1, 3, 128, 128])
  filter5 = tf.get_variable("conv5_filter", shape=[1, 3, 128, 256])
  filter6 = tf.get_variable("conv6_filter", shape=[1, 3, 256, 256])
                                                       
  #U-NET upladder filters                                                     
  up6_filter = tf.get_variable("up6_filter", shape=[1, 2, 256, 256])
  filter7 = tf.get_variable("conv3_filter", shape=[1, 3, 256, 256])
  up7_filter = tf.get_variable("up6_filter", shape=[1, 2, 384, 384])
  filter8 = tf.get_variable("conv3_filter", shape=[1, 3, 448, 448])
  filter9 = tf.get_variable("conv3_filter", shape=[1, 3, 448, 1])                                                     
                                                    
  # Output shapes based on default cont_len 350                                                     
  conv1 = tf.nn.conv2d(conv_input, filter=filter1, strides=[1, 1, 1, 1], padding="VALID") # shape [B, 1, 348, 64]
  conv1 = tf.nn.relu(conv1) 
  conv2 = tf.nn.conv2d(conv1, filter=filter2, strides=[1, 1, 1, 1], padding="VALID") # shape [B, 1, 346, 64]
  conv2 = tf.nn.relu(conv2) 
  maxp2 = tf.nn.max_pool(conv2, ksize=[1, 1, 2, 1], strides=[1, 1, 1, 1], padding='VALID') # shape [B, 1, 178, 64]

  conv3 = tf.nn.conv2d(maxp2, filter=filter3, strides=[1, 1, 1, 1], padding="VALID") # shape [B, 1, 176, 128]
  conv3 = tf.nn.relu(conv3)
  conv4 = tf.nn.conv2d(conv3, filter=filter4, strides=[1, 1, 1, 1], padding="VALID") # shape [B, 1, 174, 128]
  conv4 = tf.nn.relu(conv4)
  maxp4 = tf.nn.max_pool(conv4, ksize=[1, 1, 2, 1], strides=[1, 1, 1, 1], padding='VALID') # shape [B, 1, 87, 128]                  
                                                       
  conv5 = tf.nn.conv2d(maxp4, filter=filter5, strides=[1, 1, 1, 1], padding="VALID") # shape [B, 1, 85, 256]
  conv5 = tf.nn.relu(conv5)
  conv6 = tf.nn.conv2d(conv5, filter=filter6, strides=[1, 1, 1, 1], padding="VALID") # shape [B, 1, 83, 256]
  conv6 = tf.nn.relu(conv6)
  
  up6_output_shape = deconv_output_shape(conv6.shape[0], conv6.shape[2], conv6.shape[3], "VALID")                                                     
  conv6_up = tf.nn.conv2d_transpose(conv6, filters = up6_filter, output_shape = up6_output_shape, 
                        strides = [1, 1, 1, 1], padding = "VALID") # shape [B, 1, 166, 256]
  # Convolve until shape is equal to conv4 (174). Use padding = SAME to increase width. 
  paddings = [[0,0],[0,0],[4,4],[0,0]]
  conv6_padded = tf.pad(conv6_up, paddings, "CONSTANT")  # shape [B, 1, 174, 256]
  conv7 = tf.nn.conv2d(conv6_padded, filter=filter7, strides=[1, 1, 1, 1], padding="SAME") # shape [B, 1, 174, 256]
  conv7 =  tf.nn.relu(conv7)
                                                       
  conc_4n7 = tf.concat([conv4, conv7], -1) # [B, 1 , 174, 384]                                                     
  up7_output_shape = deconv_output_shape(conc_4n7.shape[0], conc_4n7.shape[2], conc_4n7.shape[3], "VALID")                                                     
  conv7_up = tf.nn.conv2d_transpose(conc_4n7, filters = up7_filter, output_shape = up7_output_shape, 
                        strides = [1, 1, 1, 1], padding = "VALID") # shape [B, 1, 348, 384]
  conc_7n1 = tf.concat([conv7_up, conv1], -1) # [B, 1 , 348, 448]
  conv8 = tf.nn.conv2d(conc_7n1, filter=filter8, strides=[1, 1, 1, 1], padding="SAME") # shape [B, 1, 348, 448]
  conv8 = tf.nn.relu(conv8)
  conv9 = tf.nn.conv2d(conv8, filter=filter9, strides=[1, 1, 1, 1], padding="SAME") # shape [B, 1, 348, 1]

  conv_logits = tf.squeeze(conv9, axis = 3) # shape [B, 1, 348]
  conv_logits = tf.squeeze(conv_logits, axis = 1) # shape [B, 348]
                                                       
  conv_preds = tf.nn.sigmoid(conv_logits) 
                                                       
  conv_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.cast(target_present, tf.float32), logits=conv_logits)
  
  with tf.variable_scope("loss"):                                                       
      total_loss = rnn_loss + conv_loss
                                                       
  return (total_loss, rnn_preds, conv_preds)                                                     
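
# NumPy sketch (toy sizes, simple dot-product similarity instead of the
# trilinear SW_weights term) of the context-to-question attention wired above:
# similarity -> softmax over the question axis -> weighted sum of question vectors.
import numpy as np
_B, _N, _M, _H = 1, 3, 2, 4
_c = np.random.rand(_B, _N, _H)                     # context vectors
_q = np.random.rand(_B, _M, _H)                     # question vectors
_sim = np.einsum('bnh,bmh->bnm', _c, _q)            # (B, N, M)
_e = np.exp(_sim - _sim.max(axis=2, keepdims=True))
_c2q_dist = _e / _e.sum(axis=2, keepdims=True)      # softmax over M
_c2q = np.matmul(_c2q_dist, _q)                     # (B, N, H) attended question summary
assert _c2q.shape == (_B, _N, _H)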
                                                       

def model_fn_builder(bert_config, num_labels, init_checkpoint, learning_rate,
                     num_train_steps, num_warmup_steps, use_tpu,
                     use_one_hot_embeddings):
  """Returns `model_fn` closure for TPUEstimator."""

# This is the most confusing one. Note that "labels" are not passed on by the model_fn_builder.
# They are actually passed on inside tpu_estimator when it calls the model_fn. We don't see how.
# Apparently we need to treat labels as per example, not per batch (to be confirmed).
                                                       
  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""

    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
      tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    target_start_ids = features["target_start_ids"]
    target_end_ids = features["target_end_ids"]
    target_present_ids = features["target_present_ids"]
    q_mask = features["q_mask"]
    c_mask = features["c_mask"]                                                   
                                                       
    is_real_example = None
    if "is_real_example" in features:
      is_real_example = tf.cast(features["is_real_example"], dtype=tf.float32)
    else:
      is_real_example = tf.ones(tf.shape(input_ids)[:1], dtype=tf.float32)

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    (total_loss, rnn_preds, conv_preds) = create_model(
        bert_config, is_training, input_ids, input_mask, segment_ids, target_start_ids,
        target_end_ids, target_present_ids, q_mask, c_mask, num_labels, use_one_hot_embeddings)

    tvars = tf.trainable_variables()
    initialized_variable_names = {}
    scaffold_fn = None
    if init_checkpoint:
      (assignment_map, initialized_variable_names
      ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)
      if use_tpu:

        def tpu_scaffold():
          tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
          return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
      else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      init_string)

    output_spec = None
    if mode == tf.estimator.ModeKeys.TRAIN:

      train_op = optimization.create_optimizer(
          total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)

      output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=total_loss,
          train_op=train_op,
          scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.PREDICT:

       output_spec = tf.contrib.tpu.TPUEstimatorSpec(
          mode=mode,
          predictions={"probabilities": probabilities},
          scaffold_fn=scaffold_fn)
    else:
       raise ValueError("Only TRAIN and PREDICT modes are supported: %s" %
                       (mode))                                                 
                                                       
    return output_spec

  return model_fn


# This function is not used by this file but is still used by the Colab and
# people who depend on it.
def input_fn_builder(features, seq_length, is_training, drop_remainder):
  """Creates an `input_fn` closure to be passed to TPUEstimator."""

  all_input_ids = []
  all_input_mask = []
  all_segment_ids = []
  all_target_start_ids = []
  all_target_end_ids = []
  all_target_present_ids = []
  all_q_mask = []
  all_c_mask = []                                                       

  for feature in features:
    all_input_ids.append(feature.input_ids)
    all_input_mask.append(feature.input_mask)
    all_segment_ids.append(feature.segment_ids)
    all_target_start_ids.append(feature.target_conv3)
    all_target_end_ids.append(feature.target_conv6)
    all_target_present_ids.append(feature.target_present)
    all_q_mask.append(feature.q_mask)
    all_c_mask.append(feature.c_mask)                                                                                                             
                                                       
  def input_fn(params):
    """The actual input function."""
    batch_size = params["batch_size"]

    num_examples = len(features)

    # This is for demo purposes and does NOT scale to large data sets. We do
    # not use Dataset.from_generator() because that uses tf.py_func which is
    # not TPU compatible. The right way to load data is with TFRecordReader.
    d = tf.data.Dataset.from_tensor_slices({
        "input_ids":
            tf.constant(
                all_input_ids, shape=[num_examples, seq_length],
                dtype=tf.int32),
        "input_mask":
            tf.constant(
                all_input_mask,
                shape=[num_examples, seq_length],
                dtype=tf.int32),
        "segment_ids":
            tf.constant(
                all_segment_ids,
                shape=[num_examples, seq_length],
                dtype=tf.int32),
        "target_start_ids":
            tf.constant(
                all_target_start_ids,
                shape=[num_examples, seq_length],
                dtype=tf.int32),
        "segment_ids":
            tf.constant(
                all_target_end_ids,
                shape=[num_examples,seq_length],
                dtype=tf.int32),
        "segment_ids":
            tf.constant(
                all_target_present_ids,
                shape=[num_examples, seq_length],
                dtype=tf.int32),
    })

    if is_training:
      d = d.repeat()
      d = d.shuffle(buffer_size=100)

    d = d.batch(batch_size=batch_size, drop_remainder=drop_remainder)
    return d

  return input_fn


# This function is not used by this file but is still used by the Colab and
# people who depend on it.
def convert_examples_to_features(examples, label_list, max_seq_length,
                                 tokenizer):
  """Convert a set of `InputExample`s to a list of `InputFeatures`."""

  features = []
  for (ex_index, example) in enumerate(examples):
    if ex_index % 10000 == 0:
      tf.logging.info("Writing example %d of %d" % (ex_index, len(examples)))

    feature = convert_single_example(ex_index, example, label_list,
                                     max_seq_length, tokenizer)

    features.append(feature)
  return features


def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  processors = {
      "": KeplerProcessor,
  }

  tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case,
                                                FLAGS.init_checkpoint)

  if not FLAGS.do_train and not FLAGS.do_eval and not FLAGS.do_predict:
    raise ValueError(
        "At least one of `do_train`, `do_eval` or `do_predict' must be True.")

  bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

  if FLAGS.max_seq_length > bert_config.max_position_embeddings:
    raise ValueError(
        "Cannot use sequence length %d because the BERT model "
        "was only trained up to sequence length %d" %
        (FLAGS.max_seq_length, bert_config.max_position_embeddings))

  tf.gfile.MakeDirs(FLAGS.output_dir)

  task_name = FLAGS.task_name.lower()

  if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

  processor = processors[task_name]()

  label_list = processor.get_labels()

  tokenizer = tokenization.FullTokenizer(
      vocab_file=FLAGS.vocab_file, do_lower_case=FLAGS.do_lower_case)

  tpu_cluster_resolver = None
  if FLAGS.use_tpu and FLAGS.tpu_name:
    tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
        FLAGS.tpu_name, zone=FLAGS.tpu_zone, project=FLAGS.gcp_project)

  is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2
  run_config = tf.contrib.tpu.RunConfig(
      cluster=tpu_cluster_resolver,
      master=FLAGS.master,
      model_dir=FLAGS.output_dir,
      save_checkpoints_steps=FLAGS.save_checkpoints_steps,
      tpu_config=tf.contrib.tpu.TPUConfig(
          iterations_per_loop=FLAGS.iterations_per_loop,
          num_shards=FLAGS.num_tpu_cores,
          per_host_input_for_training=is_per_host))

  train_examples = None
  num_train_steps = None
  num_warmup_steps = None
  if FLAGS.do_train:
    train_examples = processor.get_train_examples(FLAGS.data_dir)
    num_train_steps = int(
        len(train_examples) / FLAGS.train_batch_size * FLAGS.num_train_epochs)
    num_warmup_steps = int(num_train_steps * FLAGS.warmup_proportion)

  model_fn = model_fn_builder(
      bert_config=bert_config,
      num_labels=len(label_list),
      init_checkpoint=FLAGS.init_checkpoint,
      learning_rate=FLAGS.learning_rate,
      num_train_steps=num_train_steps,
      num_warmup_steps=num_warmup_steps,
      use_tpu=FLAGS.use_tpu,
      use_one_hot_embeddings=FLAGS.use_tpu)

  # If TPU is not available, this will fall back to normal Estimator on CPU
  # or GPU.
  estimator = tf.contrib.tpu.TPUEstimator(
      use_tpu=FLAGS.use_tpu,
      model_fn=model_fn,
      config=run_config,
      train_batch_size=FLAGS.train_batch_size,
      eval_batch_size=FLAGS.eval_batch_size,
      predict_batch_size=FLAGS.predict_batch_size)

  if FLAGS.do_train:
    train_file = os.path.join(FLAGS.output_dir, "train.tf_record")
    file_based_convert_examples_to_features(
        train_examples, label_list, FLAGS.max_seq_length, tokenizer, train_file)
    tf.logging.info("***** Running training *****")
    tf.logging.info("  Num examples = %d", len(train_examples))
    tf.logging.info("  Batch size = %d", FLAGS.train_batch_size)
    tf.logging.info("  Num steps = %d", num_train_steps)
    train_input_fn = file_based_input_fn_builder(
        input_file=train_file,
        seq_length=FLAGS.max_seq_length,
        is_training=True,
        drop_remainder=True)
    estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)

  if FLAGS.do_predict:
    predict_examples = processor.get_test_examples(FLAGS.data_dir)
    num_actual_predict_examples = len(predict_examples)
    if FLAGS.use_tpu:
      # TPU requires a fixed batch size for all batches, therefore the number
      # of examples must be a multiple of the batch size, or else examples
      # will get dropped. So we pad with fake examples which are ignored
      # later on.
      while len(predict_examples) % FLAGS.predict_batch_size != 0:
        predict_examples.append(PaddingInputExample())
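      # For example, with 1,003 real examples and predict_batch_size=8, five
      # PaddingInputExamples are appended so that 1,008 % 8 == 0; the padded
      # rows are skipped again when the results are written below.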

    predict_file = os.path.join(FLAGS.output_dir, "predict.tf_record")
    file_based_convert_examples_to_features(predict_examples, label_list,
                                            FLAGS.max_seq_length, tokenizer,
                                            predict_file)

    tf.logging.info("***** Running prediction*****")
    tf.logging.info("  Num examples = %d (%d actual, %d padding)",
                    len(predict_examples), num_actual_predict_examples,
                    len(predict_examples) - num_actual_predict_examples)
    tf.logging.info("  Batch size = %d", FLAGS.predict_batch_size)

    predict_drop_remainder = True if FLAGS.use_tpu else False
    predict_input_fn = file_based_input_fn_builder(
        input_file=predict_file,
        seq_length=FLAGS.max_seq_length,
        is_training=False,
        drop_remainder=predict_drop_remainder)

    result = estimator.predict(input_fn=predict_input_fn)

    output_predict_file = os.path.join(FLAGS.output_dir, "test_results.tsv")
    with tf.gfile.GFile(output_predict_file, "w") as writer:
      num_written_lines = 0
      tf.logging.info("***** Predict results *****")
      for (i, prediction) in enumerate(result):
        probabilities = prediction["probabilities"]
        if i >= num_actual_predict_examples:
          break
        output_line = "\t".join(
            str(class_probability)
            for class_probability in probabilities) + "\n"
        writer.write(output_line)
        num_written_lines += 1
    assert num_written_lines == num_actual_predict_examples


if __name__ == "__main__":
  flags.mark_flag_as_required("data_dir")
  flags.mark_flag_as_required("task_name")
  flags.mark_flag_as_required("vocab_file")
  flags.mark_flag_as_required("bert_config_file")
  flags.mark_flag_as_required("output_dir")
  tf.app.run()
Exemple #14
0
import numpy as np
import tensorflow as tf

N = 20
D = 3
D1 = 4
D2 = 5
X_train = np.random.rand(N,D)
X_train = 2*X_train + 3
x = tf.placeholder(tf.float64, shape=[None,D], name='x-input')

mu1 = 0.0
mu2 = mu1
muC = mu2
std1 = 0.1
std2 = std1
stdC = std2
const1 = 0.1
const2 = const1

W1 = tf.Variable(tf.truncated_normal(shape=[D,D1], mean=mu1, stddev=std1, dtype=tf.float64))
b1 = tf.Variable(tf.constant(const1,shape=[D1],dtype=tf.float64))
W2 = tf.Variable(tf.truncated_normal(shape=[D1,D2], mean=mu2, stddev=std2, dtype=tf.float64))
b2 = tf.Variable(tf.constant(const2,shape=[D2],dtype=tf.float64))
C = tf.Variable(tf.truncated_normal(shape=[D,D1], mean=muC, stddev=stdC, dtype=tf.float64))

z1 = tf.matmul(x,W1) + b1
a1 = tf.nn.relu(z1)
z2 = tf.matmul(a1,W2) + b2
a2 = tf.nn.relu(z2)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(a2, feed_dict={x: X_train}))
Exemple #15
0
 def relu_kernel(self, x):
     return tf.nn.relu(tf.expand_dims(x, axis=self.unsqueeze_dim) - self.dict)
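
A hedged illustration of the kernel above (the threshold values, axis and
shapes are made-up stand-ins for self.dict and self.unsqueeze_dim): expanding
x against a vector of K thresholds yields one hinge feature relu(x - knot_k)
per threshold, i.e. a piecewise-linear basis expansion.

import tensorflow as tf

x = tf.constant([0.2, 1.5])                  # two scalar inputs
knots = tf.constant([0.0, 0.5, 1.0])         # stands in for self.dict
features = tf.nn.relu(tf.expand_dims(x, axis=-1) - knots)  # shape [2, 3]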
Exemple #16
0
 def activation(x):
     return tf.nn.relu(x)
Exemple #17
0
    def build_predict(
        self,
        inputs,
        reverse_preds=None,
        embed_penultimate=False,
        target_subset=None,
        save_reprs=False,
    ):
        """Construct per-location real-valued predictions."""
        assert inputs is not None
        print("Targets pooled by %d to length %d" %
              (self.hp.target_pool, self.hp.seq_length // self.hp.target_pool))

        if self.hp.augment_mutation > 0:
            # sample mutation binary mask across sequences
            mut_mask_probs = self.hp.augment_mutation * np.ones(
                (self.hp.seq_length, 1))
            mut_mask_dist = tfp.distributions.Bernoulli(probs=mut_mask_probs,
                                                        dtype=tf.float32)
            mut_mask = mut_mask_dist.sample(tf.shape(inputs)[0])

            # sample random nucleotide for mutations
            mut_1hot_probs = 0.25 * np.ones((self.hp.seq_length, 4))
            mut_1hot_dist = tfp.distributions.OneHotCategorical(
                probs=mut_1hot_probs, dtype=tf.float32)
            mut_1hot = mut_1hot_dist.sample(tf.shape(inputs)[0])

            # modify sequence
            inputs_mut = inputs - mut_mask * inputs + mut_mask * mut_1hot
            inputs = tf.cond(self.is_training, lambda: inputs_mut,
                             lambda: inputs)

        ###################################################
        # convolution layers
        ###################################################
        filter_weights = []
        layer_reprs = [inputs]

        seqs_repr = inputs
        for layer_index in range(self.hp.cnn_layers):
            with tf.variable_scope("cnn%d" % layer_index, reuse=tf.AUTO_REUSE):
                # convolution block
                args_for_block = self._make_conv_block_args(
                    layer_index, layer_reprs)
                seqs_repr = layers.conv_block(seqs_repr=seqs_repr,
                                              **args_for_block)

                # save representation
                layer_reprs.append(seqs_repr)

        if save_reprs:
            self.layer_reprs = layer_reprs

        # final nonlinearity
        if self.hp.nonlinearity == "relu":
            seqs_repr = tf.nn.relu(seqs_repr)
        elif self.hp.nonlinearity == "gelu":
            seqs_repr = tf.nn.sigmoid(1.702 * seqs_repr) * seqs_repr
        else:
            print('Unrecognized nonlinearity "%s"' % self.hp.nonlinearity,
                  file=sys.stderr)
            exit(1)

        ###################################################
        # slice out side buffer
        ###################################################

        # update batch buffer to reflect pooling
        seq_length = seqs_repr.shape[1].value
        pool_preds = self.hp.seq_length // seq_length
        assert self.hp.batch_buffer % pool_preds == 0, (
            "batch_buffer %d not divisible"
            " by the CNN pooling %d") % (self.hp.batch_buffer, pool_preds)
        batch_buffer_pool = self.hp.batch_buffer // pool_preds

        # slice out buffer
        seq_length = seqs_repr.shape[1]
        seqs_repr = seqs_repr[:, batch_buffer_pool:seq_length -
                              batch_buffer_pool, :]
        seq_length = seqs_repr.shape[1]

        ###################################################
        # final layer
        ###################################################
        if embed_penultimate:
            final_repr = seqs_repr
        else:
            with tf.variable_scope("final", reuse=tf.AUTO_REUSE):
                final_filters = self.hp.sum_targets * self.hp.target_classes
                final_repr = tf.layers.dense(
                    inputs=seqs_repr,
                    units=final_filters,
                    activation=None,
                    kernel_initializer=tf.variance_scaling_initializer(
                        scale=2.0, mode="fan_in"),
                    kernel_regularizer=tf.contrib.layers.l1_regularizer(
                        self.hp.final_l1_scale),
                )
                print("Convolution w/ %d %dx1 filters to final targets" %
                      (final_filters, seqs_repr.shape[2]))

                if target_subset is not None:
                    # get convolution parameters
                    filters_full = tf.get_collection(
                        tf.GraphKeys.GLOBAL_VARIABLES, "final/dense/kernel")[0]
                    bias_full = tf.get_collection(
                        tf.GraphKeys.GLOBAL_VARIABLES, "final/dense/bias")[0]

                    # subset to specific targets
                    filters_subset = tf.gather(filters_full,
                                               target_subset,
                                               axis=1)
                    bias_subset = tf.gather(bias_full, target_subset, axis=0)

                    # substitute a new limited convolution
                    final_repr = tf.tensordot(seqs_repr, filters_subset, 1)
                    final_repr = tf.nn.bias_add(final_repr, bias_subset)

                    # update # targets
                    self.hp.sum_targets = len(target_subset)

                # expand length back out
                if self.hp.target_classes > 1:
                    final_repr = tf.reshape(
                        final_repr,
                        (-1, seq_length, self.hp.sum_targets,
                         self.hp.target_classes),
                    )

        # transform for reverse complement
        if reverse_preds is not None:
            final_repr = tf.cond(
                reverse_preds,
                lambda: tf.reverse(final_repr, axis=[1]),
                lambda: final_repr,
            )

        ###################################################
        # link function
        ###################################################
        if embed_penultimate:
            predictions = final_repr
        else:
            # work-around for specifying my own predictions
            # self.preds_adhoc = tf.placeholder(
            #     tf.float32, shape=final_repr.shape, name='preds-adhoc')

            # float 32 exponential clip max
            exp_max = 50

            # choose link
            if self.hp.link in ["identity", "linear"]:
                predictions = tf.identity(final_repr, name="preds")

            elif self.hp.link == "relu":
                predictions = tf.nn.relu(final_repr, name="preds")

            elif self.hp.link == "exp":
                final_repr_clip = tf.clip_by_value(final_repr, -exp_max,
                                                   exp_max)
                predictions = tf.exp(final_repr_clip, name="preds")

            elif self.hp.link == "exp_linear":
                predictions = tf.where(
                    final_repr > 0,
                    final_repr + 1,
                    tf.exp(tf.clip_by_value(final_repr, -exp_max, exp_max)),
                    name="preds",
                )

            elif self.hp.link == "softplus":
                final_repr_clip = tf.clip_by_value(final_repr, -exp_max, 10000)
                predictions = tf.nn.softplus(final_repr_clip, name="preds")

            else:
                print("Unknown link function %s" % self.hp.link,
                      file=sys.stderr)
                exit(1)

            # clip
            if self.hp.target_clip is not None:
                predictions = tf.clip_by_value(predictions, 0,
                                               self.hp.target_clip)

            # sqrt
            if self.hp.target_sqrt:
                predictions = tf.sqrt(predictions)

        return predictions
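
The "exp_linear" link above is exp(x) for x <= 0 and x + 1 for x > 0, which
join with matching value and slope at 0 (exp(0) = 1). A small NumPy sketch,
not part of the original model, of that piecewise link:

import numpy as np

x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
exp_linear = np.where(x > 0, x + 1.0, np.exp(np.clip(x, -50.0, 50.0)))
# -> [0.135, 0.607, 1.0, 1.5, 3.0]; positive everywhere, linear for large x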
Exemple #18
0
    def build_model(self):
        self.user = tf.placeholder(shape=[None,],dtype=tf.int32)
        self.item = tf.placeholder(shape=[None,],dtype=tf.int32)
        self.text = tf.placeholder(shape=[None,self.review_length],dtype=tf.int32)
        self.rating = tf.placeholder(shape=[None,],dtype=tf.float32)
        self.phrase = tf.placeholder_with_default(False, shape=[], name='phrase')
        with tf.name_scope('embedding/word_embedding'):
            word_embedding = tf.Variable(tf.random.uniform(shape=[self.vocab_size,self.factor_num],minval=-0.1,maxval=0.1))
        context = tf.nn.embedding_lookup(word_embedding,self.text)

        with tf.name_scope('embedding/user_embedding'):
            user_embedding = tf.Variable(tf.random_uniform(shape=[self.user_num,self.factor_num],minval=-0.1,maxval=0.1))
        uvec = tf.nn.embedding_lookup(user_embedding,self.user)

        with tf.name_scope('embedding/item_embedding'):
            item_embedding = tf.Variable(tf.random_uniform(shape=[self.item_num,self.factor_num],minval=-0.1,maxval=0.1))  # assumes an `item_num` attribute analogous to user_num
        ivec = tf.nn.embedding_lookup(item_embedding,self.item)
        # convoluntional layers
        context = tf.expand_dims(context,axis=-1) # None*review_length*factor_num*1
        pools = []
        for size in self.filter_size:
            filter_kernal = [size,self.factor_num,1,self.filter_num]  # [height, width, in_channels, out_channels]
            with tf.name_scope('conv_{}'.format(size)):
                filter_weights = tf.Variable(tf.random_normal(shape=filter_kernal,stddev=0.1))
                filter_biases =  tf.Variable(tf.random_normal(shape=[self.filter_num],stddev=0.1))
            conv = tf.nn.conv2d(context,filter_weights,strides=[1,1,1,1],padding='VALID')
            conv = tf.nn.bias_add(conv,filter_biases)
            pool_kernal = [1,self.review_length-size+1,1,1]
            pool = tf.nn.max_pool(conv,ksize=pool_kernal,strides=[1,1,1,1],padding='VALID')
            pools.append(pool)
        num_feature_total = self.filter_num * len(self.filter_size)
        pooled_total = tf.concat(pools,3)
        pooled_total = tf.reshape(pooled_total,[-1,num_feature_total])

        # gate
        with tf.name_scope('gate/user_gate'):
            Wxcr = tf.Variable(tf.random_normal(shape=[num_feature_total,self.factor_num]))
            Wxur = tf.Variable(tf.random_normal(shape=[self.factor_num,self.factor_num]))
            Wxch = tf.Variable(tf.random_normal(shape=[num_feature_total, self.factor_num]))
            Wxuh = tf.Variable(tf.random_normal(shape=[self.factor_num, self.factor_num]))
            bxr = tf.Variable(tf.constant(0.0,shape=[self.factor_num]))
            bxh = tf.Variable(tf.constant(0.0, shape=[self.factor_num]))
            Wxcz = tf.Variable(tf.random_normal(shape=[num_feature_total,self.factor_num]))
            Wxuz = tf.Variable(tf.random_normal(shape=[self.factor_num,self.factor_num]))
            bxz = tf.Variable(tf.constant(0.0,shape=[self.factor_num]))
        # GRU-style user gate (tf.add_n takes a single list of tensors)
        xr = tf.add_n([tf.matmul(pooled_total,Wxcr), tf.matmul(uvec,Wxur), bxr])
        xz = tf.add_n([tf.matmul(pooled_total,Wxcz), tf.matmul(uvec,Wxuz), bxz])
        uvec_hat = tf.tanh(tf.add_n([tf.matmul(pooled_total,Wxch), tf.multiply(xr, tf.matmul(uvec,Wxuh)), bxh]))
        # interpolate between the review-based update and the original user embedding
        uvec_final = tf.multiply(xz, uvec_hat) + tf.multiply(1 - xz, uvec)
        with tf.name_scope('gate/item_gate'):
            Wycr = tf.Variable(tf.random_normal(shape=[num_feature_total,self.factor_num]))
            Wyir = tf.Variable(tf.random_normal(shape=[self.factor_num,self.factor_num]))
            Wych = tf.Variable(tf.random_normal(shape=[num_feature_total, self.factor_num]))
            Wyuh = tf.Variable(tf.random_normal(shape=[self.factor_num, self.factor_num]))
            byr = tf.Variable(tf.constant(0.0,shape=[self.factor_num]))
            byh = tf.Variable(tf.constant(0.0, shape=[self.factor_num]))
            Wycz = tf.Variable(tf.random_normal(shape=[num_feature_total,self.factor_num]))
            Wyiz = tf.Variable(tf.random_normal(shape=[self.factor_num,self.factor_num]))
            byz = tf.Variable(tf.constant(0.0,shape=[self.factor_num]))
        # item gate, mirroring the user gate above
        yr = tf.add_n([tf.matmul(pooled_total,Wycr), tf.matmul(ivec,Wyir), byr])
        yz = tf.add_n([tf.matmul(pooled_total,Wycz), tf.matmul(ivec,Wyiz), byz])
        ivec_hat = tf.tanh(tf.add_n([tf.matmul(pooled_total, Wych), tf.multiply(yr, tf.matmul(ivec, Wyuh)), byh]))
        ivec_final = tf.multiply(yz, ivec_hat) + tf.multiply(1 - yz, ivec)
        # self.phrase is a tensor, so branch with tf.cond rather than a Python `if`
        final = tf.cond(self.phrase,
                        lambda: tf.concat([uvec, ivec], axis=1),
                        lambda: tf.concat([uvec_final, ivec_final], axis=1))
        with tf.name_scope('full_connected'):
            W1 = tf.Variable(tf.random_normal(shape=[2*self.factor_num,self.factor_num]))
            b1 = tf.Variable(tf.constant(0.0,shape=[self.factor_num]))
            W2 = tf.Variable(tf.random_normal(shape=[self.factor_num, 1]))
            b2 = tf.Variable(tf.constant(0.0, shape=[1]))
        f1 = tf.nn.relu(tf.add(tf.matmul(final,W1),b1))
        f2 = tf.nn.relu(tf.add(tf.matmul(f1, W2), b2))

        self.mse = tf.reduce_mean(tf.square(tf.subtract(tf.reduce_sum(f2,axis=1),self.rating)))
        self.mae = tf.reduce_mean(tf.abs(tf.subtract(tf.reduce_sum(f2, axis=1), self.rating)))
        self.opt = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.mse)
        init = tf.global_variables_initializer()
        self.sess.run(init)
Exemple #19
0
def maxpool_layer_dual_objective(kernel_shape,
                                 strides,
                                 with_relu,
                                 mu_in,
                                 lam_out,
                                 lb,
                                 ub,
                                 nominal=None):
    """Calculates the contribution to the dual objective of an N-D max pool layer.

  Maximises (over y in [lb, ub])::
    mu_l^T y  -  lam_l^T h_l(y)
  where `h` is the specified max pool operation.

  If `nominal` is not `None`, then inputs and maxima are interpreted
  relative to nominal inputs and outputs respectively, so we actually maximise::
    mu_l^T y - lam_l^T (h_l(nominal+y) - h_l(nominal))`.

  This formulation only supports maxpools that cover the input space without
  gaps. Overlaps are permitted, although they will give rise to an overestimate
  of the dual objective rather than a tight value.

  Args:
    kernel_shape: Integer list of `[kernel_height, kernel_width]`,
      or `None` to aggregate over the layer's entire spatial extent.
    strides: Integer list of `[vertical_stride, horizontal_stride]`.
    with_relu: Whether to apply `tf.nn.relu` to the maxpool.
    mu_in: (N+3)D tensor of shape (num_classes, batch_size,
      input_height, input_width, layer_channels) containing
      Lagrange multipliers for the neurons' linear calculations.
    lam_out: (N+3)D tensor of shape (num_classes, batch_size,
      output_height, output_width, layer_channels) containing
      Lagrange multipliers for the neurons' maxpool calculations.
    lb: (N+2)D tensor of shape (batch_size,
      input_height, input_width, layer_channels) containing
      lower bounds of the neurons' pre-maxpool values.
    ub: (N+2)D tensor of shape (batch_size,
      input_height, input_width, layer_channels) containing
      upper bounds of the neurons' pre-maxpool values.
    nominal: (N+2)D tensor of shape (batch_size, input_height, input_width,
      layer_channels) containing nominal input values. Inputs bounds are
      interpreted relative to these nominal values. Defaults to zero.

  Returns:
    2D tensor of shape (num_classes, batch_size) containing dual objective
      contribution for each example.

  Raises:
    ValueError: if the pools overlap or have gaps.
  """
    if nominal is not None:
        # TODO(stanforth) investigate a more numerically stable implementation
        res = maxpool_layer_dual_objective(kernel_shape, strides, with_relu,
                                           mu_in, lam_out, nominal + lb,
                                           nominal + ub)

        # Infer the nominal outputs.
        if kernel_shape is None:
            nominal_out = tf.reduce_max(nominal,
                                        axis=list(
                                            range(1, nominal.shape.ndims - 1)))
        else:
            nominal_out = tf.nn.max_pool(nominal,
                                         ksize=kernel_shape,
                                         padding='VALID',
                                         strides=([1] + strides + [1]))
        if with_relu:
            nominal_out = tf.nn.relu(nominal_out)

        res -= tf.reduce_sum(mu_in * nominal,
                             axis=list(range(2, mu_in.shape.ndims)))
        res += tf.reduce_sum(lam_out * nominal_out,
                             axis=list(range(2, lam_out.shape.ndims)))
        return res

    # Search for maximum by branching over inputs (kernel elements).

    # Broadcast the tensors to match what `fn` will operate with, i.e. shape
    # (num_classes, batch_size, output_height, output_width,
    #  kernel_height * kernel_width, layer_channels).

    num_classes = mu_in.shape[0].value
    batch_size = tf.shape(mu_in)[1]
    input_shape = mu_in.shape[2:].as_list()
    layer_channels = mu_in.shape[-1].value
    output_spatial_shape = lam_out.shape[2:-1].as_list()
    nd = lam_out.shape.ndims - 3

    if kernel_shape is None:
        # Maxpool will be across the entire layer (in each channel).
        kernel_size = _prod(input_shape[:-1])
        lb_bc = lb
        ub_bc = ub
        mu_bc = mu_in

    else:
        for i in range(len(kernel_shape)):
            if kernel_shape[i] < strides[i]:
                raise ValueError(
                    'The pools must tile the entire input space without gaps.')
        padding = 'VALID'

        # Determine the fan-out of each input, where the pools overlap.
        # Builds a tensor of shape (1, 1, input_height, input_width, 1) of the form
        # [[1,1,2,1,1], [1,1,2,1,1], [2,2,4,2,2], [1,1,2,1,1], [1,1,2,1,1]]
        # (illustrated here with 3x3 kernel with stride 2 on a 5x5 input).
        overlap = common.conv_reduce_sum(tf.ones(
            dtype=mu_in.dtype,
            shape=([1, 1] + output_spatial_shape + [1] + kernel_shape + [1])),
                                         input_shape,
                                         padding=padding,
                                         strides=strides)
        # Share mu values equally amongst pools where they overlap.
        mu_in /= overlap

        # Broadcast the bounds and mu vars where the kernel applications overlap.
        kernel_size = _prod(kernel_shape)
        lb_bc = common.conv_broadcast(lb,
                                      kernel_shape,
                                      padding=padding,
                                      strides=strides)
        ub_bc = common.conv_broadcast(ub,
                                      kernel_shape,
                                      padding=padding,
                                      strides=strides)
        # Temporarily combine the (num_classes, batch_size) dimensions
        # while applying the broadcast to mu.
        mu_bc = tf.reshape(mu_in,
                           shape=([num_classes * batch_size] +
                                  mu_in.shape[2:].as_list()))
        mu_bc = common.conv_broadcast(mu_bc,
                                      kernel_shape,
                                      padding=padding,
                                      strides=strides)
        # conv_broadcast has returned tensors of shape
        # (N, output_height, output_width, 1, kernel_height, kernel_width, C).

    lb_bc = tf.reshape(lb_bc,
                       shape=([1, batch_size] + output_spatial_shape +
                              [kernel_size, layer_channels]))
    ub_bc = tf.reshape(ub_bc,
                       shape=([1, batch_size] + output_spatial_shape +
                              [kernel_size, layer_channels]))
    mu_bc = tf.reshape(
        mu_bc,
        shape=([num_classes, batch_size] + output_spatial_shape +
               [kernel_size, layer_channels]))
    lb_bc += tf.zeros_like(mu_bc)
    ub_bc += tf.zeros_like(mu_bc)

    # Use the same lambda for each input.
    lam_bc = tf.expand_dims(lam_out, axis=(nd + 2))

    # All xx_bc tensors are shaped as (class, N, H, W, i, C)
    # where i ranges over inputs (kernel elements).

    # To calculate for input (kernel element) i, we need to sum over inputs j.
    # Set up xx_i, xx_j tensors shaped as (class, N, H, W, i, j, C)
    # where i,j both range over inputs (kernel elements).

    # y_i = tf.expand_dims(y, nd+3)  (will create inside `fn`)
    mu_j = tf.expand_dims(mu_bc, nd + 2)
    lb_j = tf.expand_dims(lb_bc, nd + 2)
    ub_j = tf.expand_dims(ub_bc, nd + 2)
    # Only consider j != i.
    mask = 1.0 - tf.expand_dims(tf.eye(kernel_size), -1)

    def fn(y):
        """Optimal dual objective, conditional on the value of the maxpool.

    For each input (kernel element) i, for the given y_i,
    maximises (over z_j in [lb_j, min{y_i, ub_j}] and constraining z_i=y_i)::
      mu^T z  -  lam y_i

    This will be infeasible if y_i < lb_j for some j, (also if y_i < 0 in the
    case of relu+maxpool), so maxpool cannot be attained at input i. The
    returned tensor is unspecified for such elements.

    Args:
      y: (N+4)D tensor of shape (num_classes, batch_size,
        output_height, output_width,
        kernel_height * kernel_width, layer_channels) containing, for each
        input (kernel element) i, a value of maxpool assumed to be attained
        at input i.

    Returns:
      Tensor of same shape as `y` containing, for each input (kernel element) i,
        the optimal value of the dual objective, conditional the maxpool being
        equal to `y` with the maximum attained at input i.
    """
        y_i = tf.expand_dims(y, nd + 3)
        # Maximise sum_{j!=i} mu_j y_j  where y_j <= y_i for all j!=i.
        obj = max_linear(mask * mu_j,
                         lb_j,
                         tf.minimum(ub_j, y_i),
                         axis=(nd + 3))
        return obj + (mu_bc - lam_bc) * y

    lb_max = tf.reduce_max(lb_bc, axis=(nd + 2), keepdims=True)
    if with_relu:
        lb_max = tf.maximum(lb_max, 0.)
    _, attained = common.concave_max_binsearch(fn,
                                               tf.zeros_like(lb_bc) + lb_max,
                                               ub_bc)

    # Filter out any infeasible choices of i.
    attained = tf.where(
        lb_max <= ub_bc, attained,
        tf.zeros_like(attained) +
        tf.reduce_min(attained, axis=(nd + 2), keepdims=True))

    # Maximise over which input (kernel element) maximises the maxpool.
    per_neuron_objective = tf.reduce_max(attained, axis=(nd + 2))

    if with_relu:
        # The relu+maxpool may additionally be 'maximised' by zero.
        # Calculate optimal dual objective, conditional on all y_i <= 0.
        # Maximise (over z_j in [lb_j, min{0, ub_j}])::
        #   mu^T z  -  lam 0
        attained_zero = max_linear(mu_bc,
                                   lb_bc,
                                   tf.minimum(ub_bc, 0.),
                                   axis=(nd + 2))

        # Filter out any infeasible cases.
        per_neuron_objective = tf.where(
            tf.squeeze(lb_max, axis=(nd + 2)) <= 0.,
            tf.maximum(per_neuron_objective, attained_zero),
            per_neuron_objective)

    return tf.reduce_sum(per_neuron_objective,
                         axis=list(range(2, per_neuron_objective.shape.ndims)))
Exemple #20
0
import tensorflow as tf

# placeholder for input to the computation
x = tf.placeholder(dtype=tf.float32, name="x")

# bias variable for the affine weight transformation
b = tf.Variable(tf.zeros(100))

# weight variable for the affine weight transformation with random values
W = tf.Variable(tf.random_uniform([784, 100]), dtype=tf.float32)

# activation as a function of the weight transformation
a = tf.nn.relu(tf.matmul(x, W) + b)

# cost computed as a function of the activation
# and the target optimization task
C = [...]

# Start session to run the computational graph
session = tf.InteractiveSession()

# Initialize all variables, in this example only the weight
# matrix depends on an initialization
session.run(tf.global_variables_initializer())

for i in range(epochs):
    result = session.run(C, feed_dict={x: data[batch_indices]})
    print(i, result)
Exemple #21
0
import numpy as np
import tensorflow as tf

tf.set_random_seed(777)

x_data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
y_data = np.array([[0], [1], [1], [0]], dtype=np.float32)

# x,y,w,b hypothesis, cost, train
# sigmoid

X = tf.placeholder(tf.float32, shape=[None, 2])
y = tf.placeholder(tf.float32, shape=[None, 1])

w1 = tf.Variable(tf.random_normal([2, 100]), name='weight1')  # random init; all-zero weights cannot break symmetry
b1 = tf.Variable(tf.zeros([100]), name='bias1')
layer1 = tf.nn.relu(tf.matmul(X, w1) + b1)

w2 = tf.Variable(tf.random_normal([100, 50]), name='weight2')
b2 = tf.Variable(tf.zeros([50]), name='bias2')
layer2 = tf.sigmoid(tf.matmul(layer1, w2) + b2)

w3 = tf.Variable(
    tf.random_normal([50, 1]),
    name='weight3',
)
b3 = tf.Variable(tf.zeros([1]), name='bias3')
hypothesis = tf.sigmoid(tf.matmul(layer2, w3) + b3)

# tf.matmul(x, w) => x·w : x => (5,3), w => (3,1), x·w => (5,1); handles the matrix multiplication

cost = -tf.reduce_mean(y * tf.log(hypothesis) +
                       (1 - y) * tf.log(1 - hypothesis))
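
The snippet above is cut off here; a hedged sketch of how such an XOR example
typically continues (the optimizer, learning rate and step count below are
assumptions, not from the original):

train = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, y), dtype=tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(10001):
        sess.run(train, feed_dict={X: x_data, y: y_data})
    print(sess.run([cost, accuracy], feed_dict={X: x_data, y: y_data}))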
Exemple #22
0
    def inference(self,
                  input_images,
                  sentences,
                  embedding_dictionary,
                  trainable,
                  initialized_vgg_parameter_file=None):
        with tf.variable_scope(self.name):
            # extract image context feature
            tensor_trainable = tf.constant(trainable)
            vgg19 = VGG19('vgg19')
            vgg19_predict, vgg19_context = vgg19.inference(
                input_images, tensor_trainable, initialized_vgg_parameter_file)

            vgg19_context_shape = vgg19_context.get_shape().as_list()
            batch_size = vgg19_context_shape[0]
            vgg19_context_num = vgg19_context_shape[1] * vgg19_context_shape[2]
            vgg19_context_dim = vgg19_context_shape[3]
            vgg19_context_reshape = tf.reshape(
                vgg19_context, [-1, vgg19_context_num, vgg19_context_dim])
            vgg19_context_reshape_mean = tf.reduce_mean(
                vgg19_context_reshape, 1)

            init_memory = vgg19_context_reshape_mean
            for i in xrange(_MLP_LAYER_NUMBER_):
                init_memory = _construct_full_connection_layer(
                    init_memory,
                    _RNN_HIDDEN_NUMER_,
                    name='init_memory_fc' + str(i))
                init_memory = tf.contrib.layers.batch_norm(
                    init_memory,
                    decay=self.momentum,
                    updates_collections=None,
                    epsilon=self.epsilon,
                    scale=True,
                    is_training=trainable,
                    scope='init_memory_bn' + str(i))

            init_lstm_output = vgg19_context_reshape_mean
            for i in xrange(_MLP_LAYER_NUMBER_):
                init_lstm_output = _construct_full_connection_layer(
                    init_lstm_output,
                    _RNN_HIDDEN_NUMER_,
                    name='init_hidden_state_fc' + str(i))
                init_lstm_output = tf.contrib.layers.batch_norm(
                    init_lstm_output,
                    decay=self.momentum,
                    updates_collections=None,
                    epsilon=self.epsilon,
                    scale=True,
                    is_training=trainable,
                    scope='init_lstm_output_bn' + str(i))

            lstm_state = tf.contrib.rnn.LSTMStateTuple(init_memory,
                                                       init_lstm_output)
            lstm = tf.contrib.rnn.LSTMCell(
                _RNN_HIDDEN_NUMER_,
                initializer=tf.random_normal_initializer(stddev=0.03))

            vgg19_context_flat = tf.reshape(vgg19_context_reshape,
                                            [-1, vgg19_context_dim])

            max_sentence_length = sentences.get_shape().as_list()[-1]
            print(max_sentence_length)
            dim_embed = embedding_dictionary.get_shape().as_list()[-1]
            word_number = embedding_dictionary.get_shape().as_list()[0]
            tensor_output = []
            tensor_output_prob = []
            for i in xrange(max_sentence_length):
                # attention mechanism
                context_encode1 = _construct_full_connection_layer(
                    vgg19_context_flat, vgg19_context_dim, name='att_fc11')
                context_encode1 = tf.nn.relu(context_encode1)
                context_encode1 = tf.contrib.layers.batch_norm(
                    context_encode1,
                    decay=self.momentum,
                    updates_collections=None,
                    epsilon=self.epsilon,
                    scale=True,
                    is_training=trainable,
                    scope='att_bn11' + str(i))

                context_encode2 = _construct_full_connection_layer(
                    lstm_state.h, vgg19_context_dim, name='att_fc21')  # use the LSTM hidden state, not the full state tuple
                context_encode2 = tf.nn.relu(context_encode2)
                context_encode2 = tf.contrib.layers.batch_norm(
                    context_encode2,
                    decay=self.momentum,
                    updates_collections=None,
                    epsilon=self.epsilon,
                    scale=True,
                    is_training=trainable,
                    scope='att_bn21' + str(i))
                context_encode2 = tf.tile(tf.expand_dims(context_encode2, 1),
                                          [1, vgg19_context_num, 1])
                context_encode2 = tf.reshape(context_encode2,
                                             [-1, vgg19_context_dim])

                context_encode = tf.nn.relu(context_encode1 + context_encode2)
                context_encode = tf.cond(
                    tensor_trainable,
                    lambda: tf.nn.dropout(context_encode, 0.5),
                    lambda: context_encode)

                attention = _construct_full_connection_layer(context_encode,
                                                             1,
                                                             name='att_1')
                attention = tf.nn.relu(attention)
                attention = tf.reshape(attention, [-1, vgg19_context_num])
                attention = tf.nn.softmax(attention)

                if i == 0:
                    word_emb = tf.zeros([batch_size, dim_embed])
                    weighted_context = tf.identity(vgg19_context_reshape_mean)
                else:
                    word_emb = tf.cond(
                        tensor_trainable, lambda: tf.nn.embedding_lookup(
                            embedding_dictionary, sentences[:, i - 1]),
                        lambda: word_emb)
                    weighted_context = tf.reduce_sum(
                        vgg19_context_reshape * tf.expand_dims(attention, 2),
                        1)

                lstm_output, lstm_state = lstm(
                    tf.concat([weighted_context, word_emb], axis=1), lstm_state)
                feature_concate = tf.concat(
                    [lstm_output, weighted_context, word_emb], axis=1)
                output0 = _construct_full_connection_layer(feature_concate,
                                                           _LAST_FC_DIMENSION_,
                                                           name='output_fc1')
                output0 = tf.nn.tanh(output0)
                output0 = tf.cond(tensor_trainable,
                                  lambda: tf.nn.dropout(output0, 0.5),
                                  lambda: output0)

                output = _construct_full_connection_layer(output0, word_number)
                prob = tf.nn.softmax(output)

                tensor_output.append(output)
                tensor_output_prob.append(prob)

                max_prob_word = tf.argmax(output, 1)
                word_emb = tf.cond(
                    tensor_trainable, lambda: word_emb,
                    lambda: tf.nn.embedding_lookup(embedding_dictionary, max_prob_word))
                tf.get_variable_scope().reuse_variables()

            tensor_output = tf.stack(tensor_output, axis=1)
            tensor_output_prob = tf.stack(tensor_output_prob, axis=1)

        return tensor_output, tensor_output_prob
Exemple #23
0
def conv3d(x, w, b, strides=[1, 1, 1, 1, 1], padding="SAME"):
    x = tf.nn.conv3d(x, w, strides=strides, padding=padding)
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)
inputs = tf.placeholder(tf.int64, [None, s_limit_len], name="inputs")
labels = tf.placeholder(tf.int64, [None, n_class], name="labels")
keep_prob = tf.placeholder(tf.float32)

# embeddings need an explicit initializer; a small uniform init is a common choice
embedding_W = tf.Variable(tf.random_uniform([voc_size, embedding_size], -1.0, 1.0),
                          name="embedding_w")
embedding_layer = tf.nn.embedding_lookup(embedding_W,
                                         inputs,
                                         name="embedding_layer")
# conv2d expects a 4-D input, so add a single channel dimension
embedding_layer = tf.expand_dims(embedding_layer, -1)

#conv1
conv1_w = tf.Variable(
    tf.truncated_normal([1, embedding_size, 1, filter_nums[1]]))
conv1_b = tf.Variable(tf.constant(0.1))
conv1 = tf.nn.relu(
    tf.nn.conv2d(embedding_layer, conv1_w, [1, 1, 1, 1],
                 padding="VALID") + conv1_b)
#conv3
conv3_1w = tf.Variable(tf.truncated_normal([1, embedding_size, 1, 2]))
conv3_1b = tf.Variable(tf.constant(0.1))
conv3_1 = tf.nn.relu(
    tf.nn.conv2d(embedding_layer, conv3_1w, [1, 1, 1, 1],
                 padding="VALID") + conv3_1b)
conv3_3w = tf.Variable(tf.truncated_normal([3, embedding_size, 2, 4]))
conv3_3b = tf.Variable(tf.constant(0.1))
conv3 = tf.nn.relu(
    tf.nn.conv2d(conv3_1, conv3_3w, [1, 1, 1, 1], padding="VALID") + conv3_3b)
#conv5
conv5_3w = tf.Variable(tf.truncated_normal([3, embedding_size, 2, 4]))
conv5_3b = tf.Variable(tf.constant(0.1))
conv5_3 = tf.nn.relu(
Exemple #25
0
    def build_predict(self,
                      inputs,
                      reverse_preds=None,
                      embed_penultimate=False,
                      target_subset=None,
                      save_reprs=False):
        """Construct per-location real-valued predictions."""
        assert inputs is not None
        print('Targets pooled by %d to length %d' %
              (self.hp.target_pool, self.hp.seq_length // self.hp.target_pool))

        ###################################################
        # convolution layers
        ###################################################
        filter_weights = []
        layer_reprs = [inputs]

        seqs_repr = inputs
        for layer_index in range(self.hp.cnn_layers - 1):
            with tf.variable_scope('cnn%d' % layer_index, reuse=tf.AUTO_REUSE):
                # convolution block
                #seqs_repr = tf.Print(seqs_repr, [tf.shape(seqs_repr)], "{}".format(layer_index))
                args_for_block = self._make_conv_block_args(
                    layer_index, layer_reprs)
                seqs_repr = layers.conv_block(seqs_repr=seqs_repr,
                                              **args_for_block)

                # save representation
                layer_reprs.append(seqs_repr)
        if self.hp.multi_head_attention > 0:
            for i in range(self.hp.multi_head_attention):
                with tf.variable_scope('multi_head%d' % i,
                                       reuse=tf.AUTO_REUSE):
                    seqs_repr = layers.multi_head_attention_block(
                        seqs_repr,
                        is_training=self.is_training,
                        n_query_layers=self.hp.attention_n_query_layers,
                        num_heads=self.hp.attention_num_heads,
                        num_units=self.hp.attention_num_units,
                        decay_variable=self.hp.attention_decay_variable,
                        decay_constant=self.hp.attention_decay_constant,
                        dropout=self.hp.attention_dropout,
                        query_dropout=self.hp.attention_query_dropout,
                        l2_scale=self.hp.attention_l2_scale)

        elif self.hp.dense_attention > 0:
            seqs_repr = layers.dense_attention_block(
                seqs_repr, self.hp.dense_attention, self.is_training,
                self.hp.attention_decay_variable,
                self.hp.attention_decay_constant, self.hp.attention_dropout,
                self.hp.attention_query_dropout, self.hp.attention_l2_scale)

        elif self.hp.exp:
            if self.hp.exp_decay_variable > 0:
                seqs_repr = layers.exp_block_variable(
                    seqs_repr, self.is_training, self.hp.exp_decay_variable)
            else:
                seqs_repr = layers.exp_block(seqs_repr, self.is_training,
                                             self.hp.exp_decay_constant)
            layer_reprs.append(seqs_repr)

        # Final Conv
        with tf.variable_scope('cnn_final%d' % (self.hp.cnn_layers - 1),
                               reuse=tf.AUTO_REUSE):

            # convolution block
            #seqs_repr = tf.Print(seqs_repr, [tf.shape(seqs_repr)], "{}".format(layer_index))
            args_for_block = self._make_conv_block_args(
                self.hp.cnn_layers - 1, layer_reprs)
            seqs_repr = layers.conv_block(seqs_repr=seqs_repr,
                                          **args_for_block)

            # save representation
            layer_reprs.append(seqs_repr)

            if save_reprs:
                self.layer_reprs = layer_reprs

        # final nonlinearity
        seqs_repr = tf.nn.relu(seqs_repr)

        ###################################################
        # slice out side buffer
        ###################################################

        # update batch buffer to reflect pooling
        seq_length = seqs_repr.shape[1].value
        pool_preds = self.hp.seq_length // seq_length
        assert self.hp.batch_buffer % pool_preds == 0, (
            'batch_buffer %d not divisible'
            ' by the CNN pooling %d') % (self.hp.batch_buffer, pool_preds)
        batch_buffer_pool = self.hp.batch_buffer // pool_preds
        # slice out buffer
        seq_length = seqs_repr.shape[1]
        seqs_repr = seqs_repr[:, batch_buffer_pool:seq_length -
                              batch_buffer_pool, :]
        seq_length = seqs_repr.shape[1]

        ###################################################
        # final layer
        ###################################################
        if embed_penultimate:
            final_repr = seqs_repr
        else:
            with tf.variable_scope('final', reuse=tf.AUTO_REUSE):
                final_filters = self.hp.num_targets * self.hp.target_classes

                final_repr = tf.layers.dense(
                    inputs=seqs_repr,
                    units=final_filters,
                    activation=None,
                    kernel_initializer=tf.variance_scaling_initializer(
                        scale=2.0, mode='fan_in'),
                    kernel_regularizer=tf.contrib.layers.l1_regularizer(
                        self.hp.final_l1_scale))
                print('Convolution w/ %d %dx1 filters to final targets' %
                      (final_filters, seqs_repr.shape[2]))

                if target_subset is not None:
                    # get convolution parameters
                    filters_full = tf.get_collection(
                        tf.GraphKeys.GLOBAL_VARIABLES, 'final/dense/kernel')[0]
                    bias_full = tf.get_collection(
                        tf.GraphKeys.GLOBAL_VARIABLES, 'final/dense/bias')[0]

                    # subset to specific targets
                    filters_subset = tf.gather(filters_full,
                                               target_subset,
                                               axis=1)
                    bias_subset = tf.gather(bias_full, target_subset, axis=0)

                    # substitute a new limited convolution
                    final_repr = tf.tensordot(seqs_repr, filters_subset, 1)
                    final_repr = tf.nn.bias_add(final_repr, bias_subset)

                # expand length back out
                if self.hp.target_classes > 1:
                    final_repr = tf.reshape(
                        final_repr, (-1, seq_length, self.hp.num_targets,
                                     self.hp.target_classes))

        # transform for reverse complement
        if reverse_preds is not None:
            final_repr = tf.cond(reverse_preds,
                                 lambda: tf.reverse(final_repr, axis=[1]),
                                 lambda: final_repr)

        ###################################################
        # link function
        ###################################################
        if embed_penultimate:
            predictions = final_repr
        else:
            # work-around for specifying my own predictions
            # self.preds_adhoc = tf.placeholder(
            #     tf.float32, shape=final_repr.shape, name='preds-adhoc')

            # float 32 exponential clip max
            exp_max = 50

            # choose link
            if self.hp.link in ['identity', 'linear']:
                predictions = tf.identity(final_repr, name='preds')

            elif self.hp.link == 'relu':
                predictions = tf.nn.relu(final_repr, name='preds')

            elif self.hp.link == 'exp':
                final_repr_clip = tf.clip_by_value(final_repr, -exp_max,
                                                   exp_max)
                predictions = tf.exp(final_repr_clip, name='preds')

            elif self.hp.link == 'exp_linear':
                predictions = tf.where(
                    final_repr > 0,
                    final_repr + 1,
                    tf.exp(tf.clip_by_value(final_repr, -exp_max, exp_max)),
                    name='preds')

            elif self.hp.link == 'softplus':
                final_repr_clip = tf.clip_by_value(final_repr, -exp_max, 10000)
                predictions = tf.nn.softplus(final_repr_clip, name='preds')

            else:
                print('Unknown link function %s' % self.hp.link,
                      file=sys.stderr)
                exit(1)

            # clip
            if self.hp.target_clip is not None:
                predictions = tf.clip_by_value(predictions, 0,
                                               self.hp.target_clip)

            # sqrt
            if self.hp.target_sqrt:
                predictions = tf.sqrt(predictions)

        return predictions
Exemple #26
0
import tensorflow as tf
import numpy as np

images = tf.placeholder(tf.float32, [None, 256, 256, 1])

# Conv 1
filters1_1 = tf.Variable(tf.truncated_normal([3, 3, 1, 64]))
bias1_1 = tf.Variable(tf.constant(0.1, shape=[256, 256, 64]))
conv1_1 = tf.nn.relu(
    tf.nn.conv2d(images, filters1_1, strides=[1, 1, 1, 1], padding='SAME') +
    bias1_1)

# 64 input channels come from conv1_1; the spatial stride of 2 belongs in
# positions 1 and 2 of `strides` ([1, 2, 2, 1]), not in the batch dimension
filters1_2 = tf.Variable(tf.truncated_normal([3, 3, 64, 64]))
bias1_2 = tf.Variable(tf.constant(0.1, shape=[128, 128, 64]))
conv1_2 = tf.nn.relu(
    tf.nn.conv2d(conv1_1, filters1_2, strides=[1, 2, 2, 1], padding='SAME') +
    bias1_2)

# Conv 2
filters2_1 = tf.Variable(tf.truncated_normal([3, 3, 64, 128]))
bias2_1 = tf.Variable(tf.constant(0.1, shape=[128, 128, 128]))
conv2_1 = tf.nn.relu(
    tf.nn.conv2d(conv1_2, filters2_1, strides=[1, 1, 1, 1], padding='SAME') +
    bias2_1)

filters2_2 = tf.Variable(tf.truncated_normal([3, 3, 128, 128]))
bias2_2 = tf.Variable(tf.constant(0.1, shape=[64, 64, 128]))
conv2_2 = tf.nn.relu(
    tf.nn.conv2d(conv2_1, filters2_2, strides=[1, 2, 2, 1], padding='SAME') +
    bias2_2)
Exemple #27
0
    def build_loss(self, seqs_repr, data_ops, target_subset=None):
        """Convert per-location real-valued predictions to a loss."""

        # targets
        tstart = self.batch_buffer // self.target_pool
        tend = (self.seq_length - self.batch_buffer) // self.target_pool
        self.target_length = tend - tstart

        targets = data_ops['label']
        targets = tf.identity(targets[:, tstart:tend, :], name='targets_op')

        if target_subset is not None:
            targets = tf.gather(targets, target_subset, axis=2)

        # work-around for specifying my own predictions
        self.preds_adhoc = tf.placeholder(tf.float32,
                                          shape=seqs_repr.shape,
                                          name='preds-adhoc')

        # choose link
        if self.link in ['identity', 'linear']:
            self.preds_op = tf.identity(seqs_repr, name='preds')

        elif self.link == 'relu':
            self.preds_op = tf.nn.relu(seqs_repr, name='preds')

        elif self.link == 'exp':
            self.preds_op = tf.exp(tf.clip_by_value(seqs_repr, -50, 50),
                                   name='preds')

        elif self.link == 'exp_linear':
            self.preds_op = tf.where(seqs_repr > 0,
                                     seqs_repr + 1,
                                     tf.exp(
                                         tf.clip_by_value(seqs_repr, -50, 50)),
                                     name='preds')

        elif self.link == 'softplus':
            self.preds_op = tf.nn.softplus(tf.clip_by_value(
                seqs_repr, -50, 50),
                                           name='preds')

        elif self.link == 'softmax':
            # performed in the loss function, but saving probabilities
            self.preds_prob = tf.nn.softmax(seqs_repr, name='preds')

        else:
            print('Unknown link function %s' % self.link, file=sys.stderr)
            exit(1)

        # clip
        if self.target_clip is not None:
            self.preds_op = tf.clip_by_value(self.preds_op, 0,
                                             self.target_clip)
            targets = tf.clip_by_value(targets, 0, self.target_clip)

        # sqrt
        if self.target_sqrt:
            self.preds_op = tf.sqrt(self.preds_op)
            targets = tf.sqrt(targets)

        loss_op = None
        loss_adhoc = None
        loss_name = self.loss
        # choose loss
        if loss_name == 'gaussian':
            loss_op = tf.squared_difference(self.preds_op, targets)
            loss_adhoc = tf.squared_difference(self.preds_adhoc, targets)

        elif loss_name == 'poisson':
            loss_op = tf.nn.log_poisson_loss(targets,
                                             tf.log(self.preds_op),
                                             compute_full_loss=True)
            loss_adhoc = tf.nn.log_poisson_loss(targets,
                                                tf.log(self.preds_adhoc),
                                                compute_full_loss=True)

        elif loss_name == 'gamma':
            # jchan document
            loss_op = targets / self.preds_op + tf.log(self.preds_op)
            loss_adhoc = targets / self.preds_adhoc + tf.log(self.preds_adhoc)

        elif loss_name == 'cross_entropy':
            loss_op = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=(targets - 1), logits=self.preds_op)
            loss_adhoc = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=(targets - 1), logits=self.preds_adhoc)

        else:
            raise ValueError('Cannot identify loss function %s' % loss_name)

        # reduce losses by batch and position
        loss_op = tf.reduce_mean(loss_op, axis=[0, 1], name='target_loss')
        loss_op = tf.check_numerics(loss_op, 'Invalid loss', name='loss_check')

        loss_adhoc = tf.reduce_mean(loss_adhoc,
                                    axis=[0, 1],
                                    name='target_loss_adhoc')
        tf.summary.histogram('target_loss', loss_op)
        for ti in np.linspace(0, self.num_targets - 1, 10).astype('int'):
            tf.summary.scalar('loss_t%d' % ti, loss_op[ti])
        self.target_losses = loss_op
        self.target_losses_adhoc = loss_adhoc

        # fully reduce
        loss_op = tf.reduce_mean(loss_op, name='loss')
        loss_adhoc = tf.reduce_mean(loss_adhoc, name='loss_adhoc')

        # add extraneous terms
        loss_op += self.weights_regularizers
        loss_adhoc += self.weights_regularizers

        # track
        tf.summary.scalar('loss', loss_op)
        self.targets_op = targets
        return loss_op, loss_adhoc
def relu_activation(W, x, b):
    # `b` is taken as a parameter; it was not defined elsewhere in this snippet
    return tf.nn.relu(tf.matmul(x, W) + b)
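
A hedged usage sketch for the helper above (the shapes and variable names are
made up for illustration):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 3])
W = tf.Variable(tf.random_normal([3, 4]))
b = tf.Variable(tf.zeros([4]))
h = relu_activation(W, x, b)  # shape [None, 4]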