Example #1
    def compute_logits(self):
        with tf.name_scope('compute_logits') as scope:
            self.all_votes_A = self.A_vote.forward(
                self.final_A_lits)  # n_A_lits x 1
            self.all_votes_L = self.L_vote.forward(
                self.final_L_lits)  # n_L_lits x 1
            self.all_votes_join_A = tf.concat([
                self.all_votes_A[0:self.n_A_vars],
                self.all_votes_A[self.n_A_vars:self.n_A_lits]
            ],
                                              axis=1)
            self.all_votes_join_L = tf.concat([
                self.all_votes_L[0:self.n_L_vars],
                self.all_votes_L[self.n_L_vars:self.n_L_lits]
            ],
                                              axis=1)
            self.all_votes_batched_A = tf.reshape(
                self.all_votes_join_A,
                [self.n_batches, self.n_A_vars_per_batch, 2])
            self.all_votes_batched_L = tf.reshape(
                self.all_votes_join_L,
                [self.n_batches, self.n_L_vars_per_batch, 2])

            # try to use only A_votes for logits?
            self.A_logits = self.all_votes_batched_A
            self.L_logits = self.all_votes_batched_L
            self.A_policy = tf.softmax(self.A_logits)
            self.L_policy = tf.softmax(self.L_logits)
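Note that `tf.softmax` is not part of the public TensorFlow API; the snippets on this page come from repositories that would need `tf.nn.softmax` (or `tf.math.softmax`) for the call to resolve. A minimal sketch of the final step above, assuming TF 2.x eager mode and illustrative shapes (4 problems per batch, 3 variables each, 2 polarities per variable):

import tensorflow as tf

# Hypothetical logits shaped [n_batches, n_vars_per_batch, 2]: one score per literal polarity.
logits = tf.random.normal([4, 3, 2])
policy = tf.nn.softmax(logits, axis=-1)  # per-variable distribution over the two polarities
print(policy.shape)  # (4, 3, 2); each [i, j, :] pair sums to 1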
Example #2
  def output_layer(self, seq_1, seq_2, scope = 'PointerNetwork'):
    h_P = seq_1
    u_Q = seq_2
    cell = self.GRUCellGPU(num_units)
    with tf.variable_scope(scope):
      # initialize hidden state of answer
      attn_v_a = tf.get_variable('attn_v')
      w_u_Q = tf.get_variable('weight_passage')
      w_v_Q = tf.get_variable('weight_answer')
      V_Q = tf.get_variable('weight_answer')
      score_a = tf.reduce_sum(attn_v_a * tf.tanh(w_u_Q * u_Q + w_a * h_a), [2]) # scores for all tokens
      alignments_a = tf.softmax(score_a)
      r_Q = tf.reduce_sum(tf.matmul(alignments_a, u_Q, transpose_b=True), [2])

      attn_v = tf.get_variable('attn_v')
      w_P = tf.get_variable('weight_passage')
      w_a = tf.get_variable('weight_answer')
      h_a = tf.get_variable('embedding_answer')
      score = tf.reduce_sum(attn_v * tf.tanh(w_P * h_P + w_a * h_a), [2]) # scores for all tokens
      alignments = tf.softmax(score)
      alignments = tf.expand_dims(alignments, 1)
      v_P = tf.expand_dims(u_Q, 1)
      context = tf.reduce_sum(tf.matmul(alignments, v_P, transpose_b=True), [2])
      outputs, h_a = cell(h_a, context)
      self.answer = outputs
    return self.answer
Example #3
 def _process_3d_logits_train(logits, routing_weights, labels):
     processing_results = _process_3d_logits(logits, routing_weights,
                                             labels)
     if FLAGS.loss == 'gibbs_ce':
         probs = processing_results['weighted_probs']
         negative_log_likelihood = processing_results['weighted_gibbs_ce']
     elif FLAGS.loss == 'unweighted_gibbs_ce':
         probs = processing_results['unweighted_probs']
         negative_log_likelihood = processing_results['unweighted_gibbs_ce']
     elif FLAGS.loss == 'moe':
         probs = processing_results['weighted_probs']
         negative_log_likelihood = tf.math.reduce_mean(
             tf.keras.losses.sparse_categorical_crossentropy(
                 labels, probs, from_logits=False))
     elif FLAGS.loss == 'unweighted_moe':
         probs = processing_results['unweighted_probs']
         negative_log_likelihood = tf.math.reduce_mean(
             tf.keras.losses.sparse_categorical_crossentropy(
                 labels, probs, from_logits=False))
     elif FLAGS.loss == 'poe':
         probs = tf.softmax(processing_results['weighted_logits'])
         negative_log_likelihood = tf.math.reduce_mean(
             tf.keras.losses.sparse_categorical_crossentropy(
                 labels,
                 processing_results['weighted_logits'],
                 from_logits=True))
     elif FLAGS.loss == 'unweighted_poe':
         probs = tf.softmax(processing_results['unweighted_logits'])
         negative_log_likelihood = tf.math.reduce_mean(
             tf.keras.losses.sparse_categorical_crossentropy(
                 labels,
                 processing_results['unweighted_logits'],
                 from_logits=True))
     return probs, negative_log_likelihood
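For reference, a small self-contained check (not from the original repo) of the identity the 'moe' and 'poe' branches rely on: sparse_categorical_crossentropy with from_logits=True applies the softmax internally, so it matches passing pre-softmaxed probabilities with from_logits=False.

import tensorflow as tf

labels = tf.constant([0, 2])
logits = tf.constant([[2.0, 1.0, 0.1], [0.5, 0.3, 2.2]])
nll_from_logits = tf.reduce_mean(
    tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True))
nll_from_probs = tf.reduce_mean(
    tf.keras.losses.sparse_categorical_crossentropy(
        labels, tf.nn.softmax(logits), from_logits=False))
# The two values agree up to floating-point error.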
Example #4
File: pixnet.py Project: gmaher/glimpse
def lstm_block(x,
               v,
               lstm_size=512,
               vocab_size=52,
               num_words=30,
               feed_previous=False,
               scope='lstm_block',
               reuse=False,
               batch_size=4):

    with tf.variable_scope(scope, reuse=reuse):
        with tf.variable_scope('lstm_1', reuse=reuse):
            lstm_first = tf.contrib.rnn.BasicLSTMCell(lstm_size, reuse=reuse)
            state_first = lstm_first.zero_state(batch_size, tf.float32)

            o_1, state_first = lstm_first(x[:, 0, :], state_first)

            r = tf.concat([o_1, v], axis=1)
        with tf.variable_scope('lstm_2', reuse=reuse):
            lstm_second = tf.contrib.rnn.BasicLSTMCell(lstm_size, reuse=reuse)
            state_second = lstm_second.zero_state(batch_size, tf.float32)

            o_2, state_second = lstm_second(r, state_second)

        o = fullyConnected(o_2,
                           output_units=vocab_size,
                           std='xavier',
                           activation=tf.identity,
                           reuse=False,
                           scope='lstm_fc')

    with tf.variable_scope(scope, reuse=True):
        #Teacher forcing: we feed in a list of ground-truth words, so we don't need to feed
        #back in the output of the lstm
        outputs = []
        outputs.append(o)
        for i in range(num_words - 1):
            if not feed_previous:
                word = x[:, i + 1, :]
            else:
                word = tf.softmax(o)

            with tf.variable_scope('lstm_1', reuse=True):
                o, state_first = lstm_first(word, state_first)

            o = tf.concat([o, v], axis=1)

            with tf.variable_scope('lstm_2', reuse=True):
                o, state_second = lstm_second(o, state_second)

            o = fullyConnected(o,
                               output_units=vocab_size,
                               std='xavier',
                               activation=tf.identity,
                               reuse=True,
                               scope='lstm_fc')

            outputs.append(o)

    return outputs
Example #5
File: postprocess.py Project: qing0991/tpu
def _apply_score_activation(logits, num_classes, activation):
    """Applies activation to logits and removes the background class.

  Note that it is assumed that the background class has index 0, which is
  sliced away after the score transformation.

  Args:
    logits: the raw logit tensor.
    num_classes: the total number of classes including one background class.
    activation: the score activation type, one of 'SIGMOID', 'SOFTMAX' and
      'IDENTITY'.

  Returns:
    scores: the tensor after applying score transformation and background
      class removal.
  """
    batch_size = tf.shape(logits)[0]
    logits = tf.reshape(logits, [batch_size, -1, num_classes])
    if activation == 'SIGMOID':
        scores = tf.sigmoid(logits)
    elif activation == 'SOFTMAX':
        scores = tf.softmax(logits)
    elif activation == 'IDENTITY':
        scores = logits
    else:
        raise ValueError(
            'The score activation should be SIGMOID, SOFTMAX or IDENTITY')
    scores = scores[..., 1:]
    return scores
Example #6
    def add_prob_logits_samples(self):
        outputs = tf.unstack(self.returns['output'])
        batch_nums = tf.range(0, limit=self.hpm['batch_size'], dtype=tf.int64)
        argmax_seqs = []
        argmax_seqs_log_probs = []
        for i, x in enumerate(outputs):
            max_ids = tf.argmax(x, axis=-1)
            indices = tf.stack((batch_nums, max_ids), axis=-1)
            log_probs = tf.gather_nd(x, indices)
            argmax_seqs.append(max_ids)
            argmax_seqs_log_probs.append(log_probs)

        self.outputs = self.returns['output']
        if not self.hpm['pointer_gen']:
            self.outputs = tf.softmax(self.outputs)

        self.argmax_seqs = tf.stack(argmax_seqs, name='argmax_seqs')
        self.argmax_seqs_log_probs = tf.stack(argmax_seqs_log_probs,
                                              name='argmax_seqs_log_probs')

        sampler = tf.distributions.Categorical(logits=outputs)
        self.samples = sampler.sample(name='samples')
        self.samples = tf.identity(self.samples, name='samples')
        self.samples_log_probs = sampler.log_prob(self.samples,
                                                  name="samples_log_probs")
        self.samples_log_probs = tf.identity(self.samples_log_probs,
                                             name="samples_log_probs")
Example #7
def make_attention(s, e, name):
    """
  s: sentence: slen x ssz
  e: ssz x embedsz
  a: attention coefficients. Here is a design choice.
     either I have:
     - out[w][k] = Σw' s[w][i] a[i][j][k] s[w'][k] | a: embedsz^3
     - out[w][k] = Σw' a[i][k] (s[w];s[w'])[i] | a: 2xembedsz^2
     - out[w][k] = a[0][i][k]s[w][i] +  Σw' a[1][i][k]s[w'][i] | a: 2xembedsz^2
  """

    embedsz = e.shape[-1]

    a = tf.Variable(tf.random_normal([2 * embedsz, embedsz]), name=name)

    # s: slen x ssz @ ssz x embedsz: slen x embedsz
    s = tf.matmul(s, e)

    # ss: slen x (embedz + embedz)
    ss = tf.concat([s, s], axis=1)

    sattn = tf.matmul(ss, a)

    # softmax so we have keys into the next embedding layer
    # make sure that we know what values to use
    sattn = tf.softmax(sattn / tf.sqrt(tf.cast(embedsz, tf.float32)))
    return a, sattn
Example #8
File: TFUtils.py Project: MNiny/ISLES-Test
def soft_dice_loss(logits, ground_truth):
    #probabilities = tf.sigmoid(logits)
    probabilities = tf.softmax(logits)
    interception_volume = tf.reduce_sum(probabilities * ground_truth)
    return -2 * interception_volume + tf.constant(smooth) / (
        tf.norm(ground_truth, ord=1) + tf.norm(probabilities, ord=1) +
        tf.constant(smooth))
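The function above references an undefined `smooth` constant and, because of operator precedence, only divides the smoothing term by the denominator. A hedged sketch of a more conventional soft Dice loss (the default `smooth` value and the softmax axis are assumptions, not taken from TFUtils.py):

import tensorflow as tf

def soft_dice_loss_sketch(logits, ground_truth, smooth=1e-5):
    # Assumed multi-class case: normalize over the last (class) axis.
    probabilities = tf.nn.softmax(logits, axis=-1)
    intersection = tf.reduce_sum(probabilities * ground_truth)
    denominator = tf.reduce_sum(probabilities) + tf.reduce_sum(ground_truth)
    dice = (2.0 * intersection + smooth) / (denominator + smooth)  # in (0, 1]
    return 1.0 - dice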
Example #9
 def gated_attn(self, seq_1, seq_2, num_units=None, scope='GatedAttn'):
   ''' gated attention for seq_2 w.r.t seq_1
     input:
       seq_1: query sequence in attention mechanism
       seq_2: encoder sequence in attention mechanism
     output:
       outputs:
       v_P:
       context:
   '''
   num_units = num_units if num_units is not None else self.len_gs
   u_Q = seq_1
   u_P = seq_2
   cell = self.GRUCellGPU(num_units)
   with tf.variable_scope(scope):
     attn_v = tf.get_variable('attn_v', [num_units])
     w_u_Q = tf.get_variable('weight_ques')
     w_u_P = tf.get_variable('weight_pass_orig')
     w_v_P = tf.get_variable('weight_pass_ques')
     v_P = tf.get_variable('embedding_pass_ques')
     score = tf.reduce_sum(attn_v * tf.tanh(w_u_Q * u_Q + w_u_P * u_P + w_v_P * v_P), [2]) # scores of all tokens
     alignments = tf.softmax(score)
     alignments = tf.expand_dims(alignments, 1)
     u_Q = tf.expand_dims(u_Q, 1)
     context = tf.reduce_sum(tf.matmul(alignments, u_Q, transpose_b=True), [2])
     # gate
     inputs = tf.concat([u_Q, context], 1)
     w_g = tf.get_variable('weight_gate')
     g = tf.sigmoid(tf.reduce_sum(w_g * inputs))
     gated_inputs = g * inputs
     outputs, v_P = cell(gated_inputs, v_P)
   return outputs, v_P
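A minimal sketch of just the gating idea used above, assuming TF 2.x eager mode and illustrative shapes (this is the generic R-Net-style elementwise gate, not the exact variables of the original class): the concatenated [question, context] vector is scaled by a learned sigmoid gate before being fed to the recurrent cell.

import tensorflow as tf

inputs = tf.random.normal([2, 6])            # batch of 2, concatenated feature size 6
w_g = tf.Variable(tf.random.normal([6, 6]))  # gate weights
g = tf.sigmoid(tf.matmul(inputs, w_g))       # elementwise gate in (0, 1)
gated_inputs = g * inputs                    # attenuated input to the GRU cell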
Example #10
 def write(self, Args):
     row = tf.transpose(tf.softmax(Args[0][:, 0:2]))  # 2 * 1
     value = tf.tile(Args[1], [2, 1])
     a_w = D(tf.tile(self.ADDR, [1, 2]),
             self.ptrs[:, -2:]) * row  # 2 * max_seq_length
     out = self.envs[-2:, :, :] * tf.expand_dims(
         1.0 - a_w, 2) + tf.expand_dims(value, 1) * tf.expand_dims(a_w, 2)
     self.envs = tf.concat(
         [self.envs[0:self.max_env_size - 2, :, :], out], axis=0)
Example #11
def capsnet(inputs):
    '''
    Construct a 3-layer capsule net with 28x28 inputs. 
    '''

    ## Layer 1 is a regular convulution. We blow 1 channel up into 256 channels.
    with tf.variable_scope('conv1') as scope:
        kernel = _get_kernel('weights', [9, 9, 1, 256], stddev=5e-2, reg=0.0)
        conv = tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='VALID')
        biases = tf.get_variable('biases', [256],
                                 initializer=tf.constant_initializer(0.0))
        pre_act = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_act, name=scope.name)

    ## Layer 2 is the first capsule layer. It amounts to 32 parallel convolutions from 256 channels
    ## down to 8 channels. Each of these 32 conv layers contains (width) * (height) capsules of length 8.
    ## The output of the layer is a [width * height * 32] * 8 matrix. Each of the [width * height * 32] rows
    ## represents a capsule.
    capsules1 = tf.zeros((0, 8))

    with tf.variable_scope('primary_caps') as scope:
        for i in range(0, 32):
            kernel = _get_kernel('weights' + str(i), [9, 9, 256, 8],
                                 stddev=5e-2,
                                 reg=0.0)
            conv = tf.nn.conv2d(conv1, kernel, [1, 2, 2, 1], padding='VALID')
            biases = tf.get_variable('biases' + str(i), [8],
                                     initializer=tf.constant_initializer(0.0))
            pre_act = tf.nn.bias_add(conv, biases)
            conv2 = tf.nn.relu(pre_act, name=scope.name)
            shaped = tf.reshape(conv2, [36, 8])
            capsules1 = tf.concat([capsules1, shaped], 0)

    with tf.variable_scope('coupling') as scope:
        priors = tf.get_variable('priors',
                                 shape=[capsules1.shape[0], 10],
                                 initializer=tf.constant_initializer(0.0))
        coupling_coeffs = tf.softmax(priors)

    with tf.variable_scope('secondary_caps'):
        for j in range(0, NUM_CLASSES):
            routes_into_j = []
            for i in range(0, capsules1.shape[0]):
                W_ij = _get_tn_var('weights_' + str(i) + str(j),
                                   shape=[16, 8],
                                   stddev=0.04,
                                   reg=0.004)
                b_ij = tf.get_variable('biases_' + str(i) + str(j), [16],
                                       initializer=tf.constant_initializer(0.0))
                uhat = tf.add(tf.matmul(W_ij, capsules1[i]),
                              b_ij)  # \times c_i
                routes_into_j.append(tf.scalar_mul(coupling_coeffs[i, j],
                                                   uhat))
            s_j = tf.reduce_sum(routes_into_j)
Example #12
 def get_h_tile(cls, s, s1):
     """
     attended vectors of s1
     which words in s1 is most similar to each words in s2
     """
     t1 = s1.shape[1]
     # per the commented torch lines: [B, 1, t1] weights over words, then a weighted sum of s1 rows
     b_weight = tf.expand_dims(tf.softmax(tf.reduce_max(s, 2), -1), 1)
     h_tile = tf.tile(tf.matmul(b_weight, s1), [1, t1, 1])  # [B, t1, D]
     #         b_weight = F.softmax(torch.max(s, dim=2)[0], dim=-1).unsqueeze(1)  # [b, t2]
     #         h_tile = torch.bmm(b_weight, s1).repeat(1, t1, 1)  # repeat to match s1 # [B, t1, D]
     return h_tile
Example #13
def run_example():

    x = tf.constant([1, 2, 3, 4, 5, 6], dtype=tf.float32)

    reshape_op = tf.reshape(x, [3, 2])
    softmax_op = tf.softmax(reshape_op)

    with tf.Session() as sess:

        res = sess.run(softmax_op)
        print("Result 1 = \n{}\n".format(res))
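The same demo as a runnable TF 2.x sketch (eager mode, so no Session; the values are mine):

import tensorflow as tf

x = tf.constant([1, 2, 3, 4, 5, 6], dtype=tf.float32)
reshaped = tf.reshape(x, [3, 2])          # [[1, 2], [3, 4], [5, 6]]
probs = tf.nn.softmax(reshaped, axis=-1)  # softmax applied row-wise
print(probs.numpy())                      # every row is roughly [0.269, 0.731]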
Example #14
 def next_inputs_fn(time, outputs, state, sample_id):
     output_prob = tf.softmax(outputs, axis=-1)
     next_embedding = tf.matmul(output_prob, embedding)
     # 				elements_finished = (time >= sentence_lengths)
     elements_finished = tf.tile(tf.constant([0]),
                                 [self.batch_size])
     # 				all_finished = tf.reduce_all(elements_finished) # making length different
     # 				all_finished = False
     next_inputs = next_embedding
     next_state = state
     return elements_finished, next_inputs, next_state
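For clarity, a hedged sketch of the "soft embedding" step the helper computes (the shapes are illustrative assumptions): the softmax over the vocabulary weights a matmul with the embedding matrix, producing an expected embedding rather than the embedding of a single sampled token.

import tensorflow as tf

batch, vocab_size, embed_dim = 2, 5, 3
outputs = tf.random.normal([batch, vocab_size])        # decoder logits
embedding = tf.random.normal([vocab_size, embed_dim])  # embedding matrix
output_prob = tf.nn.softmax(outputs, axis=-1)          # [batch, vocab_size]
next_embedding = tf.matmul(output_prob, embedding)     # [batch, embed_dim]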
Example #15
def attention_layer(input_tensor,
                    num_attention_head,
                    attn_head_size):
    """
    input_tensor [B,N,S] 

    num_attention_head : K
    attn_head_size : H
    """

    def transpose_for_score(input_tensor,batch_size,seq_length,num_attn_head,attn_head_size):
        input_tensor = tf.reshape(input_tensor,[batch_size,seq_length,
                                                num_attn_head,attn_head_size])
        output_tensor = tf.transpose(input_tensor,[0,2,1,3])
        return output_tensor


    input_shape = input_tensor.shape.as_list()
    batch_size = input_shape[0]
    seq_length = input_shape[1]
    hidden_size = input_shape[2]
    #[B*N,S]
    input_tensor = tf.reshape(input_tensor,[-1,input_shape[-1]])

    #[B*N,K*H]
    query = tf.layers.dense(
        input_tensor,
        num_attention_head * attn_head_size,
        name = 'query'
        )
    #[B*N,K*H]
    key = tf.layers.dense(
        input_tensor,
        num_attention_head * attn_head_size,
        name = 'key'
        )
    #[B*N,K*H]
    value = tf.layers.dense(
        input_tensor,
        num_attention_head * attn_head_size,
        name = 'value'
        )

    #[B,K,N,H]
    query = transpose_for_score(query, batch_size, seq_length,
                                num_attention_head, attn_head_size)
    key = transpose_for_score(key, batch_size, seq_length,
                              num_attention_head, attn_head_size)
    attention_score = tf.matmul(query,key,transpose_b = True)
    attention_score = tf.multiply(attention_score,1.0/math.sqrt(attn_head_size))
    attention_score = tf.softmax(attention_score,axis = -1)
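The snippet is cut off after the softmax. A hedged sketch of how such a multi-head attention layer typically finishes (this continuation is an assumption, not the original file): the weights are applied to the value heads and the result is reshaped back to [B, N, K*H].

import tensorflow as tf

def finish_attention(attention_score, value, batch_size, seq_length,
                     num_attention_head, attn_head_size):
    # value: [B*N, K*H] -> [B, K, N, H], matching the query/key layout above
    value = tf.reshape(value, [batch_size, seq_length,
                               num_attention_head, attn_head_size])
    value = tf.transpose(value, [0, 2, 1, 3])
    context = tf.matmul(attention_score, value)    # [B, K, N, H]
    context = tf.transpose(context, [0, 2, 1, 3])  # [B, N, K, H]
    return tf.reshape(context, [batch_size, seq_length,
                                num_attention_head * attn_head_size])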
    
Example #16
def _apply_score_activation(logits, num_classes, activation):
    """Applies activation to logits and removes the background class."""
    batch_size = tf.shape(logits)[0]
    if activation == 'SIGMOID':
        logits = tf.reshape(logits, [batch_size, -1, num_classes])
        scores = tf.sigmoid(logits)
    elif activation == 'SOFTMAX':
        logits = tf.reshape(logits, [batch_size, -1, num_classes + 1])
        scores = tf.softmax(logits)
        scores = scores[..., 1:]
    else:
        raise ValueError(
            'The score activation should be either SIGMOID or SOFTMAX.')
    return scores
Example #17
    def build(self):
        # build input place holder
        self._build_input_pl()

        # build feature extractor
        rpn_feature_maps, mrcnn_feature_maps = self._build_feature_map()

        # get anchors
        featmap_shape_list = self._compute_featmap_shape_list(rpn_feature_maps)
        anchors = self._generate_anchors(featmap_shape_list)

        # rpn_logits is used to get loss of it
        # rpn_cls_scores (N,all_num_anchors,2)
        rpn_bboxes_delta, rpn_cls_scores = self._build_rpn(
            rpn_feature_maps, weight_decay=self._weight_decay)

        rpn_cls_scores_reshape = tf.reshape(
            rpn_cls_scores, (tf.shape(rpn_cls_scores)[0], -1, 2))
        rpn_cls_prob = tf.softmax(rpn_cls_scores_reshape, axis=-1)

        # shape(2*num_anchors_per_location,all_num_anchors)
        rpn_cls_prob_reshape = tf.reshape(rpn_cls_prob,
                                          (tf.shape(rpn_cls_prob)[0], -1, 2 *
                                           self._rpn_num_anchors_per_location))

        #################################
        # AnchorTarget
        # generate labels for anchors
        #################################
        if self._training:
            rpn_bbox_targets, rpn_bbox_labels, rpn_bbox_inside_weights, rpn_bbox_outside_weights = self._generate_anchor_target(
                anchors, self._gt_boxes, self._img_shape)

        #################################
        # Proposal
        #################################
        rois = self._generate_proposal(rpn_cls_prob_reshape, rpn_bboxes_delta,
                                       anchors)
        # # decode bbox by applying deltas to anchors
        # self._bbox_decoder.decode(anchors, rpn_bboxes_delta)

        #################################
        # ProposalTarget
        #################################
        if self._training:
            self._generate_proposal_target(
                self._gt_boxes,
                self._gt_labels,
                rois,
            )
Example #18
def get_logits(x):
    #cnn_input = tf.reshape(x,[deep_AS_config.batch_size, deep_AS_config.num_units, deep_AS_config.vocab_size,1])

    softmax_weight1 = tf.get_variable(
        name="dense8_weights",
        shape=[
            deep_AS_config.FLAGS.num_units, deep_AS_config.FLAGS.label_class
        ],
        initializer=tf.uniform_unit_scaling_initializer(1.43))
    softmax_bias1 = tf.get_variable(name="dense8_biases",
                                    shape=[deep_AS_config.FLAGS.label_class],
                                    initializer=tf.constant_initializer(0.1))
    #logit = tf.nn.relu(tf.matmul(out, dense8_weight) + dense8_bias)
    logit = tf.softmax(tf.matmul(x, softmax_weight1) + softmax_bias1)
    return logit
Example #19
 def get_u_tile(cls, s, s2):
     """
     attended vectors of s2 for each word in s1,
     signify which words in s2 are most relevant to words in s1
     """
     a_weight = tf.softmax(s, 2)  # [B, t1, t2]
     # zero out NaNs produced by softmax over rows that are entirely -inf
     a_weight = tf.where(tf.is_nan(a_weight), tf.zeros_like(a_weight), a_weight)
     #         a_weight.data.masked_fill_(a_weight.data != a_weight.data, 0)  # remove nan from softmax on -inf
     #         u_tile = torch.bmm(a_weight, s2)  # [B, t1, t2] * [B, t2, D] -> [B, t1, D]
     u_tile = tf.matmul(a_weight, s2)
     return u_tile
Example #20
File: train.py Project: rsepassi/tf-play
    def __init__(self, batcher, in_node, out_node, cost, train_step, classifier=False):
        self.batcher = batcher
        self.in_node = in_node
        self.out_node = out_node
        self.cost = cost
        self.train_step = train_step
        self.global_step = tfutils.opt.global_step()
        self.eval_cost = tf.Variable(0.0, name='eval_cost', trainable=False)

        if classifier:
            self.eval_accuracy = tf.Variable(0.0, name='eval_accuracy',
                                             trainable=False)
            predictions = tf.argmax(tf.softmax(y_hat), 1)
            true = tf.argmax(y, 1)

        self.step = 1
Example #21
def inference(image):
    FCN8s.logger.info("inference")

    # FCN
    net = FCN8s()
    with tf.name_scope("FCN8s"):
        net.build(image, debug=True)
    fub = tf.softmax(net.upscore32, axis=3)

    # Matting
    matting_module = tf.load_op_library("matting.so")
    with tf.name_scope("matting"):
        init = tf.constant_initializer(value=100.0, dtype=tf.double)
        lamb = tf.get_variable(name="lambda", initializer=init, shape=[1])
        pred_annotation = matting_module.matting(image, fub, lamb)

    return pred_annotation
Example #22
File: cart.py Project: n0mad/rl
def do_episode(W):
    rewards = []
    observations = []
    pi = []
    observation = ENV.reset()
    for i in xrange(250):
        p_0 = tf.softmax(np.dot(W, observation))
        if npr.uniform() < p_0:
            action = 0
            pi.append(p_0)
        else:
            action = 1
            pi.append(1 - p_0)
        observation, reward, done, info = ENV.step(action)
        rewards.append(reward)
        if done:
            break
    rewards = np.array(rewards)
    pi = np.array(pi)
    return np.dot(rewards, np.log(pi))
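The snippet applies a TensorFlow softmax to what appears to be a single scalar score (which would always evaluate to 1.0) inside otherwise pure-numpy code. A hedged numpy-only sketch of the same REINFORCE-style rollout, using a sigmoid to turn the scalar score into the probability of action 0 (the environment API and the sigmoid choice are my assumptions):

import numpy as np
import numpy.random as npr

def do_episode_sketch(W, env, max_steps=250):
    rewards, pi = [], []
    observation = env.reset()
    for _ in range(max_steps):
        # Squash the scalar preference for action 0 into a probability.
        p_0 = 1.0 / (1.0 + np.exp(-np.dot(W, observation)))
        if npr.uniform() < p_0:
            action, prob = 0, p_0
        else:
            action, prob = 1, 1.0 - p_0
        pi.append(prob)
        observation, reward, done, info = env.step(action)
        rewards.append(reward)
        if done:
            break
    return np.dot(np.array(rewards), np.log(np.array(pi)))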
Example #23
 def self_matching_attn(self, seq, scope = 'SelfMatchAttn'):
   ''' self-matching attention of seq
     input:
     output:
   '''
   v_P = seq
   with tf.variable_scope(scope):
     attn_v = tf.get_variable('attn_v')
     w_v_P = tf.get_variable('weight_passage')
     w_v_P_w = tf.get_variable('weight_passage_wave')
     score = tf.reduce_sum(attn_v * tf.tanh(w_v_P * v_P + w_v_P_w * v_P), [2]) # scores for all tokens
     alignments = tf.softmax(score)
     alignments = tf.expand_dims(alignments, 1)
     v_P = tf.expand_dims(v_P, 1)
     context = tf.reduce_sum(tf.matmul(alignments, v_P, transpose_b=True), [2])
     inputs = tf.concat([seq, context], 1)
     outputs = self.bidirectionalGRU(inputs, 1)
     h_P = outputs
   return h_P
Example #24
 def choose_action(self, observation, cur_time):
     observation = tf.stop_gradient(observation)
     layer = tf.layers.dense(
         inputs=observation,
         units=self.n_features,
         activation=tf.nn.tanh,
         kernel_initializer=tf.random_normal_initializer(mean=0,
                                                         stddev=0.3),
         bias_initializer=tf.constant_initializer(0.1),
         name='fc1')
     all_act = tf.layers.dense(
         inputs=layer,
         units=self.n_actions,
         activation=tf.nn.tanh,
         kernel_initializer=tf.random_normal_initializer(mean=0,
                                                         stddev=0.3),
         bias_initializer=tf.constant_initializer(0.1),
         name='fc2')
     act_prob = tf.softmax(all_act, name='act_prob')
     action = tf.multinomial(tf.log(act_prob), 1)
     self.actions.append(action)
     self.all_act_prob.append(act_prob)
     return action
Example #25
    def __call__(self, inputs):

        # transpose to 'channel first'
        x = Lambda(lambda x: tf.transpose(x, [0, 3, 1, 2]))(inputs)

        # key, value and query
        u = Conv2D(filters=self.groups,
                   kernel_size=1,
                   padding='same',
                   data_format='channels_first',
                   kernel_initializer='glorot_uniform')(x)
        v = Conv2D(filters=self.groups,
                   kernel_size=1,
                   padding='same',
                   data_format='channels_first',
                   kernel_initializer='glorot_uniform')(x)
        z = Lambda(lambda x: tf.matmul(x[0], x[1], transpose_a=True))([u, v])
        # attention
        z = Lambda(lambda x: tf.softmax(x, axis=2))(z)

        w = Conv2D(filters=self.groups,
                   kernel_size=1,
                   padding='same',
                   data_format='channels_first',
                   kernel_initializer='glorot_uniform')(x)

        w = Lambda(lambda x: tf.matmul(x[0], x[1], transpose_b=True))([z, w])

        w = Conv2D(filters=self.groups,
                   kernel_size=1,
                   padding='same',
                   data_format='channels_first',
                   kernel_initializer='glorot_uniform')(w)

        outputs = Lambda(lambda x: tf.transpose(x, [0, 3, 2, 1]))(w)

        return outputs
Example #26
        def loop_fn(time, cell_output, cell_state, loop_state):
            emit_output = cell_output  # == None for time == 0
            if cell_output is None:  # time == 0
                next_cell_state = cell.zero_state(batch_size, tf.float32)
            else:
                next_cell_state = cell_state

            prev_h = next_cell_state.h
            prev_c = next_cell_state.c
            tgt_t = tgt_ta.read(time)

            # projection of previous hidden state onto source word space
            tgt_hid_proj = slim.fully_connected(prev_h, hidden_size,
                                                scope='tgt_hid_proj')
            tgt_cel_proj = slim.fully_connected(prev_c, hidden_size,
                                                scope='tgt_cel_proj')
            tgt_emb_t = tf.nn.embedding_lookup(tgt_embeddings, tgt_t)

            # tgt_rep of shape [batch_size, hidden_size].
            tgt_rep = tgt_hid_proj + tgt_cel_proj + tgt_emb_t
            tgt_rep = tf.expand_dims(tgt_rep, 2)

            attn_scores = tf.squeeze(tf.matmul(windowed_seqpos_embs, tgt_rep),
                                     2)
            # attn of shape [batch_size, max_time].
            conv_attn_aux = seqpos_embs * tf.softmax(attn_scores)

            elements_finished = (time >= tgt_seqlen)
            finished = tf.reduce_all(elements_finished)
            next_input = tf.cond(
                finished,
                lambda: tf.zeros([batch_size, hidden_size], dtype=tf.float32),
                lambda: conv_attn_aux + tf.nn.embedding_lookup(
                    inp_embeddings, tgt_t))
            next_loop_state = None
            return (elements_finished, next_input, next_cell_state,
                    emit_output, next_loop_state)
Example #27
    def construct_network(self):
        tf.reset_default_graph()
        self.word_ids = tf.placeholder(tf.int32, [None, None], name="word_ids")
        self.sentence_lengths = tf.placeholder(tf.int32, [None], name="sentence_lengths")
        self.word_ids_knowledge = tf.placeholder(tf.int32, [None, None, None], name="word_ids_know")
        self.sentence_tokens = tf.placeholder(tf.string, [None, None], name="word_list_sentence")
        self.knowledge_lengths = tf.placeholder(tf.int32, [None, None], name="sentence_lengths_know")
        self.knowledge_tokens = tf.placeholder(tf.string, [None, None, None], name="word_list_knowledge")
        self.knowledge_max_lengths = tf.placeholder(tf.int32, [None, None], name="sentence_lengths_max_know")
        self.word_ids_context = tf.placeholder(tf.int32, [None, None], name="word_ids_context")
        self.context_tokens = tf.placeholder(tf.string, [None, None], name="words_list_context")
        self.context_lengths = tf.placeholder(tf.int32, [None], name="sentence_lengths_context")
        self.sentence_labels = tf.placeholder(tf.float32, [None, None], name="sentence_labels")
        self.batch_size = tf.Variable(0)
        self.max_lengths = tf.placeholder(tf.int32, [None], name="max_lengths_padding")
        self.weights_path = tf.placeholder(tf.float32, [None, None], name="weights_path")
        self.learningrate = tf.placeholder(tf.float32, name="learningrate")
        self.is_training = tf.placeholder(tf.bool, name="is_training")
        self.loss = 0.0
        input_tensor = None
        input_vector_size = 0 
        #reiss= ['physiological', 'love', 'spiritual growth', 'esteem', 'stability']
        if self.config["human_needs"] == "maslow":
            reiss=['physiological', 'love', 'spiritual growth', 'esteem', 'stability']
            #human_needs =['physiological', 'love', 'spiritual growth', 'esteem', 'stability']
        elif self.config["human_needs"] == "reiss":
            #reiss = ['status', 'approval', 'tranquility', 'competition', 'health', 'family', 'romance', 'food', 'indep', 'power', 'order', 'curiosity', 'serenity', 'honor', 'belonging', 'contact', 'savings', 'idealism', 'rest']
            reiss = ['status', 'approval', 'tranquility', 'competition', 'health', 'family', 'romance', 'food', 'indep', 'power', 'order', 'curiosity', 'serenity', 'honor', 'contact', 'savings', 'idealism', 'rest']
                
        self.initializer = None
        if self.config["initializer"] == "normal":
            self.initializer = tf.random_normal_initializer(mean=0.0, stddev=0.1)
        elif self.config["initializer"] == "glorot":
            self.initializer = tf.glorot_uniform_initializer()
        elif self.config["initializer"] == "xavier":
            self.initializer = tf.glorot_normal_initializer()
            
            
            
            
###############################################################################    BILSTM   #############################################################################################
        if self.config["neural_network"]=="BILSTM":
###############################################################################   SENTENCE BI-LSTM  #############################################################################################
         zeros_initializer = tf.zeros_initializer()
         input_tensor = None
         with tf.variable_scope("sentence"):
          word_lstm_cell_fw = tf.nn.rnn_cell.LSTMCell(self.config["word_recurrent_size"], 
            use_peepholes=self.config["lstm_use_peepholes"], 
            state_is_tuple=True, 
            initializer=self.initializer,
            reuse=False)
            
          word_lstm_cell_bw = tf.nn.rnn_cell.LSTMCell(self.config["word_recurrent_size"], 
            use_peepholes=self.config["lstm_use_peepholes"], 
            state_is_tuple=True, 
            initializer=self.initializer,
            reuse=False)
            
          self.word_embeddings = tf.get_variable("word_embeddings", 
                                 shape=[len(self.term2index), self.config["word_embedding_size"]], 
                                 initializer=(zeros_initializer if self.config["emb_initial_zero"] == True else self.initializer), 
                                 trainable=(True if self.config["train_embeddings"] == True else False))
          use_elmo = True
          if use_elmo:
          	elmo = hub.Module("https://tfhub.dev/google/elmo/2", trainable=True)   
          	input_tensor = elmo(inputs={"tokens": self.sentence_tokens,"sequence_len": self.sentence_lengths},signature="tokens",as_dict=True)["elmo"]
          else:
          	input_tensor = tf.nn.embedding_lookup(self.word_embeddings, self.word_ids)          
          
          input_vector_size = self.config["word_embedding_size"]
          self.word_representations = input_tensor
          dropout_input = self.config["dropout_input"] * tf.cast(self.is_training, tf.float32) + (1.0 - tf.cast(self.is_training, tf.float32))
          input_tensor =  tf.nn.dropout(input_tensor, dropout_input, name="dropout_word")
                
          (lstm_outputs_fw, lstm_outputs_bw), ((_, lstm_output_fw), (_, lstm_output_bw)) = tf.nn.bidirectional_dynamic_rnn(word_lstm_cell_fw, word_lstm_cell_bw, input_tensor, sequence_length=self.sentence_lengths, dtype=tf.float32, time_major=False)
          
          dropout_word_lstm = self.config["dropout_word_lstm"] * tf.cast(self.is_training, tf.float32) + (1.0 - tf.cast(self.is_training, tf.float32))
          lstm_outputs_fw =  tf.nn.dropout(lstm_outputs_fw, dropout_word_lstm, noise_shape=tf.convert_to_tensor([tf.shape(self.word_ids)[0],1,self.config["word_recurrent_size"]], dtype=tf.int32))
          lstm_outputs_bw =  tf.nn.dropout(lstm_outputs_bw, dropout_word_lstm, noise_shape=tf.convert_to_tensor([tf.shape(self.word_ids)[0],1,self.config["word_recurrent_size"]], dtype=tf.int32))
          lstm_outputs = tf.concat([lstm_outputs_fw, lstm_outputs_bw], -1)
          self.lstm_outputs = lstm_outputs
          
          if self.config["sentence_composition"] == "last":
                processed_tensor = lstm_outputs
                self.attention_weights_unnormalised = tf.zeros_like(self.word_ids, dtype=tf.float32)
          elif self.config["sentence_composition"] == "attention":
                attention_evidence = tf.layers.dense(lstm_outputs, self.config["attention_evidence_size"], activation=tf.sigmoid, kernel_initializer=self.initializer)
                attention_weights = tf.layers.dense(attention_evidence, 1, activation=None, kernel_initializer=self.initializer)
                attention_weights = tf.reshape(attention_weights, shape=tf.shape(self.word_ids))
                if self.config["attention_activation"] == "sharp":
                    attention_weights = tf.exp(attention_weights)
                elif self.config["attention_activation"] == "soft":
                    attention_weights = tf.sigmoid(attention_weights)
                elif self.config["attention_activation"] == "linear":
                    pass
                else:
                    raise ValueError("Unknown activation for attention: " + str(self.config["attention_activation"]))

                self.attention_weights_unnormalised = attention_weights
                attention_weights = tf.where(tf.sequence_mask(self.sentence_lengths), attention_weights, tf.zeros_like(attention_weights))
                attention_weights = attention_weights / tf.reduce_sum(attention_weights, 1, keep_dims=True)
                processed_tensor_1 = tf.reduce_sum(lstm_outputs * attention_weights[:,:,numpy.newaxis], 1)

          
          self.token_scores = [tf.where(tf.sequence_mask(self.sentence_lengths), self.attention_weights_unnormalised, tf.zeros_like(self.attention_weights_unnormalised) - 1e6)]
          
          if self.config["hidden_layer_size"] > 0:
             if self.config["sentence_composition"] == "attention":
                #processed_tensor_sentence = tf.reduce_max(lstm_outputs,1) 
                processed_tensor_sentence = tf.layers.dense(processed_tensor_1, self.config["hidden_layer_size"], activation=tf.nn.relu, kernel_initializer=self.initializer)
             elif self.config["sentence_composition"] == "last": 
               processed_tensor_sentence = tf.layers.dense(processed_tensor, self.config["hidden_layer_size"], activation=tf.nn.relu, kernel_initializer=self.initializer)
          
          
#####################################################################  CONTEXT BI-LSTM ##################################################   
          
         with tf.variable_scope("context"):      
          
          context_lstm_cell_fw = tf.nn.rnn_cell.LSTMCell(self.config["word_recurrent_size"], 
            use_peepholes=self.config["lstm_use_peepholes"], 
            state_is_tuple=True, 
            initializer=self.initializer,
            reuse=False)
            
          context_lstm_cell_bw = tf.nn.rnn_cell.LSTMCell(self.config["word_recurrent_size"], 
            use_peepholes=self.config["lstm_use_peepholes"], 
            state_is_tuple=True, 
            initializer=self.initializer,
            reuse=False)    
          input_vector_size = self.config["word_embedding_size"]
          self.word_representations = input_tensor
          
          use_elmo = True
          if use_elmo:
                elmo = hub.Module("https://tfhub.dev/google/elmo/2", trainable=True)     
                input_tensor= elmo(inputs={"tokens": self.context_tokens,"sequence_len": self.context_lengths},signature="tokens",as_dict=True)["elmo"]
          else:
          	input_tensor = tf.nn.embedding_lookup(self.word_embeddings, self.word_ids_context)  
          dropout_input = self.config["dropout_input"] * tf.cast(self.is_training, tf.float32) + (1.0 - tf.cast(self.is_training, tf.float32))
          input_tensor =  tf.nn.dropout(input_tensor, dropout_input, name="dropout_word")	    
          (lstm_outputs_fw, lstm_outputs_bw), ((_, lstm_output_fw), (_, lstm_output_bw)) = tf.nn.bidirectional_dynamic_rnn(context_lstm_cell_fw, context_lstm_cell_bw, input_tensor, sequence_length=self.context_lengths, dtype=tf.float32, time_major=False)
          
          dropout_word_lstm = self.config["dropout_word_lstm"] * tf.cast(self.is_training, tf.float32) + (1.0 - tf.cast(self.is_training, tf.float32))
          lstm_outputs_fw =  tf.nn.dropout(lstm_outputs_fw, dropout_word_lstm, noise_shape=tf.convert_to_tensor([tf.shape(self.word_ids_context)[0],1,self.config["word_recurrent_size"]], dtype=tf.int32))
          lstm_outputs_bw =  tf.nn.dropout(lstm_outputs_bw, dropout_word_lstm, noise_shape=tf.convert_to_tensor([tf.shape(self.word_ids_context)[0],1,self.config["word_recurrent_size"]], dtype=tf.int32))
          lstm_outputs = tf.concat([lstm_outputs_fw, lstm_outputs_bw], -1)
          #if self.config["hidden_layer_size"] > 0:
          #      lstm_outputs = tf.layers.dense(lstm_outputs, self.config["hidden_layer_size"], activation=tf.nn.relu, kernel_initializer=self.initializer)
          self.lstm_outputs = lstm_outputs

          if self.config["sentence_composition"] == "last":
                processed_tensor_context = lstm_outputs
                self.attention_weights_unnormalised = tf.zeros_like(self.word_ids_context, dtype=tf.float32)
          elif self.config["sentence_composition"] == "attention":      
                attention_evidence = tf.layers.dense(lstm_outputs, self.config["attention_evidence_size"], activation=tf.sigmoid, kernel_initializer=self.initializer)

                attention_weights = tf.layers.dense(attention_evidence, 1, activation=None, kernel_initializer=self.initializer)
                attention_weights = tf.reshape(attention_weights, shape=tf.shape(self.word_ids_context))

                if self.config["attention_activation"] == "sharp":
                    attention_weights = tf.softmax(attention_weights)
                elif self.config["attention_activation"] == "soft":
                    attention_weights = tf.sigmoid(attention_weights)
                elif self.config["attention_activation"] == "linear":
                    pass
                else:
                    raise ValueError("Unknown activation for attention: " + str(self.config["attention_activation"]))

                self.attention_weights_unnormalised = attention_weights
                attention_weights = tf.where(tf.sequence_mask(self.context_lengths), attention_weights, tf.zeros_like(attention_weights))
                attention_weights = attention_weights / tf.reduce_sum(attention_weights, 1, keep_dims=True)
                processed_tensor_context = tf.reduce_sum(lstm_outputs * attention_weights[:,:,numpy.newaxis], 1)
          if self.config["hidden_layer_size"] > 0:
              #processed_tensor_context = tf.reduce_mean(lstm_outputs,1)
              processed_tensor_context = tf.layers.dense(processed_tensor_context, self.config["hidden_layer_size"], activation=tf.nn.relu, kernel_initializer=self.initializer)


####################################################################### KNOWLEDGE Bi-LSTM ####################################################################################################
         processed_tensor_1 = processed_tensor_sentence
         with tf.variable_scope("knowledge"):
           knowledge_input_tensor = tf.nn.embedding_lookup(self.word_embeddings, self.word_ids_knowledge)
           input_vector_size = self.config["word_embedding_size"]
           
           know_lstm_cell_fw = tf.nn.rnn_cell.LSTMCell(self.config["word_embedding_size"],
                    use_peepholes=self.config["lstm_use_peepholes"], 
                    state_is_tuple=True, 
                    initializer=self.initializer,
                    reuse=False)
           know_lstm_cell_bw = tf.nn.rnn_cell.LSTMCell(self.config["word_embedding_size"],
                    use_peepholes=self.config["lstm_use_peepholes"], 
                    state_is_tuple=True, 
                    initializer=self.initializer,
                    reuse=False)
           
           self.word_representations = knowledge_input_tensor
           s = tf.shape(knowledge_input_tensor)
           knowledge_input_tensor = tf.reshape(knowledge_input_tensor, shape=[s[0]*s[1], s[2], self.config["word_embedding_size"]])
           knowledge_lengths = tf.reshape(self.knowledge_max_lengths, shape=[s[0]*s[1]])
           dropout_input = self.config["dropout_input"] * tf.cast(self.is_training, tf.float32) + (1.0 - tf.cast(self.is_training, tf.float32))
           knowledge_input_tensor =  tf.nn.dropout(knowledge_input_tensor, dropout_input, name="dropout_word")
              
           char_lstm_outputs = tf.nn.bidirectional_dynamic_rnn(know_lstm_cell_fw, know_lstm_cell_bw, knowledge_input_tensor, sequence_length=knowledge_lengths, dtype=tf.float32, time_major=False)
           _, ((_, char_output_fw), (_, char_output_bw)) = char_lstm_outputs
           lstm_outputs = tf.concat([char_output_fw, char_output_bw], -1)
          '''                        
           if self.config["sentence_composition"] == "attention":      
                attention_evidence = tf.layers.dense(lstm_outputs, self.config["attention_evidence_size"], activation=tf.sigmoid, kernel_initializer=self.initializer)
                attention_weights = tf.layers.dense(attention_evidence, 1, activation=None, kernel_initializer=self.initializer)
                attention_weights = tf.reshape(attention_weights, shape=tf.shape(self.word_ids_knowledge))
                if self.config["attention_activation"] == "sharp":
                    attention_weights = tf.softmax(attention_weights)
                elif self.config["attention_activation"] == "soft":
                    attention_weights = tf.sigmoid(attention_weights)
                elif self.config["attention_activation"] == "linear":
                    pass
                else:
                    raise ValueError("Unknown activation for attention: " + str(self.config["attention_activation"]))

                self.attention_weights_unnormalised = attention_weights
                attention_weights = tf.where(tf.sequence_mask(self.knowledge_max_lengths), attention_weights, tf.zeros_like(attention_weights))
                attention_weights = attention_weights / tf.reduce_sum(attention_weights, 1, keep_dims=True)
                atten_shape = tf.shape(attention_weights)
                attention_weights = tf.reshape(attention_weights, shape=[tf.shape(attention_weights)[0]*tf.shape(attention_weights)[1],tf.shape(attention_weights)[2]])
                lstm_outputs = tf.reduce_sum(lstm_outputs * attention_weights[:,:,numpy.newaxis], 1)
          
           '''
           lstm_outputs = tf.reshape(lstm_outputs, shape=[s[0], s[1], 2*self.config["word_embedding_size"]])
           dropout_word_lstm = self.config["dropout_word_lstm"] * tf.cast(self.is_training, tf.float32) + (1.0 - tf.cast(self.is_training, tf.float32))
           lstm_outputs = tf.nn.dropout(lstm_outputs, dropout_word_lstm)
           if self.config["whidden_layer_size"] > 0:
              lstm_outputs = tf.layers.dense(lstm_outputs, self.config["hidden_layer_size"], activation=tf.nn.relu, kernel_initializer=self.initializer)
           knowledge_output_vector_size = 2 * self.config["word_embedding_size"] 
           self.lstm_outputs = lstm_outputs         
           t_lstm_outputs = tf.transpose(lstm_outputs, [0, 2, 1])
           if self.config["sentence_composition"] == "attention":
                processed_tensor_1 = tf.expand_dims(processed_tensor_1, -1) #batch, Dim, 1
                processed_tensor_1 = tf.transpose(processed_tensor_1, [0,2,1])
                attention_weights = tf.matmul(processed_tensor_1,t_lstm_outputs) #batch, length_of_sentence, number of Knowledge
                if self.config["attention_activation"] == "sharp":
                    attention_weights = tf.exp(attention_weights)
                elif self.config["attention_activation"] == "soft":
                    attention_weights = tf.nn.softmax(attention_weights)
                    #pass
                elif self.config["attention_activation"] == "linear":
                    pass
                else:
                    raise ValueError("Unknown activation for attention: " + str(self.config["attention_activation"]))

                self.attention_weights_unnormalised = attention_weights 
                #attention_weights = tf.transpose(attention_weights, [0, 2, 1])# batch, 1,number of Knowledge
                self.attention_weights = attention_weights 
                #attention_weights = tf.squeeze(attention_weights)
                #attention_weights = tf.exp(attention_weights)
                sum_attention_weights = attention_weights
                #sum_attention_weights = tf.squeeze(attention_weights)
                #attention_weights = tf.reshape(attention_weights, [s[0],s[1],s[2]])
                #attention_weights = tf.where(tf.sequence_mask(self.knowledge_max_lengths), attention_weights, tf.zeros_like(attention_weights))
                #attention_weights = attention_weights / tf.reduce_sum(sum_attention_weights,-1, keep_dims=True)
                #attention_weights = tf.reshape(attention_weights, [s[0],s[1]*s[2]])
                #attention_weights = tf.expand_dims(attention_weights, -1)
                self.attention_weights = tf.squeeze(attention_weights)

                #attention_weights = tf.squeeze(attention_weights)
                
                #weights_path = tf.expand_dims(self.weights_path, -1)
                #weights_path = tf.transpose(weights_path, [0,2,1])
                #attention_weights = tf.matmul(weights_path,attention_weights)
                attention_weights = tf.transpose(attention_weights, [0, 2, 1])
                #attention_weights = tf.expand_dims(attention_weights, -1)
                
                processed_tensor_knowledge = tf.reduce_sum(lstm_outputs * attention_weights, axis=1)  # bs, d
                processed_tensor_knowledge = tf.layers.dense(processed_tensor_knowledge, self.config["hidden_layer_size"], activation=tf.nn.relu, kernel_initializer=self.initializer) 
                #processed_tensor_knowledge_att = tf.expand_dims(processed_tensor_knowledge, -1) #batch, Dim, 1
                #processed_tensor_knowledge_att = tf.transpose(processed_tensor_knowledge_att, [0,2,1]) #batch,1,Dim
                            ### attention over attention for the sentence
                #attention_weights = tf.matmul(processed_tensor_knowledge_att, processed_tensor_1)
                #attention_weights = tf.nn.softmax(attention_weights)
                #attention_weights = tf.transpose(attention_weights, [0, 2, 1])
                #processed_tensor_knowledge_sentence = tf.reduce_sum(attention_weights * tf.transpose(processed_tensor_1,[0,2,1]), axis=1)
                
           #if self.config["hidden_layer_size"] > 0:
                #processed_tensor_knowledge = tf.layers.dense(processed_tensor_knowledge, self.config["hidden_layer_size"], activation=tf.nn.relu, kernel_initializer=self.initializer) 
                #processed_tensor_knowledge_sentence = tf.layers.dense(processed_tensor_knowledge_sentence, self.config["hidden_layer_size"], activation=tf.nn.relu, kernel_initializer=self.initializer)        
                                
          
          
#####################################################################################################################################################
###############################################################CALCULATE SCORE################################################################
#####################################################################################################################################################  

          
         if self.config["sentence_composition"] == "attention":
              dense_input_sen_con = tf.concat([processed_tensor_sentence, processed_tensor_context],1)
              
              dense_input_sen_con = tf.layers.dense(dense_input_sen_con, self.config["hidden_layer_size"], activation=tf.nn.relu, kernel_initializer=self.initializer)      
              dense_input = tf.concat([processed_tensor_sentence, processed_tensor_knowledge],1) #,processed_tensor_knowledge,,processed_tensor_context
              dense_input = tf.layers.dense(dense_input, self.config["hidden_layer_size"], activation=tf.nn.relu, kernel_initializer=self.initializer) 
              
              
              final_score = (dense_input * processed_tensor_sentence) + (dense_input * processed_tensor_knowledge)
              final_score = dense_input_sen_con
              softmax_w = tf.get_variable('softmax_w', shape=[100, len(reiss)],initializer=tf.zeros_initializer, dtype=tf.float32)    
              
         elif self.config["sentence_composition"] == "last":   
              dense_input = tf.concat([processed_tensor_sentence, processed_tensor_context],2) #,processed_tensor_knowledge,processed_tensor_sentence,,
              dense_input = tf.reshape(dense_input,[self.batch_size, self.max_lengths[0] * dense_input.get_shape()[2]])#self.max_lengths[0] * dense_input.get_shape()[2]])
              #dense_input = tf.concat([dense_input, processed_tensor_knowledge],1)
              softmax_w = tf.get_variable('softmax_w',shape = [56*200,len(reiss)], initializer=tf.zeros_initializer, dtype=tf.float32)
              
              
         softmax_b = tf.get_variable('softmax_b', shape=[len(reiss)],initializer=tf.zeros_initializer, dtype=tf.float32)
          
          #if self.config["hidden_layer_size"] > 0:
          #    dense_input = tf.layers.dense(dense_input, self.config["hidden_layer_size"], activation=tf.nn.relu, kernel_initializer=self.initializer)
          
         self.sentence_scores = tf.matmul(final_score, softmax_w) + softmax_b
          
          
          
##################################################################################################################################################### 
###############################################################CALCULATE SCORE################################################################
#####################################################################################################################################################  

         if self.config["human_needs"] == "maslow":
                          
                    w = [3.3580651133263086, 2.4043071629811266, 2.948496008202039, 2.609976477765905, 2.3545068920496965]
         else:
                    #with belonging: 
                    w = [3.929112469627414, 4.352634266669815, 4.105348968927056, 4.009469417408209, 4.436903109491611, 3.4714643441750805, 4.533726764493145, 3.665643259544512, 5.264175448882736, 6.026320782448594, 3.7522367243231805, 3.8019798963053515, 7.896001211803761, 8.024995943144209, 15.275082043791086, 3.3076036095385644, 3.81662584588786, 8.618279130276653, 6.7344516295276895]
                    #without belonging class: 
                    #w = [3.929112469627414, 4.352634266669815, 4.105348968927056, 4.009469417408209, 4.436903109491611, 3.4714643441750805, 4.533726764493145, 3.665643259544512, 5.264175448882736, 6.026320782448594, 3.7522367243231805, 3.8019798963053515, 7.896001211803761, 8.024995943144209, 3.3076036095385644, 3.81662584588786, 8.618279130276653, 6.7344516295276895]
                       
         w = tf.convert_to_tensor(w, dtype=tf.float32)
         lossy = tf.nn.weighted_cross_entropy_with_logits(targets=self.sentence_labels,logits=self.sentence_scores, pos_weight=w)
          
         self.loss = tf.reduce_sum(lossy)
         regularizer = tf.nn.l2_loss(softmax_w)
         self.loss = tf.reduce_mean(self.loss+(0.01 * regularizer))
         self.sentence_scores = tf.nn.sigmoid(self.sentence_scores)
         self.train_op = self.construct_optimizer(self.config["opt_strategy"], self.loss, self.learningrate, self.config["clip"])
Example #28
File: runner.py Project: wh-forker/DALI
def _cnn_model_function(features, labels, mode, params):
    model_func = params['model']
    model_format = params['format']
    model_dtype = params['dtype']
    momentum = params['momentum']
    learning_rate_init = params['learning_rate_init']
    learning_rate_power = params['learning_rate_power']
    decay_steps = params['decay_steps']
    weight_decay = params['weight_decay']
    loss_scale = params['loss_scale']
    larc_eta = params['larc_eta']
    larc_mode = params['larc_mode']
    deterministic = params['deterministic']
    num_classes = params['n_classes']
    dali_cpu = params['dali_cpu']

    device = '/gpu:0'
    labels = tf.reshape(labels, (-1, ))  # Squash unnecessary unary dim
    inputs = features  # TODO: Should be using feature columns?
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)

    with tf.device(device):
        inputs = tf.cast(inputs, model_dtype)
        if model_format == 'channels_first':
            inputs = tf.transpose(inputs, [0, 3, 1, 2])
        with nvutils.fp32_trainable_vars(
                regularizer=tf.contrib.layers.l2_regularizer(weight_decay)):
            top_layer = model_func(inputs, training=is_training)
            logits = tf.layers.dense(top_layer, num_classes)
        predicted_classes = tf.argmax(logits, axis=1, output_type=tf.int32)
        logits = tf.cast(logits, tf.float32)
        if mode == tf.estimator.ModeKeys.PREDICT:
            probabilities = tf.softmax(logits)
            predictions = {
                'class_ids': predicted_classes[:, None],
                'probabilities': probabilities,
                'logits': logits
            }
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)
        loss = tf.losses.sparse_softmax_cross_entropy(logits=logits,
                                                      labels=labels)
        loss = tf.identity(
            loss, name='loss'
        )  # For access by logger (TODO: Better way to access it?)
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        loss = tf.add_n([loss] + reg_losses, name='total_loss')
        with tf.device(
                None):  # Allow fallback to CPU if no GPU support for these ops
            top1_accuracy = tf.metrics.accuracy(labels=labels,
                                                predictions=predicted_classes)
            top5_accuracy = tf.metrics.mean(
                tf.nn.in_top_k(predictions=logits, targets=labels, k=5))
            tf.summary.scalar('top1_accuracy', top1_accuracy[1])
            tf.summary.scalar('top5_accuracy', top5_accuracy[1])
        if mode == tf.estimator.ModeKeys.EVAL:
            metrics = {
                'top1_accuracy': top1_accuracy,
                'top5_accuracy': top5_accuracy
            }
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)
        assert (mode == tf.estimator.ModeKeys.TRAIN)
        #batch_size = inputs.shape[0]
        batch_size = tf.shape(inputs)[0]
        learning_rate = tf.train.polynomial_decay(learning_rate_init,
                                                  tf.train.get_global_step(),
                                                  decay_steps=decay_steps,
                                                  end_learning_rate=0.,
                                                  power=learning_rate_power,
                                                  cycle=False,
                                                  name='learning_rate')
        opt = tf.train.MomentumOptimizer(learning_rate,
                                         momentum,
                                         use_nesterov=True)
        opt = hvd.DistributedOptimizer(opt)
        opt = nvutils.LarcOptimizer(opt,
                                    learning_rate,
                                    larc_eta,
                                    clip=larc_mode)
        opt = nvutils.LossScalingOptimizer(opt, scale=loss_scale)
        gate_gradients = (tf.train.Optimizer.GATE_OP
                          if deterministic else tf.train.Optimizer.GATE_NONE)
        train_op = opt.minimize(loss,
                                global_step=tf.train.get_global_step(),
                                gate_gradients=gate_gradients,
                                name='step_update')
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) or []
        train_op = tf.group(train_op, update_ops)
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
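The learning-rate schedule above is TensorFlow's polynomial decay with cycle=False; a small plain-Python sketch of the formula it evaluates (the numbers are illustrative, not taken from the project configuration):

def polynomial_decay(step, lr_init=0.1, end_lr=0.0, decay_steps=10000, power=2.0):
    # lr = (lr_init - end_lr) * (1 - step / decay_steps) ** power + end_lr
    step = min(step, decay_steps)
    return (lr_init - end_lr) * (1.0 - float(step) / decay_steps) ** power + end_lr

for step in (0, 2500, 5000, 7500, 10000):
    print(step, polynomial_decay(step))  # decays smoothly from lr_init down to end_lr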
Example #29
0
    def __init__(self,
                 sequence_length,
                 num_classes,
                 vocab_size,
                 embedding_size,
                 filter_sizes,
                 num_filters,
                 l2_reg_lambda=0.0):

        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.W = tf.Variable(tf.random_uniform(
                [vocab_size, embedding_size], -1.0, 1.0),
                                 name="W")
            self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
            self.embedded_chars_expanded = tf.expand_dims(
                self.embedded_chars, -1)

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                                name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]),
                                name="b")
                conv = tf.nn.conv2d(self.embedded_chars_expanded,
                                    W,
                                    strides=[1, 1, 1, 1],
                                    padding="VALID",
                                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(pooled_outputs, 3)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat,
                                        self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            # print self.scores
            self.scored_predictions = tf.nn.softmax(self.scores,
                                                    name="scored_predictions")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
Example #30
0
    def _build_model(self):
        """
        Build computation graph
        """
        # placeholders
        self.x_ph = tf.placeholder(tf.float32, [None, None, self.x_dim], name='inputs')
        self.y_ph = tf.placeholder(tf.float32, [None, self.y_dim], name='targets')
        # keep-probability for the dropout layer below; defaults to 1.0 (no dropout)
        self.keep_prob = tf.placeholder_with_default(1.0, shape=(), name='keep_prob')

        # build 1D convolutional blocks for each channel
        cnn_outputs = []  # a list to collect multi-channel 1D cnn outputs
        for channel in range(self.x_dim):
            with tf.name_scope('Conv_Maxpool_{}'.format(channel)):
                # filters and biases of 1D conv layers
                with tf.variable_scope('conv_maxpool_{}'.format(channel)):
                    filter1 = tf.get_variable('filter1', [5, 1, 8])
                    bias1 = tf.get_variable('bias1', [8], initializer=tf.constant_initializer(0.0))
                    filter2 = tf.get_variable('filter2', [3, 8, 4])
                    bias2 = tf.get_variable('bias2', [4], initializer=tf.constant_initializer(0.0))

                inputs = tf.reshape(self.x_ph[:, :, channel], [-1, self.seqlen, 1])

                # 1D cnn block 1, seqlen: 32 --> 14
                # filter shape [filter_width, in_channels, out_channels]
                conv1 = tf.nn.conv1d(
                    value=inputs, 
                    filters=filter1, 
                    stride=1, 
                    padding='VALID', 
                    name='conv1d_1'
                )
                seqlen = self.seqlen - 4
                h1 = tf.nn.relu(tf.nn.bias_add(conv1, bias1), name='h1_relu')
                h1 = tf.reshape(h1, shape=[-1, 1, self.seqlen - 4, 8])
                avgpool1 = tf.nn.avg_pool(
                    value=h1, 
                    ksize=[1, 1, 2, 1],  
                    strides=[1, 1, 2, 1],
                    padding='VALID', 
                    name='avg_pool_1'
                )
                avgpool1 = tf.reshape(avgpool1, shape=[-1, 14, 8])

                # 1D cnn block 2, seqlen: 14 --> 6
                conv2 = tf.nn.conv1d(
                    value=avgpool1, 
                    filters=filter2, 
                    stride=1, 
                    padding='VALID', 
                    name='conv1d_2'
                )
                h2 = tf.nn.relu(tf.nn.bias_add(conv2, bias2))
                h2 = tf.reshape(h2, shape = [-1, 1, 12, 4])
                avgpool2 = tf.nn.avg_pool(
                    value=h2, 
                    ksize=[1, 1, 2, 1], 
                    strides=[1, 1, 2, 1], 
                    padding='VALID', 
                    name='avg_pool_2'
                )
                avgpool2 = tf.reshape(avgpool2, shape=[-1, 1, 6, 4])

                # collect multi-channel outputs
                cnn_outputs.append(avgpool2)

        # Combine all channels' cnn outputs
        cnn_outputs = tf.concat(cnn_outputs, axis=3)
        num_filters = self.x_dim * 4
        cnn_outputs_flat = tf.reshape(cnn_outputs, [-1, num_filters * 6])  # [batch, x_dim * 24]

        # fully connected layer
        with tf.name_scope('Dense'):
            with tf.variable_scope('logits'):
                dense1_w = tf.get_variable('wd1', [num_filters * 6, 16])
                dense1_b = tf.get_variable('bd1', [16], initializer=tf.constant_initializer(0.0))
            fc1 = tf.matmul(cnn_outputs_flat, dense1_w) + dense1_b
            dense_outputs = tf.nn.relu(fc1)
            # dropout
            dense1_dropout = tf.nn.dropout(dense_outputs, self.keep_prob, name='dropout')

        # final outputs
        with tf.variable_scope('Logits'):
            logits_w = tf.get_variable('logits_w', [16, self.y_dim])
            logits_b = tf.get_variable('logits_b', [self.y_dim], initializer=tf.constant_initializer(0.0))

        logits = tf.matmul(dense1_dropout, logits_w) + logits_b

        # predictions
        with tf.name_scope('Prediction'):
            self.preds = tf.nn.softmax(logits)

        # training with gradient descent, global variables
        with tf.name_scope('Global'):
            global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
            self.learning_rate = tf.train.exponential_decay(
                learning_rate=self.initial_learning_rate,
                global_step=global_step,
                decay_steps=self.decay_steps,
                decay_rate=self.decay_rate
            )
        with tf.name_scope('Loss'):
            self.loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=self.y_ph))
        with tf.name_scope('Train'):
            self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss, global_step=global_step)

        # summaries
        self.summaries = tf.summary.merge([
            tf.summary.scalar('loss', self.loss),
        ])
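A quick check of the sequence-length bookkeeping claimed in the comments above (VALID convolutions shrink the length by filter_width - 1, stride-2 pooling halves it):

seqlen = 32
seqlen -= 5 - 1   # conv1d_1, filter width 5  -> 28
seqlen //= 2      # avg_pool_1, stride 2      -> 14
seqlen -= 3 - 1   # conv1d_2, filter width 3  -> 12
seqlen //= 2      # avg_pool_2, stride 2      -> 6
print(seqlen)     # 6 time steps x 4 filters per channel feed the dense layer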
Example #31
0
    def forward(self, x):
        return tf.nn.softmax(x)
Example #32
0
def res_net(x, y, activation=tf.nn.relu):
  """Builds a residual network.

  Note that if the input tensor is 2D, it must be square in order to be
  converted to a 4D tensor.

  Borrowed structure from:
  github.com/pkmital/tensorflow_tutorials/blob/master/10_residual_network.py

  Args:
    x: Input of the network
    y: Output of the network
    activation: Activation function to apply after each convolution

  Returns:
    Predictions and loss tensors.
  """

  # Configurations for each bottleneck group.
  BottleneckGroup = namedtuple('BottleneckGroup',
                               ['num_blocks', 'num_filters', 'bottleneck_size'])
  groups = [
      BottleneckGroup(3, 128, 32), BottleneckGroup(3, 256, 64),
      BottleneckGroup(3, 512, 128), BottleneckGroup(3, 1024, 256)
  ]

  input_shape = x.get_shape().as_list()

  # Reshape the input into the right shape if it's 2D tensor
  if len(input_shape) == 2:
    ndim = int(sqrt(input_shape[1]))
    x = tf.reshape(x, [-1, ndim, ndim, 1])

  # First convolution expands to 64 channels
  with tf.variable_scope('conv_layer1'):
    net = convolution2d(
        x, 64, 7, normalizer_fn=batch_norm, activation_fn=activation)

  # Max pool
  net = tf.nn.max_pool(net, [1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

  # First chain of resnets
  with tf.variable_scope('conv_layer2'):
    net = convolution2d(net, groups[0].num_filters, 1, padding='VALID')

  # Create the bottleneck groups, each of which contains `num_blocks`
  # bottleneck groups.
  for group_i, group in enumerate(groups):
    for block_i in range(group.num_blocks):
      name = 'group_%d/block_%d' % (group_i, block_i)

      # 1x1 convolution responsible for reducing dimension
      with tf.variable_scope(name + '/conv_in'):
        conv = convolution2d(
            net,
            group.bottleneck_size,
            1,
            padding='VALID',
            activation_fn=activation,
            normalizer_fn=batch_norm)

      with tf.variable_scope(name + '/conv_bottleneck'):
        conv = convolution2d(
            conv,
            group.bottleneck_size,
            3,
            padding='SAME',
            activation_fn=activation,
            normalizer_fn=batch_norm)

      # 1x1 convolution responsible for restoring dimension
      with tf.variable_scope(name + '/conv_out'):
        input_dim = net.get_shape()[-1].value
        conv = convolution2d(
            conv,
            input_dim,
            1,
            padding='VALID',
            activation_fn=activation,
            normalizer_fn=batch_norm)

      # shortcut connections that turn the network into its counterpart
      # residual function (identity shortcut)
      net = conv + net

    try:
      # upscale to the next group size
      next_group = groups[group_i + 1]
      with tf.variable_scope('block_%d/conv_upscale' % group_i):
        net = convolution2d(
            net,
            next_group.num_filters,
            1,
            activation_fn=None,
            biases_initializer=None,
            padding='SAME')
    except IndexError:
      pass

  net_shape = net.get_shape().as_list()
  net = tf.nn.avg_pool(
      net,
      ksize=[1, net_shape[1], net_shape[2], 1],
      strides=[1, 1, 1, 1],
      padding='VALID')

  net_shape = net.get_shape().as_list()
  net = tf.reshape(net, [-1, net_shape[1] * net_shape[2] * net_shape[3]])

  target = tf.one_hot(y, depth=10, dtype=tf.float32)
  logits = tf.contrib.layers.fully_connected(net, 10, activation_fn=None)
  loss = tf.losses.softmax_cross_entropy(target, logits)
  return tf.nn.softmax(logits), loss
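A hypothetical usage sketch for res_net: as the docstring notes, a 2D input must have a square feature dimension (here 784 = 28 * 28) so it can be reshaped to a 4D image tensor; the placeholder shapes below are assumptions for illustration.

x = tf.placeholder(tf.float32, [None, 784], name='images')  # e.g. flattened 28x28 images
y = tf.placeholder(tf.int32, [None], name='labels')          # integer class ids in [0, 10)
predictions, loss = res_net(x, y)
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)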
def train_softmax(x, y, x_test, y_test, learning_rate=0.005, max_iterations=1000000, regularization=1., w_diff_term_crit=0.0001, print_per_iteration=False):
    # assert on the condition itself; a parenthesized (condition, message) tuple is always truthy
    assert x.shape[1] == x_test.shape[1], \
        "train shape:" + str(x.shape) + \
        " and test shape:" + str(x_test.shape) + \
        " do not match in dimensionality"

    assert x.shape[0] == y.shape[0], \
        "number of training samples:" + str(x.shape) + \
        " and number of labels:" + str(y.shape) + \
        " do not match!"
    assert x_test.shape[0] == y_test.shape[0], \
        "number of testing samples:" + str(x_test.shape) + \
        " and number of labels:" + str(y_test.shape) + \
        " do not match!"


    # set up constants
    num_input_dims = x.shape[1]
    num_label_dims = y.shape[1]

    reg_fact = tf.constant(regularization, name='regularization_factor')

    with tf.name_scope('input'):
        x_input = tf.placeholder(tf.float32, shape=[None, num_input_dims], name='input')
    with tf.name_scope('target'):
        y_ = tf.placeholder(tf.float32, shape=[None, num_label_dims], name='target')

    # linear regression
    with tf.name_scope('linear_regression'):
        # init_vals = , name='truncated_normal_init_val_w')
        w = tf.Variable(tf.truncated_normal([num_input_dims, num_label_dims], stddev=1. / math.sqrt(2)), name='w')
        b = tf.Variable(tf.zeros([num_label_dims]), name='b')
        logits = tf.matmul(x_input, w) + b
        output = tf.nn.softmax(logits)

    with tf.name_scope('regularization'):
        l2loss = tf.nn.l2_loss(w,name="l2_loss")
        regularization_penalty = tf.reduce_sum(tf.square(l2loss), name='regularization_penalty_sum')
        regularization_penalty *= reg_fact

    with tf.name_scope('loss'):
        # cross-entropy loss on the pre-softmax logits (+ optional regularization penalty)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
        # diff = y_ - output
        # sq_diff = tf.square(diff)
        # loss = tf.reduce_mean(sq_diff) + regularization_penalty
        # loss = tf.reduce_mean(sq_diff)

    with tf.name_scope('optimizer'):
        opt = tf.train.GradientDescentOptimizer(learning_rate)
        grads = opt.compute_gradients(loss)
        opt = opt.apply_gradients(grads)

    init = tf.global_variables_initializer()
    sess = tf.Session()

    sess.run(init)

    its = 0
    loss_train = -1.

    w_old = sess.run(w)[0][0]
    for i in range(0, max_iterations):
        w__, output__, loss__, _, regularization_penalty__ = sess.run(
            [w, output, loss, opt, regularization_penalty], feed_dict={x_input: x, y_: y})
        loss_train = loss__
        if print_per_iteration:  # print progress every iteration
            print("regularization_penalty:", regularization_penalty__)
            print("iteration:", i)
            print("weight:", w__)
            print("loss:", loss__)
        w_new = sess.run(w)[0][0]
        its += 1
        w_diff = np.sum(np.abs(w_new - w_old))

        # termination criterion: stop once the weight change falls below the threshold
        if w_diff < w_diff_term_crit:
            print("reg_param:", regularization, "finished at iteration:", its, w_new)
            # print("weights:", w_new)
            # print("weight_difference:", w_diff)
            break
        w_old = w_new

    loss_test = sess.run([loss], feed_dict={x_input: x_test, y_: y_test})
    sess.close()
    tf.reset_default_graph()
    return its, loss_test, loss_train
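A hypothetical usage sketch for train_softmax; the feature matrices and one-hot label matrices below are random made-up data, just to show the expected shapes and the returned values:

import numpy as np

x_train = np.random.randn(200, 10).astype(np.float32)
y_train = np.eye(3)[np.random.randint(0, 3, size=200)].astype(np.float32)
x_test = np.random.randn(50, 10).astype(np.float32)
y_test = np.eye(3)[np.random.randint(0, 3, size=50)].astype(np.float32)

iterations, test_loss, train_loss = train_softmax(
    x_train, y_train, x_test, y_test,
    learning_rate=0.01, max_iterations=5000, regularization=0.1)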