Ejemplo n.º 1
0
    def forward(self, query, key, value, mask=None):
        """Apply multi-head attention to ``query``, ``key`` and ``value``.

        Each input goes through its own linear layer (``self.linears[0..2]``),
        is split into ``self.head`` heads of width ``self.d_k``, passed to
        ``attention`` and merged back before the output projection
        ``self.linears[-1]``.

        Args:
            query: (batch, num_query, d_embedding)
            key: (batch, num_key, d_embedding)
            value: (batch, num_value, d_embedding)
            mask: optional padding mask. A head axis is inserted at dim 1 so
                the same mask is applied to all heads.

        Returns:
            ``self.linears[-1]`` applied to the merged heads, whose input has
            shape (batch, num_query, head * d_k).

        Side effects:
            Stores the second value returned by ``attention`` in ``self.attn``.
        """
        if mask is not None:  # Same padding mask applied to all h heads.
            mask = mask.unsqueeze(1)

        batch_size = query.size(0)

        # Linear projection for query, key and value:
        # (batch, num_word, d_embedding) -> (batch, num_head, num_word, d_k)
        query = self.linears[0](query).view(
            batch_size, -1, self.head, self.d_k).transpose(1, 2)
        key = self.linears[1](key).view(
            batch_size, -1, self.head, self.d_k).transpose(1, 2)
        value = self.linears[2](value).view(
            batch_size, -1, self.head, self.d_k).transpose(1, 2)

        # NOTE: the per-call debug print of ``query.shape`` was removed; it
        # wrote to stdout on every forward pass.

        # Scaled dot-product attention per head: (batch, heads, num_word, d_k)
        x, self.attn = attention(query,
                                 key,
                                 value,
                                 mask=mask,
                                 dropout=self.dropout)

        # "Concatenate" heads: (batch, heads, num_query, d_k)
        #                   => (batch, num_query, heads * d_k)
        x = x.transpose(1, 2).contiguous().view(batch_size, -1,
                                                self.head * self.d_k)
        return self.linears[-1](x)
Ejemplo n.º 2
0
    def forward(self, query, key, value, mask=None):
        """Multi-head attention forward pass.

        @param query (tensor(float)): (batch_size, query_length, embed_size),
            where embed_size == d_model
        @param key (tensor(float)): (batch_size, key_length, embed_size)
        @param value (tensor(float)): (batch_size, value_length, embed_size)
        @param mask (tensor(int)): documented by the original author as
            (batch_size, 1, sentence_length), with the caveat that dim 1 may
            not always be 1
        @returns the output of ``self.linears[-1]`` on the merged heads;
            the attention weights returned by ``attention`` are kept in
            ``self.attn``
        """
        # Insert a head axis so one mask broadcasts over all h heads.
        if mask is not None:
            mask = mask.unsqueeze(1)

        nbatches = query.size(0)

        # 1) Project each input and reshape d_model => h x d_k, giving
        #    tensors of shape (batch, h, sent_len, d_k).
        projected = []
        for layer, tensor in zip(self.linears, (query, key, value)):
            per_head = layer(tensor).view(nbatches, -1, self.h, self.d_k)
            projected.append(per_head.transpose(1, 2))
        query, key, value = projected

        # 2) Attention over all heads at once.
        x, self.attn = attention(
            query, key, value, mask=mask, dropout=self.dropout)

        # 3) Merge the heads back with a view, then the final linear layer.
        merged = x.transpose(1, 2).contiguous()
        merged = merged.view(nbatches, -1, self.h * self.d_k)
        return self.linears[-1](merged)
Ejemplo n.º 3
0
    def forward(self, query, key, value, mask=None):
        '''
        Multi-head attention over ``query``/``key``/``value``.

        key and value always have the same size.

        NOTE(review): the original docstring gave 2-D shapes
        ([batch_size, q_len] etc.), but the head split below needs the
        projected tensors to carry ``h * d_k`` features per position, i.e.
        [batch_size, len, h * d_k] -- confirm against the callers.

        :param query: query tensor, batch on dim 0
        :param key: key tensor, batch on dim 0
        :param value: value tensor, same size as ``key``
        :param mask: [batch_size, q_len, k_len] (a leading dim of 1 is
            broadcast over the batch), or None
        :return: ``self.proj_linear`` applied to the merged heads of shape
            [batch_size, q_len, h * d_k]; the attention weights returned by
            ``attention`` are stored in ``self.attn``
        '''
        batch_size = query.size(0)

        def split_heads(projected):
            # [batch, len, h * d_k] -> [batch * h, len, d_k].
            # The transpose is required: a plain view to (batch * h, -1, d_k)
            # would interleave sequence positions with heads.
            per_head = projected.view(batch_size, -1, self.h, self.d_k)
            return per_head.transpose(1, 2).contiguous().view(
                batch_size * self.h, -1, self.d_k)

        query = split_heads(self.query_linear(query))
        key = split_heads(self.key_linear(key))
        value = split_heads(self.value_linear(value))

        if mask is not None:
            # [batch, q_len, k_len] -> [batch * h, q_len, k_len], repeating
            # each example's mask once per head in the same (batch, head)
            # order used by split_heads. ``expand`` alone cannot grow a
            # non-singleton batch dim, hence the extra head axis.
            q_len, k_len = mask.size(-2), mask.size(-1)
            mask = mask.unsqueeze(1).expand(batch_size, self.h, -1, -1)
            mask = mask.contiguous().view(batch_size * self.h, q_len, k_len)

        # context_vec: [batch * h, q_len, d_k]
        context_vec, self.attn = attention(query, key, value, mask,
                                           self.dropout)

        # Undo the head split: [batch * h, q_len, d_k] -> [batch, q_len, h * d_k]
        # (the original used ``self.double() * self.h`` as a size, which is
        # not a number; the per-head width is ``self.d_k``).
        context_vec = context_vec.contiguous().view(batch_size, self.h, -1,
                                                    self.d_k)
        context_vec = context_vec.transpose(1, 2).contiguous().view(
            batch_size, -1, self.d_k * self.h)
        return self.proj_linear(context_vec)
Ejemplo n.º 4
0
    def forward(self, query, key, value, mask=None):
        """Multi-head attention forward pass.

        Per the original notes, ``query``, ``key`` and ``value`` all have
        size (batch_size, max_len, d_model). Returns ``self.linears[-1]``
        applied to the merged heads and stores the attention weights
        returned by ``attention`` in ``self.attn``.
        """
        # A head axis lets the same mask broadcast across all heads.
        if mask is not None:
            mask = mask.unsqueeze(1)
        batch_size = query.size(0)

        # Linear projections; afterwards query/key/value each have size
        # (batch_size, self.h, max_len, self.d_k).
        query, key, value = tuple(
            linear(x).view(batch_size, -1, self.h, self.d_k).transpose(1, 2)
            for linear, x in zip(self.linears, (query, key, value))
        )

        # Original notes:
        #   x.size(): (batch_size, h, max_len, d_v)
        #   self.attn.size(): (batch_size, h, max_len, d_v)
        x, self.attn = attention(
            query, key, value, mask=mask, dropout=self.dropout)

        # The transpose is necessary before merging heads:
        #   x.transpose(1, 2).size(): (batch_size, max_len, h, d_v)
        #   merged size:              (batch_size, max_len, h * d_v)
        heads_last = x.transpose(1, 2).contiguous()
        x = heads_last.view(batch_size, -1, self.h * self.d_k)

        # self.linears[-1] \in R^{h d_v \times d_{model}}
        output_projection = self.linears[-1]
        return output_projection(x)
Ejemplo n.º 5
0
    def forward(self, query, key, value, mask = None):
        """Multi-head attention: project, attend per head, merge, project.

        The first three entries of ``self.linears`` project ``query``,
        ``key`` and ``value``; the last entry is the output projection.
        The attention weights returned by ``attention`` are stored in
        ``self.attn``.
        """
        # Same mask applied to all h heads.
        if mask is not None:
            mask = mask.unsqueeze(1)

        nbatches = query.size(0)

        def to_heads(pair):
            # d_model => h x d_k, with the head axis moved to dim 1.
            layer, tensor = pair
            projected = layer(tensor)
            return projected.view(nbatches, -1, self.h, self.d_k).transpose(1, 2)

        # 1) Do all the linear projections in batch.
        query, key, value = map(to_heads, zip(self.linears, (query, key, value)))

        # 2) Apply attention on all the projected vectors in batch.
        x, self.attn = attention(query, key, value, mask = mask, dropout = self.dropout)

        # 3) "Concat" the heads using a view and apply the final linear.
        concatenated = x.transpose(1, 2).contiguous().view(nbatches, -1, self.h * self.d_k)
        return self.linears[-1](concatenated)
Ejemplo n.º 6
0
 def alstm_layer(self, inputs, lengths, state_size, keep_prob=1.0,
      scope = 'lstm-layer', reuse=False):
     """Run an LSTM over ``inputs`` and apply ``attention`` to its outputs.

     Args:
         inputs: input tensor passed to ``tf.nn.dynamic_rnn`` (batch-major,
             since ``attention`` is called with ``time_major=False``).
         lengths: per-example sequence lengths (``sequence_length``).
         state_size: number of units of the LSTM cell.
         keep_prob: output keep probability for the dropout wrapper.
         scope: variable scope name for the layer.
         reuse: whether to reuse variables in ``scope``.

     Returns:
         The result of ``attention(outputs, self.attention_size, ...)`` with
         ``return_alphas=False``.
     """
     with tf.variable_scope(scope, reuse=reuse):
         # Fixed: the module is ``tf.contrib`` (was misspelled ``tf.contirb``).
         cell = tf.contrib.rnn.DropoutWrapper(
             tf.contrib.rnn.LSTMCell(
                 state_size,
                 reuse=reuse
             ),
             output_keep_prob=keep_prob
         )
         # The final RNN state is not needed here.
         outputs, _ = tf.nn.dynamic_rnn(
             inputs=inputs,
             cell=cell,
             sequence_length=lengths,
             dtype=tf.float32
         )
         # Fixed: a stray trailing ':' made this line a syntax error.
         outputs = attention(outputs, self.attention_size, time_major=False, return_alphas=False)
         return outputs
Ejemplo n.º 7
0
def decoder(inputs, memory, is_training=True, scope="decoder"):
    """
    Attention decoder followed by two residual RNN layers and a dense
    projection to mel frames.

    The inputs are run through a prenet, then combined with ``memory`` by
    ``attention``. Two ``decoder_rnn`` layers are added on top with residual
    (additive) connections, and a dense layer maps the result to
    ``N_MELS * REDUCTION_FACTOR`` values per step.

    Design note from the original author: the decoder could target the raw
    spectrogram directly, but that is a highly redundant representation for
    learning the alignment between speech signal and text. The target is
    therefore an 80-band mel-scale spectrogram; fewer bands or more concise
    targets such as cepstrum could also be used.

    :param inputs: decoder inputs fed to ``pre_net``
    :param memory: second argument of ``attention`` (presumably the encoder
        output -- confirm against the caller)
    :param is_training: forwarded to ``pre_net``
    :param scope: variable scope name
    :return: tuple ``(mel_hats, alignments)``; ``alignments`` is the stacked
        alignment history transposed with permutation [1, 2, 0]
    """
    residual_scopes = ("decoder_rnn1", "decoder_rnn2")

    with tf.variable_scope(scope):
        # Prenet, then attention over the memory.
        prenet_out = pre_net(inputs, is_training=is_training)
        outputs, state = attention(prenet_out, memory, num_units=EMBED_SIZE)

        # Reorder the stacked alignment history.
        stacked_alignments = state.alignment_history.stack()
        alignments = tf.transpose(stacked_alignments, [1, 2, 0])

        # Two stacked decoder RNN layers with residual connections.
        for rnn_scope in residual_scopes:
            outputs += decoder_rnn(outputs, scope=rnn_scope)

        # Project to the mel-spectrogram target.
        mel_hats = tf.layers.dense(outputs, N_MELS * REDUCTION_FACTOR)
    return mel_hats, alignments
Ejemplo n.º 8
0
    def __init__(self,
                 batch_size,
                 num_unroll_steps,
                 embeddings,
                 embedding_size,
                 attention_dim,
                 rnn_size,
                 num_rnn_layers,
                 num_classes,
                 max_grad_norm,
                 dropout=1.,
                 l2_reg_lambda=0.0,
                 adjust_weight=False,
                 label_weight=None,
                 is_training=True):
        """Build the graph of a bidirectional-LSTM + attention classifier.

        Uses the pre-1.0 TensorFlow API (``tf.split(dim, n, x)``, ``tf.mul``,
        ``tf.scalar_summary`` ...), as the rest of this code does.

        Args:
            batch_size: stored on the instance; not used to build the graph.
            num_unroll_steps: number of time steps per input sequence.
            embeddings: initial embedding matrix; converted to float and
                stored as a trainable variable.
            embedding_size: stored on the instance; not used here.
            attention_dim: forwarded to ``attention``.
            rnn_size: hidden size of each LSTM cell (the softmax layer takes
                ``2 * rnn_size`` features).
            num_rnn_layers: number of stacked layers per direction.
            num_classes: number of output classes.
            max_grad_norm: global-norm clipping threshold for the gradients.
            dropout: keep probability passed to ``tf.nn.dropout``.
            l2_reg_lambda: weight of the L2 penalty on the softmax layer;
                also forwarded to ``attention``.
            adjust_weight: stored on the instance; not used here.
            label_weight: stored on the instance; not used here. ``None``
                (the default) is stored as a fresh empty list.
            is_training: when False, construction stops after the summaries
                and no optimizer ops are created.
        """
        # define input variable
        self.keep_prob = dropout
        self.batch_size = batch_size
        self.embeddings = embeddings
        self.embedding_size = embedding_size
        self.attention_dim = attention_dim
        self.num_classes = num_classes
        self.adjust_weight = adjust_weight
        # Avoid sharing one mutable default list between instances.
        self.label_weight = [] if label_weight is None else label_weight
        self.rnn_size = rnn_size
        self.num_rnn_layers = num_rnn_layers
        self.num_unroll_steps = num_unroll_steps
        self.l2_reg_lambda = l2_reg_lambda
        self.max_grad_norm = max_grad_norm
        self.is_training = is_training

        self.input_data = tf.placeholder(tf.int32,
                                         [None, self.num_unroll_steps])
        self.target = tf.placeholder(tf.int64, [None])
        self.mask_x = tf.placeholder(tf.float32, [self.num_unroll_steps, None])

        # Build the BiLSTM network: one stacked cell per direction.
        # NOTE(review): ``[cell] * n`` repeats the same cell object; that is
        # the idiom of the old TF API used here, but newer TF versions
        # require one cell instance per layer.
        fw_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.rnn_size)
        fw_cell = tf.nn.rnn_cell.MultiRNNCell([fw_lstm_cell] *
                                              self.num_rnn_layers,
                                              state_is_tuple=True)
        bw_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.rnn_size)
        bw_cell = tf.nn.rnn_cell.MultiRNNCell([bw_lstm_cell] *
                                              self.num_rnn_layers,
                                              state_is_tuple=True)

        # embedding layer
        with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
            self.embeddings = tf.Variable(tf.to_float(self.embeddings),
                                          trainable=True,
                                          name="embeddings")
            inputs = tf.nn.embedding_lookup(self.embeddings, self.input_data)

        # dropout on the embedded inputs
        if self.is_training and self.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, self.keep_prob)

        # Split into a list with one tensor per time step.
        inputs = [
            tf.squeeze(step, [1])
            for step in tf.split(1, self.num_unroll_steps, inputs)
        ]

        out_put, _, _ = tf.nn.bidirectional_rnn(fw_cell,
                                                bw_cell,
                                                inputs,
                                                dtype=tf.float32)

        out_put = tf.transpose(out_put,
                               perm=[1, 0,
                                     2])  #(batch_size, steps, rnn_size*2)

        output = attention(out_put, self.attention_dim, self.l2_reg_lambda)

        # dropout on the attended representation
        if self.is_training and self.keep_prob < 1:
            output = tf.nn.dropout(output, self.keep_prob)

        with tf.name_scope("Softmax_layer_and_output"):
            softmax_w = tf.get_variable(
                "softmax_w",
                initializer=tf.truncated_normal(
                    [2 * self.rnn_size, self.num_classes], stddev=0.1))
            # NOTE(review): the bias has shape [1], i.e. one value shared by
            # all classes; left unchanged to keep variable shapes stable.
            softmax_b = tf.get_variable("softmax_b",
                                        initializer=tf.constant(0., shape=[1]))
            self.logits = tf.matmul(output, softmax_w) + softmax_b
            if self.l2_reg_lambda > 0:
                # Fixed: ``l2_loss`` was incremented before being defined.
                l2_loss = tf.nn.l2_loss(softmax_w) + tf.nn.l2_loss(softmax_b)
                weight_decay = tf.mul(l2_loss,
                                      self.l2_reg_lambda,
                                      name='l2_loss')
                tf.add_to_collection('losses', weight_decay)

        with tf.name_scope("loss"):
            # Per-example cross entropy.
            self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                self.logits, self.target)
            # Collect the batch mean (a scalar) so it can be summed with the
            # scalar weight-decay term; tf.add_n needs equal shapes.
            # NOTE(review): 'losses' is a graph-wide collection, so terms
            # added by other code in the same graph are summed in as well.
            tf.add_to_collection('losses', tf.reduce_mean(self.loss))
            total_loss = tf.add_n(tf.get_collection('losses'),
                                  name='total_loss')
            self.cost = tf.reduce_mean(total_loss)

        with tf.name_scope("accuracy"):
            self.prediction = tf.argmax(self.logits, 1)
            correct_prediction = tf.equal(self.prediction, self.target)
            self.correct_num = tf.reduce_sum(
                tf.cast(correct_prediction, tf.float32))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction,
                                                   tf.float32),
                                           name="accuracy")

        # summaries
        loss_summary = tf.scalar_summary("loss", self.cost)
        accuracy_summary = tf.scalar_summary("accuracy_summary", self.accuracy)

        # Inference-only graphs stop here: no optimizer ops.
        if not is_training:
            return

        self.globle_step = tf.Variable(0, name="globle_step", trainable=False)
        self.lr = tf.Variable(0.0, trainable=False)

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          self.max_grad_norm)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in zip(grads, tvars):
            if g is not None:
                grad_hist_summary = tf.histogram_summary(
                    "{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.scalar_summary(
                    "{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        self.grad_summaries_merged = tf.merge_summary(grad_summaries)

        self.summary = tf.merge_summary(
            [loss_summary, accuracy_summary, self.grad_summaries_merged])

        optimizer = tf.train.AdamOptimizer(self.lr)
        # Fixed: apply_gradients used to be called twice, leaving an extra,
        # unused update op in the graph.
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # The learning rate is changed by running ``_lr_update`` with a
        # value fed to ``new_lr``.
        self.new_lr = tf.placeholder(tf.float32,
                                     shape=[],
                                     name="new_learning_rate")
        self._lr_update = tf.assign(self.lr, self.new_lr)
Ejemplo n.º 9
0
def test(args):
    """Evaluate a saved ``baseline`` model and log per-batch F1 scores.

    Loads the weights from ``args.model_root``, runs the model over the
    ``BatchLoader`` dataset in order and, for each batch, prints and logs
    the logits, the thresholded predictions (sigmoid > 0.5), the ground
    truth and the running per-class F1. Results are written to
    ``args.out_dir + 'TestingLog.txt'``.

    Args:
        args: options object. Fields read here: ``side``, ``model_root``,
            ``out_dir``, ``is_savemaps``, ``imageroot``, ``gtroot``,
            ``reasonroot`` and ``batch_size``.

    Notes:
        * When ``args.side`` is true the model also returns reason logits,
          which are scored against ``dataBatch['reason']``; otherwise the
          reason metrics are skipped (they used to raise NameError).
        * When ``args.is_savemaps`` is true an attention image is saved to
          ``<out_dir>att_maps/<i>.jpg`` for every batch.
    """
    # Initialize the network
    model = baseline(args.side)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    model.load_state_dict(torch.load(args.model_root))
    print(model)

    model.eval()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    outdir = args.out_dir

    if args.is_savemaps:
        hook_conv5 = SimpleHook(model.layer4)

    # Initialize DataLoader
    Dataset = BatchLoader(
        imageRoot=args.imageroot,
        gtRoot=args.gtroot,
        reasonRoot=args.reasonroot,
    )
    dataloader = DataLoader(Dataset,
                            batch_size=int(args.batch_size),
                            num_workers=0,
                            shuffle=False)

    AccuracyArr = []
    AccOverallArr = []
    RandomAcc = []
    ReasonAcc = []

    SaveFilename = (outdir + 'TestingLog.txt')
    # The context manager closes the log even if a batch raises.
    with open(SaveFilename, 'w') as TestingLog:
        print('Save to ', SaveFilename)
        TestingLog.write(str(args) + '\n')

        for i, dataBatch in enumerate(dataloader):
            # Read data
            img_cpu = dataBatch['img']
            imBatch = img_cpu.to(device)
            ori_img_cpu = dataBatch['ori_img']

            target_cpu = dataBatch['target']
            targetBatch = target_cpu.to(device)
            target_np = target_cpu.data.numpy()

            # Prediction; no gradients are needed for evaluation.
            with torch.no_grad():
                if args.side:
                    reason_cpu = dataBatch['reason']
                    pred, pred_reason = model(imBatch)
                else:
                    pred = model(imBatch)

            if args.is_savemaps:
                hooked_features = hook_conv5.output.data
                hooked_features = torch.mean(torch.mean(hooked_features,
                                                        dim=0),
                                             dim=0)
                new_img = attention(
                    ori_img_cpu.squeeze(0).data.numpy(),
                    hooked_features.cpu().data.numpy())
                plt.imsave((outdir + 'att_maps/' + str(i) + '.jpg'), new_img)

            # Calculate accuracy
            predict = torch.sigmoid(pred) > 0.5
            predict_np = predict.cpu().data.numpy()
            f1 = f1_score(target_np, predict_np, average=None)
            f1_overall = f1_score(target_np, predict_np, average='samples')
            AccuracyArr.append(f1)
            AccOverallArr.append(f1_overall)

            # Reason outputs only exist when the model has the side branch.
            if args.side:
                predict_reason = torch.sigmoid(pred_reason) > 0.5
                f1_reason = f1_score(reason_cpu.data.numpy(),
                                     predict_reason.cpu().data.numpy(),
                                     average='samples')
                ReasonAcc.append(f1_reason)

            # Random-guess baseline. Sized from the actual batch so a short
            # final batch (or a string-valued args.batch_size) cannot break
            # it. The 5 columns and the zeroed last column are kept from
            # the original.
            random_guess = torch.randint(0, 2, (target_np.shape[0], 5))
            random_guess[:, 4] = 0
            random_f1 = f1_score(target_np,
                                 random_guess.cpu().data.numpy(),
                                 average=None)
            RandomAcc.append(random_f1)

            accumulated_f1 = np.mean(np.array(AccuracyArr), axis=0)
            truth_np = targetBatch.cpu().data.numpy()

            print('prediction logits:', pred)
            print('prediction action: \n {}'.format(predict))
            print('ground truth: \n', truth_np)
            print('Iteration {}: F1 {} Accumulated F1 {}'.format(
                i, AccuracyArr[-1], accumulated_f1))

            TestingLog.write('prediction logits:' + str(pred) + '\n')
            TestingLog.write('prediction action: \n {}'.format(predict) +
                             '\n')
            TestingLog.write('ground truth: \n' + str(truth_np) + '\n')
            TestingLog.write('Iteration {}: F1 {} Accumulated F1 {}'.format(
                i, AccuracyArr[-1], accumulated_f1) + '\n')
            TestingLog.write('\n')

        random_acc = np.mean(np.array(RandomAcc), axis=0)
        overall_acc = np.mean(np.array(AccOverallArr), axis=0)

        print("Random guess acc:{}".format(random_acc))
        print("Overall acc:{}".format(overall_acc))
        if args.side:
            print("Reason acc:{}".format(
                np.mean(np.array(ReasonAcc), axis=0)))
        TestingLog.write("Random guess acc:{}".format(random_acc) + '\n')
        TestingLog.write("Overall acc:{}".format(overall_acc) + '\n')
def test(cfg, args):
    """Evaluate a detection-based model and log action/reason F1 scores.

    Builds the model from ``cfg``, loads weights from ``args.model_root``
    and runs it over the ``BatchLoader`` dataset in order. Per-batch
    predictions and the final per-category / average / overall F1 scores
    (plus a random-guess baseline) are printed and written to
    ``<cfg.OUTPUT_DIR>/inference/TestingLog.txt``.

    Args:
        cfg: configuration node. Fields read here: ``MODEL.DEVICE``,
            ``MODEL.SIDE`` and ``OUTPUT_DIR``.
        args: options object. Fields read here: ``model_root``,
            ``imageroot``, ``gtroot``, ``reasonroot``, ``imHeight``,
            ``imWidth``, ``batch_size`` and ``is_savemaps``.

    Notes:
        * With ``cfg.MODEL.SIDE`` the model also returns reason logits,
          which get their own set of metrics.
        * With ``args.is_savemaps`` the model additionally returns the
          selected boxes, and an attention image with those boxes is drawn
          via ``DrawBbox`` for every batch.
    """
    # Initialize the network
    model = build_detection_model(cfg)
    print(model)
    model.eval()

    # model load weights
    model.load_state_dict(torch.load(args.model_root))

    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    outdir = os.path.join(cfg.OUTPUT_DIR, 'inference/')
    os.makedirs(outdir, exist_ok=True)

    # Initialize DataLoader
    Dataset = BatchLoader(imageRoot=args.imageroot,
                          gtRoot=args.gtroot,
                          reasonRoot=args.reasonroot,
                          cropSize=(args.imHeight, args.imWidth))
    dataloader = DataLoader(Dataset,
                            batch_size=int(args.batch_size),
                            num_workers=24,
                            shuffle=False)

    AccOverallArr = []
    TargetArr = []
    PredArr = []
    RandomArr = []

    AccOverallReasonArr = []
    TargetReasonArr = []
    PredReasonArr = []
    RandomReasonArr = []

    SaveFilename = (outdir + 'TestingLog.txt')
    # Fixed: the log file was never closed; the context manager closes it
    # on success and on error.
    with open(SaveFilename, 'w') as TestingLog:
        print('Save to ', SaveFilename)
        TestingLog.write(str(args) + '\n')

        count = len(dataloader)
        for i, dataBatch in enumerate(dataloader):
            print('Finished: {} / {}'.format(i, count))
            print('Finished: %.2f%%' % (i / count * 100))
            # Read data and run the model without tracking gradients.
            with torch.no_grad():
                img_cpu = dataBatch['img']
                imBatch = img_cpu.to(device)
                ori_img_cpu = dataBatch['ori_img']

                target_cpu = dataBatch['target']
                targetBatch = target_cpu.to(device)

                # NOTE(review): a new SimpleHook is created on every batch
                # when saving maps; if it registers a forward hook each
                # time, hooks accumulate on the module -- confirm.
                if cfg.MODEL.SIDE:
                    reason_cpu = dataBatch['reason']
                    if not args.is_savemaps:
                        pred, pred_reason = model(imBatch)
                    else:
                        hook_conv5 = SimpleHook(model.predictor.relu_glob1)
                        pred, pred_reason, selected_boxes = model(imBatch)
                else:
                    if not args.is_savemaps:
                        pred = model(imBatch)
                    else:
                        hook_conv5 = SimpleHook(model.predictor.relu_glob1)
                        pred, selected_boxes = model(imBatch)

            if args.is_savemaps:
                hooked_features = hook_conv5.output.data
                print("hooked_feature:", hooked_features.shape)
                hooked_features = torch.mean(torch.mean(hooked_features,
                                                        dim=0),
                                             dim=0)
                new_img = attention(
                    ori_img_cpu.squeeze(0).data.numpy(),
                    hooked_features.cpu().data.numpy(),
                    [hooked_features.shape[-1], hooked_features.shape[-1]])
                DrawBbox(new_img, selected_boxes[0], outdir, i)

            # Calculate accuracy
            predict = torch.sigmoid(pred) > 0.5
            target_np = target_cpu.data.numpy()
            predict_np = predict.cpu().data.numpy()
            TargetArr.append(target_np)
            PredArr.append(predict_np)
            f1_overall = f1_score(target_np, predict_np, average='samples')
            AccOverallArr.append(f1_overall)

            # random guess with the same shape as the predictions
            random_guess = np.random.randint(
                0, 2, (predict.shape[0], predict.shape[1]))
            RandomArr.append(random_guess)

            if cfg.MODEL.SIDE:
                predict_reason = torch.sigmoid(pred_reason) > 0.5
                reason_np = reason_cpu.data.numpy()
                predict_reason_np = predict_reason.cpu().data.numpy()
                TargetReasonArr.append(reason_np)
                PredReasonArr.append(predict_reason_np)
                f1_overall_reason = f1_score(reason_np,
                                             predict_reason_np,
                                             average='samples')
                AccOverallReasonArr.append(f1_overall_reason)

                # random guess for the reasons
                random_reason = np.random.randint(
                    0, 2, (predict_reason.shape[0], predict_reason.shape[1]))
                RandomReasonArr.append(random_reason)

            truth_np = targetBatch.cpu().data.numpy()

            print('prediction logits:', pred)
            print('prediction action: \n {}'.format(predict))
            print('ground truth: \n', truth_np)
            print('Accumulated Overall Action acc: ', np.mean(AccOverallArr))

            TestingLog.write('Iter ' + str(i) + '\n')
            TestingLog.write('prediction logits:' + str(pred) + '\n')
            TestingLog.write('prediction action: \n {}'.format(predict) +
                             '\n')
            TestingLog.write('ground truth: \n' + str(truth_np) + '\n')
            if cfg.MODEL.SIDE:
                print('prediction reason: \n {}'.format(predict_reason))
                print('ground truth: \n', reason_cpu.data.numpy())
                print('Accumulated Overall Reason acc: ',
                      np.mean(AccOverallReasonArr))

                TestingLog.write(
                    'prediction reason: \n {}'.format(predict_reason) + '\n')
                TestingLog.write('ground truth: \n' +
                                 str(reason_cpu.data.numpy()) + '\n')

            TestingLog.write('\n')

        TargetArr = List2Arr(TargetArr)
        PredArr = List2Arr(PredArr)
        RandomArr = List2Arr(RandomArr)

        print(TargetArr)
        print(PredArr)
        f1_pred = f1_score(TargetArr, PredArr, average=None)
        f1_rand = f1_score(TargetArr, RandomArr, average=None)

        # Fixed: summary lines are now newline-terminated in the log file;
        # previously they were written back to back on a single line.
        action_summary = [
            "Action Random guess acc:{}".format(f1_rand),
            "Action Category Acc:{}".format(f1_pred),
            "Action Average Acc:{}".format(np.mean(f1_pred)),
            "Action Overall acc:{}".format(
                np.mean(np.array(AccOverallArr), axis=0)),
        ]
        for line in action_summary:
            print(line)
            TestingLog.write(line + '\n')

        if cfg.MODEL.SIDE:
            TargetReasonArr = List2Arr(TargetReasonArr)
            PredReasonArr = List2Arr(PredReasonArr)
            RandomReasonArr = List2Arr(RandomReasonArr)

            f1_pred_reason = f1_score(TargetReasonArr,
                                      PredReasonArr,
                                      average=None)
            f1_pred_rand = f1_score(TargetReasonArr,
                                    RandomReasonArr,
                                    average=None)

            reason_summary = [
                "Reason Random guess acc:{}".format(f1_pred_rand),
                "Reason Category Acc:{}".format(f1_pred_reason),
                "Reason Average Acc:{}".format(np.mean(f1_pred_reason)),
                "Reason Overall Acc:{}".format(
                    np.mean(np.array(AccOverallReasonArr), axis=0)),
            ]
            for line in reason_summary:
                print(line)
                TestingLog.write(line + '\n')
Ejemplo n.º 11
0
    def __init__(self, batch_size, num_unroll_steps, embeddings, embedding_size, attention_dim, rnn_size, num_rnn_layers, num_classes, max_grad_norm, dropout = 1., l2_reg_lambda=0.0, adjust_weight=False,label_weight=None,is_training=True):
        """Build the graph of a bidirectional-LSTM + attention classifier.

        Uses the pre-1.0 TensorFlow API (``tf.split(dim, n, x)``,
        ``tf.scalar_summary`` ...), as the rest of this code does.

        Args:
            batch_size: stored on the instance; not used to build the graph.
            num_unroll_steps: number of time steps per input sequence.
            embeddings: initial embedding matrix; converted to float and
                stored as a trainable variable.
            embedding_size: stored on the instance; not used here.
            attention_dim: forwarded to ``attention``.
            rnn_size: hidden size of each LSTM cell (the softmax layer takes
                ``2 * rnn_size`` features).
            num_rnn_layers: number of stacked layers per direction.
            num_classes: number of output classes.
            max_grad_norm: global-norm clipping threshold for the gradients.
            dropout: keep probability passed to ``tf.nn.dropout``.
            l2_reg_lambda: forwarded to ``attention``; no L2 term is added
                to the cost in this constructor.
            adjust_weight: stored on the instance; not used here.
            label_weight: stored on the instance; not used here. ``None``
                (the default) is stored as a fresh empty list.
            is_training: when False, construction stops after the summaries
                and no optimizer ops are created.
        """
        # define input variable
        self.keep_prob = dropout
        self.batch_size = batch_size
        self.embeddings = embeddings
        self.embedding_size = embedding_size
        self.attention_dim = attention_dim
        self.num_classes = num_classes
        self.adjust_weight = adjust_weight
        # Avoid sharing one mutable default list between instances.
        self.label_weight = [] if label_weight is None else label_weight
        self.rnn_size = rnn_size
        self.num_rnn_layers = num_rnn_layers
        self.num_unroll_steps = num_unroll_steps
        self.l2_reg_lambda = l2_reg_lambda
        self.max_grad_norm = max_grad_norm
        self.is_training = is_training

        self.input_data = tf.placeholder(tf.int32, [None, self.num_unroll_steps])
        self.target = tf.placeholder(tf.int64, [None])
        self.mask_x = tf.placeholder(tf.float32, [self.num_unroll_steps, None])

        # Build the BiLSTM network: one stacked cell per direction.
        # NOTE(review): ``[cell] * n`` repeats the same cell object; that is
        # the idiom of the old TF API used here, but newer TF versions
        # require one cell instance per layer.
        fw_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.rnn_size)
        fw_cell = tf.nn.rnn_cell.MultiRNNCell([fw_lstm_cell] * self.num_rnn_layers, state_is_tuple=True)
        bw_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.rnn_size)
        bw_cell = tf.nn.rnn_cell.MultiRNNCell([bw_lstm_cell] * self.num_rnn_layers, state_is_tuple=True)

        # embedding layer
        with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
            self.embeddings = tf.Variable(tf.to_float(self.embeddings), trainable=True, name="embeddings")
            inputs = tf.nn.embedding_lookup(self.embeddings, self.input_data)

        # dropout on the embedded inputs
        if self.is_training and self.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, self.keep_prob)

        # Split into a list with one tensor per time step.
        inputs = [tf.squeeze(step, [1]) for step in tf.split(1, self.num_unroll_steps, inputs)]

        out_put, _, _ = tf.nn.bidirectional_rnn(fw_cell, bw_cell, inputs, dtype=tf.float32)

        out_put = tf.transpose(out_put, perm=[1, 0, 2])  # (batch_size, steps, rnn_size*2)

        output = attention(out_put, self.attention_dim, self.l2_reg_lambda)

        # dropout on the attended representation
        if self.is_training and self.keep_prob < 1:
            output = tf.nn.dropout(output, self.keep_prob)

        with tf.name_scope("Softmax_layer_and_output"):
            softmax_w = tf.get_variable("softmax_w", initializer=tf.truncated_normal([2 * self.rnn_size, self.num_classes], stddev=0.1))
            # NOTE(review): the bias has shape [1], i.e. one value shared by
            # all classes; left unchanged to keep variable shapes stable.
            softmax_b = tf.get_variable("softmax_b", initializer=tf.constant(0., shape=[1]))
            self.logits = tf.matmul(output, softmax_w) + softmax_b

        with tf.name_scope("loss"):
            # Per-example cross entropy; the cost is its batch mean.
            self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(self.logits, self.target)
            self.cost = tf.reduce_mean(self.loss)

        with tf.name_scope("accuracy"):
            self.prediction = tf.argmax(self.logits, 1)
            correct_prediction = tf.equal(self.prediction, self.target)
            self.correct_num = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")

        # summaries
        loss_summary = tf.scalar_summary("loss", self.cost)
        accuracy_summary = tf.scalar_summary("accuracy_summary", self.accuracy)

        # Inference-only graphs stop here: no optimizer ops.
        if not is_training:
            return

        self.globle_step = tf.Variable(0, name="globle_step", trainable=False)
        self.lr = tf.Variable(0.0, trainable=False)

        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          self.max_grad_norm)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in zip(grads, tvars):
            if g is not None:
                grad_hist_summary = tf.histogram_summary("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.scalar_summary("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        self.grad_summaries_merged = tf.merge_summary(grad_summaries)

        self.summary = tf.merge_summary([loss_summary, accuracy_summary, self.grad_summaries_merged])

        optimizer = tf.train.AdamOptimizer(self.lr)
        # Fixed: apply_gradients used to be called twice, leaving an extra,
        # unused update op in the graph.
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        # The learning rate is changed by running ``_lr_update`` with a
        # value fed to ``new_lr``.
        self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
        self._lr_update = tf.assign(self.lr, self.new_lr)