Example #1
    def _get_metrics(self, inputs, outputs):
        metrics = {}
        fc_out = self._calc_logits(enc_out=outputs["enc_out"],
                                   seq_pos=inputs["tgt_pos"])
        #fc_out = self._calc_logits(outputs["enc_out"], outputs["checkpoints"], inputs["tgt_pos"])
        lm_loss = layers.softmax_with_cross_entropy(logits=fc_out,
                                                    label=inputs["tgt_label"])
        need_cal = layers.not_equal(
            inputs["tgt_label"],
            layers.fill_constant(shape=[1], dtype="int64", value=1))
        need_cal = layers.cast(need_cal, self.dtype)
        mean_lm_loss = layers.reduce_sum(
            lm_loss * need_cal) / (layers.reduce_sum(need_cal) + 1e-10)

        pooled_out = self._get_pooled_output(outputs["enc_out"],
                                             inputs["label_pos"])
        nsp_fc_out = layers.fc(input=pooled_out,
                               size=2,
                               param_attr=fluid.ParamAttr(
                                   name="next_sent_fc.w_0",
                                   initializer=self.param_initializer),
                               bias_attr="next_sent_fc.b_0")
        nsp_loss, nsp_softmax = layers.softmax_with_cross_entropy(
            logits=nsp_fc_out, label=inputs["label"], return_softmax=True)

        nsp_acc = layers.accuracy(nsp_softmax, inputs["label"])
        mean_nsp_loss = layers.mean(nsp_loss)

        metrics["loss"] = mean_lm_loss + mean_nsp_loss
        metrics["lm_loss"] = mean_lm_loss
        metrics["nsp_loss"] = mean_nsp_loss
        metrics["nsp_acc"] = nsp_acc
        return metrics
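
The `need_cal` mask zeroes out positions whose target label equals the padding id (1 here), so `mean_lm_loss` averages the cross-entropy over real target tokens only. A small numpy sketch of the same masked-mean step, with made-up values for illustration:

import numpy as np

lm_loss = np.array([2.3, 0.7, 1.1, 0.0])       # per-position cross-entropy
tgt_label = np.array([5, 1, 8, 1])              # 1 is the padding id
need_cal = (tgt_label != 1).astype("float32")   # mask out padded positions
mean_lm_loss = np.sum(lm_loss * need_cal) / (np.sum(need_cal) + 1e-10)
# averages only the two unpadded positions: (2.3 + 1.1) / 2 = 1.7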
Example #2
    def forward(self):
        """forward"""
        features_list = [self.gw.node_feat["attr"]]

        for i in range(self.num_layers):
            h = gin(self.gw,
                    features_list[i],
                    hidden_size=self.hidden_size,
                    activation="relu",
                    name="gin_%s" % (i),
                    init_eps=0.0,
                    train_eps=self.train_eps)

            h = fl.batch_norm(h)
            h = fl.relu(h)

            features_list.append(h)

        output = 0
        for i, h in enumerate(features_list):
            pooled_h = pgl.layers.graph_pooling(self.gw, h, self.pool_type)
            drop_h = fl.dropout(pooled_h,
                                self.dropout_prob,
                                dropout_implementation="upscale_in_train")
            output += fl.fc(drop_h,
                            size=self.num_class,
                            act=None,
                            param_attr=fluid.ParamAttr(name="final_fc_%s" %
                                                       (i)))

        # calculate loss
        self.loss = fl.softmax_with_cross_entropy(output, self.labels)
        self.loss = fl.reduce_mean(self.loss)
        self.acc = fl.accuracy(fl.softmax(output), self.labels)
Example #3
    def get_metrics(self, inputs, outputs):
        """Get metrics."""
        metrics = {}
        pooled_out = self._get_pooled_output(outputs["enc_out"])
        cls_logits = self._get_classifier_output(pooled_out,
                                                 num_classes=self.num_classes,
                                                 name="cls")
        cls_loss, cls_softmax = layers.softmax_with_cross_entropy(
            logits=cls_logits, label=inputs["label"], return_softmax=True)

        cls_acc = layers.accuracy(cls_softmax, inputs["label"])
        mean_cls_loss = layers.mean(cls_loss)

        metrics["loss"] = mean_cls_loss
        metrics["cls_loss"] = mean_cls_loss
        metrics["cls_acc"] = cls_acc

        # statistics for recall & precision & f1
        if self.num_classes == 2:
            pred = layers.argmax(cls_softmax, axis=1)
            label = layers.squeeze(inputs["label"], axes=[1])
            metrics["stat_tp"] = layers.reduce_sum(
                layers.logical_and(pred == 1, label == 1).astype("float32"))
            metrics["stat_fp"] = layers.reduce_sum(
                layers.logical_and(pred == 1, label == 0).astype("float32"))
            metrics["stat_tn"] = layers.reduce_sum(
                layers.logical_and(pred == 0, label == 0).astype("float32"))
            metrics["stat_fn"] = layers.reduce_sum(
                layers.logical_and(pred == 0, label == 1).astype("float32"))
        return metrics
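
The `stat_*` counters are meant to be summed over evaluation batches; precision, recall, and F1 can then be derived from the totals (`stat_tn` is only needed if accuracy is recomputed the same way). A minimal sketch of that final step, assuming the accumulated counts are available as plain Python floats (names are illustrative):

def compute_precision_recall_f1(stat_tp, stat_fp, stat_fn, eps=1e-10):
    # precision: fraction of predicted positives that are correct
    precision = stat_tp / (stat_tp + stat_fp + eps)
    # recall: fraction of true positives that were recovered
    recall = stat_tp / (stat_tp + stat_fn + eps)
    # F1: harmonic mean of precision and recall
    f1 = 2 * precision * recall / (precision + recall + eps)
    return precision, recall, f1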
Example #4
    def get_metrics(self, inputs, outputs):
        """Get metrics."""
        metrics = {}
        tgt_logits = self._calc_logits(outputs["enc_out"], inputs["tgt_idx"])
        lm_loss = layers.softmax_with_cross_entropy(logits=tgt_logits, label=inputs["tgt_label"])
        need_cal = layers.not_equal(
            inputs["tgt_label"], layers.fill_constant(shape=[1], dtype="int64", value=1)
        )
        need_cal = layers.cast(need_cal, self.dtype)
        mean_lm_loss = layers.reduce_sum(lm_loss * need_cal) / (layers.reduce_sum(need_cal) + 1e-10)

        pooled_out = self._get_pooled_output(outputs["enc_out"], inputs["label_idx"])
        nsp_logits = self._get_classifier_output(pooled_out, name="next_sent")
        nsp_loss, nsp_softmax = layers.softmax_with_cross_entropy(
            logits=nsp_logits, label=inputs["label"], return_softmax=True)

        nsp_acc = layers.accuracy(nsp_softmax, inputs["label"])
        mean_nsp_loss = layers.mean(nsp_loss)

        loss = mean_nsp_loss
        if self.use_mlm:
            loss = loss + mean_lm_loss
            metrics["token_lm_loss"] = mean_lm_loss
        metrics["loss"] = loss
        metrics["nsp_loss"] = mean_nsp_loss
        metrics["nsp_acc"] = nsp_acc
        return metrics
Example #5
    def forward(self, is_test=False):
        """
        Build the network.
        """
        graph_wrapper = GraphWrapper(name="graph",
                node_feat=[
                    ('atom_type', [None, 1], "int64"), 
                    ('chirality_tag', [None, 1], "int64")],
                edge_feat=[
                    ('bond_type', [None, 1], "int64"),
                    ('bond_direction', [None, 1], "int64")])
        masked_node_indice = layers.data(name="masked_node_indice", shape=[-1, 1], dtype="int64")
        masked_node_label = layers.data(name="masked_node_label", shape=[-1, 1], dtype="int64")

        node_repr = self.gnn_model.forward(graph_wrapper, is_test=is_test)
        masked_node_repr = layers.gather(node_repr, masked_node_indice)
        logits = layers.fc(masked_node_repr, 
                size=len(CompoundConstants.atom_num_list),
                name="masked_node_logits")

        loss, pred = layers.softmax_with_cross_entropy(
                logits, masked_node_label, return_softmax=True)
        loss = layers.reduce_mean(loss)
        acc = layers.accuracy(pred, masked_node_label)

        self.graph_wrapper = graph_wrapper
        self.loss = loss
Example #6
    def _compute_acc(self, pred):

        label = layers.cast(self.label, dtype="int64")
        label = layers.reshape(label, [-1, 1])
        pred = layers.reshape(pred, [-1, 2])

        acc = layers.accuracy(pred, label)

        return acc
Example #7
    def matrixwise_loss(self):
        """listwise model"""
        self.logits = L.matmul(
            self.query_repr, self.poi_repr, transpose_y=True)
        self.score = L.softmax(self.logits)
        self.loss = L.softmax_with_cross_entropy(self.logits, self.labels)
        self.loss = L.reduce_mean(self.loss)
        self.acc = L.accuracy(L.softmax(self.logits), self.labels)
        self.metrics = [self.loss, self.acc]
Example #8
    def __init__(self, input, label, k=20):
        """ """
        kwargs = locals()
        del kwargs['self']
        self.k = k

        if not isinstance(input, Variable):
            raise ValueError("input must be Variable, but received %s" %
                             type(input))
        if not isinstance(label, Variable):
            raise ValueError("label must be Variable, but received %s" %
                             type(label))

        helper = LayerHelper("PaddleRec_RecallK", **kwargs)
        batch_accuracy = accuracy(input, label, self.k)
        global_ins_cnt, _ = helper.create_or_get_global_variable(
            name="ins_cnt", persistable=True, dtype='float32', shape=[1])
        global_pos_cnt, _ = helper.create_or_get_global_variable(
            name="pos_cnt", persistable=True, dtype='float32', shape=[1])

        for var in [global_ins_cnt, global_pos_cnt]:
            helper.set_variable_initializer(
                var, Constant(value=0.0, force_cpu=True))

        tmp_ones = fluid.layers.fill_constant(shape=fluid.layers.shape(label),
                                              dtype="float32",
                                              value=1.0)
        batch_ins = fluid.layers.reduce_sum(tmp_ones)
        batch_pos = batch_ins * batch_accuracy

        helper.append_op(type="elementwise_add",
                         inputs={
                             "X": [global_ins_cnt],
                             "Y": [batch_ins]
                         },
                         outputs={"Out": [global_ins_cnt]})

        helper.append_op(type="elementwise_add",
                         inputs={
                             "X": [global_pos_cnt],
                             "Y": [batch_pos]
                         },
                         outputs={"Out": [global_pos_cnt]})

        self.acc = global_pos_cnt / global_ins_cnt

        self._global_metric_state_vars = dict()
        self._global_metric_state_vars['ins_cnt'] = (global_ins_cnt.name,
                                                     "float32")
        self._global_metric_state_vars['pos_cnt'] = (global_pos_cnt.name,
                                                     "float32")

        metric_name = "Acc(Recall@%d)" % self.k
        self.metrics = dict()
        self.metrics["InsCnt"] = global_ins_cnt
        self.metrics["RecallCnt"] = global_pos_cnt
        self.metrics[metric_name] = self.acc
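
Because `ins_cnt` and `pos_cnt` are persistable variables, they keep accumulating across `Executor.run` calls; the epoch-level Recall@k is their ratio, read back from the scope after evaluation. A rough sketch of that readout, assuming `metric` is an instance of the class above and evaluation has already been run on the default global scope:

import numpy as np
import paddle.fluid as fluid

def epoch_recall_at_k(metric):
    # look up the accumulated counters by the names registered in the metric
    ins_name, _ = metric._global_metric_state_vars["ins_cnt"]
    pos_name, _ = metric._global_metric_state_vars["pos_cnt"]
    ins_cnt = np.array(fluid.global_scope().find_var(ins_name).get_tensor())[0]
    pos_cnt = np.array(fluid.global_scope().find_var(pos_name).get_tensor())[0]
    return pos_cnt / max(ins_cnt, 1e-10)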
Example #9
    def listwise_hinge_loss(self):
        """listwise hinge loss model"""
        self.poi_repr = L.l2_normalize(self.poi_repr, -1)
        self.query_repr = L.l2_normalize(self.query_repr, -1)
        pos_logits = L.reduce_sum(self.query_repr * self.poi_repr, -1, keep_dim=True)
        neg_logits = L.matmul(self.query_repr, self.poi_repr, transpose_y=True)
        self.loss = L.reduce_mean(L.relu(neg_logits - pos_logits + 0.3))
        self.acc = L.accuracy(L.softmax(neg_logits), self.labels)
        self.metrics = [self.loss, self.acc]
Example #10
    def create_acc_op(self, predict, label):
        """compute accuracy with tensor

         Args:
         predict: model output tensor activated by softmax
         label: a non-sparse tensor

         Returns:
         acc: acc tensor
         """
        accuracy = FL.accuracy(input=predict, label=label)
        return accuracy
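
For context, `fluid.layers.accuracy` takes an `input` of shape `[N, num_classes]` (softmax probabilities or logits), an int64 `label` of shape `[N, 1]`, and an optional top-`k`, and returns the batch accuracy as a `[1]` float tensor. A minimal static-graph sketch under those assumptions (Paddle 1.x fluid API):

import numpy as np
import paddle.fluid as fluid

pred = fluid.data(name="pred", shape=[None, 3], dtype="float32")
label = fluid.data(name="label", shape=[None, 1], dtype="int64")
acc = fluid.layers.accuracy(input=pred, label=label, k=1)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
acc_val, = exe.run(feed={"pred": np.array([[0.1, 0.7, 0.2],
                                           [0.8, 0.1, 0.1]], dtype="float32"),
                         "label": np.array([[1], [2]], dtype="int64")},
                   fetch_list=[acc])
# acc_val -> array([0.5], dtype=float32): one of the two predictions is correct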
Example #11
    def listwise_loss(self, args):
        """listwise model"""
        self.logits = L.matmul(
            self.query_repr, self.poi_repr, transpose_y=True)
        if self.norm_score:
            self.logits = L.softsign(self.logits)

        if args.scale_softmax:
            scale = L.create_parameter(shape=[1], dtype="float32", name="final_scale", default_initializer=F.initializer.ConstantInitializer(value=1.0))
            bias = L.create_parameter(shape=[1], dtype="float32", name="final_bias", default_initializer=F.initializer.ConstantInitializer(value=0.0))
            self.logits = self.logits * scale * scale + bias

        self.score = L.softmax(self.logits)
        self.loss = L.softmax_with_cross_entropy(self.logits, self.labels)
        self.loss = L.reduce_mean(self.loss)
        self.acc = L.accuracy(L.softmax(self.logits), self.labels)
        self.metrics = [self.loss, self.acc]
Example #12
    def loss_labels(self, outputs, targets, indices, num_boxes, log=True):
        """
        Classification loss (NLL)
        targets dict must contain the key "labels" containing a tensor of dim [nb_target_boxes]
        """
        assert "pred_logits" in outputs
        num_classes_plus_1 = outputs["pred_logits"].shape[-1]
        src_logits = outputs["pred_logits"]  # [bs, num_queries, num_classes]

        idx = self._get_src_permutation_idx(indices)
        target_classes_o = [
            t["labels"].numpy()[J.numpy()]
            for t, (_, J) in zip(targets, indices)
        ]
        target_classes_o = [dg.to_variable(t) for t in target_classes_o]
        target_classes_o = L.concat(target_classes_o)  # [bs * num_object]
        target_classes = T.creation.full(src_logits.shape[:2],
                                         self.num_classes).astype(
                                             "int64")  # [bs, num_queries]

        idx = np.array([idx[0].numpy(), idx[1].numpy()])
        target_classes = target_classes.numpy()
        target_classes[idx[0], idx[1]] = target_classes_o.numpy()
        target_classes = dg.to_variable(target_classes)

        target_classes = L.unsqueeze(target_classes, axes=[2])
        loss_ce = L.softmax_with_cross_entropy(
            src_logits, target_classes)  # (bs, num_queries, 1)
        loss_weight = np.ones(loss_ce.shape).astype("float32")
        loss_weight[(
            target_classes == self.num_classes).numpy()] = self.eos_coef
        loss_ce = loss_ce * dg.to_variable(loss_weight)
        loss_ce = L.reduce_mean(loss_ce)
        losses = {'loss_ce': loss_ce}

        if log:
            # TODO this should probably be a separate loss, not hacked in this one here
            out_logits = src_logits.numpy()[idx[0], idx[1], :]
            out_logits = dg.to_variable(
                out_logits)  # [num_objects, num_classes_plus_1]
            target_labels = L.reshape(target_classes_o, (-1, 1))
            losses['class_error'] = 100 - 100 * L.accuracy(
                out_logits, target_labels)
        return losses
Example #13
def network(items_num, hidden_size, step, bs):
    stdv = 1.0 / math.sqrt(hidden_size)

    items = fluid.data(name="items", shape=[bs, -1],
                       dtype="int64")  #[batch_size, uniq_max]
    seq_index = fluid.data(name="seq_index", shape=[bs, -1, 2],
                           dtype="int32")  #[batch_size, seq_max, 2]
    last_index = fluid.data(name="last_index", shape=[bs, 2],
                            dtype="int32")  #[batch_size, 2]
    adj_in = fluid.data(name="adj_in", shape=[bs, -1, -1],
                        dtype="float32")  #[batch_size, seq_max, seq_max]
    adj_out = fluid.data(name="adj_out", shape=[bs, -1, -1],
                         dtype="float32")  #[batch_size, seq_max, seq_max]
    mask = fluid.data(name="mask", shape=[bs, -1, 1],
                      dtype="float32")  #[batch_size, seq_max, 1]
    label = fluid.data(name="label", shape=[bs, 1],
                       dtype="int64")  #[batch_size, 1]

    datas = [items, seq_index, last_index, adj_in, adj_out, mask, label]
    py_reader = fluid.io.DataLoader.from_generator(capacity=256,
                                                   feed_list=datas,
                                                   iterable=False)
    feed_datas = datas

    items_emb = fluid.embedding(
        input=items,
        param_attr=fluid.ParamAttr(name="emb",
                                   initializer=fluid.initializer.Uniform(
                                       low=-stdv, high=stdv)),
        size=[items_num, hidden_size])  #[batch_size, uniq_max, h]

    pre_state = items_emb
    for i in range(step):
        pre_state = layers.reshape(x=pre_state, shape=[bs, -1, hidden_size])
        state_in = layers.fc(
            input=pre_state,
            name="state_in",
            size=hidden_size,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-stdv, high=stdv)),
            bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  #[batch_size, uniq_max, h]
        state_out = layers.fc(
            input=pre_state,
            name="state_out",
            size=hidden_size,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-stdv, high=stdv)),
            bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  #[batch_size, uniq_max, h]

        state_adj_in = layers.matmul(adj_in,
                                     state_in)  #[batch_size, uniq_max, h]
        state_adj_out = layers.matmul(adj_out,
                                      state_out)  #[batch_size, uniq_max, h]

        gru_input = layers.concat([state_adj_in, state_adj_out], axis=2)

        gru_input = layers.reshape(x=gru_input, shape=[-1, hidden_size * 2])
        gru_fc = layers.fc(input=gru_input,
                           name="gru_fc",
                           size=3 * hidden_size,
                           bias_attr=False)
        pre_state, _, _ = fluid.layers.gru_unit(input=gru_fc,
                                                hidden=layers.reshape(
                                                    x=pre_state,
                                                    shape=[-1, hidden_size]),
                                                size=3 * hidden_size)

    final_state = layers.reshape(pre_state, shape=[bs, -1, hidden_size])
    seq = layers.gather_nd(final_state, seq_index)
    last = layers.gather_nd(final_state, last_index)

    seq_fc = layers.fc(
        input=seq,
        name="seq_fc",
        size=hidden_size,
        bias_attr=False,
        act=None,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  #[batch_size, seq_max, h]
    last_fc = layers.fc(
        input=last,
        name="last_fc",
        size=hidden_size,
        bias_attr=False,
        act=None,
        num_flatten_dims=1,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  #[batch_size, h]

    seq_fc_t = layers.transpose(seq_fc, perm=[1, 0,
                                              2])  #[seq_max, batch_size, h]
    add = layers.elementwise_add(seq_fc_t, last_fc)  #[seq_max, batch_size, h]
    b = layers.create_parameter(
        shape=[hidden_size],
        dtype='float32',
        default_initializer=fluid.initializer.Constant(value=0.0))  #[h]
    add = layers.elementwise_add(add, b)  #[seq_max, batch_size, h]

    add_sigmoid = layers.sigmoid(add)  #[seq_max, batch_size, h]
    add_sigmoid = layers.transpose(add_sigmoid,
                                   perm=[1, 0, 2])  #[batch_size, seq_max, h]

    weight = layers.fc(
        input=add_sigmoid,
        name="weight_fc",
        size=1,
        act=None,
        num_flatten_dims=2,
        bias_attr=False,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  #[batch_size, seq_max, 1]
    weight *= mask
    weight_mask = layers.elementwise_mul(seq, weight,
                                         axis=0)  #[batch_size, seq_max, h]
    global_attention = layers.reduce_sum(weight_mask, dim=1)  #[batch_size, h]

    final_attention = layers.concat([global_attention, last],
                                    axis=1)  #[batch_size, 2*h]
    final_attention_fc = layers.fc(
        input=final_attention,
        name="final_attention_fc",
        size=hidden_size,
        bias_attr=False,
        act=None,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  #[batch_size, h]

    all_vocab = layers.create_global_var(shape=[items_num - 1],
                                         value=0,
                                         dtype="int64",
                                         persistable=True,
                                         name="all_vocab")

    all_emb = fluid.embedding(input=all_vocab,
                              param_attr=fluid.ParamAttr(
                                  name="emb",
                                  initializer=fluid.initializer.Uniform(
                                      low=-stdv, high=stdv)),
                              size=[items_num, hidden_size])  #[all_vocab, h]

    logits = layers.matmul(x=final_attention_fc, y=all_emb,
                           transpose_y=True)  #[batch_size, all_vocab]
    softmax = layers.softmax_with_cross_entropy(logits=logits,
                                                label=label)  #[batch_size, 1]
    loss = layers.reduce_mean(softmax)  # [1]
    acc = layers.accuracy(input=logits, label=label, k=50)
    return loss, acc, py_reader, feed_datas, logits
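
With `k=50`, the `layers.accuracy` call above reports whether the true next item appears among the 50 highest-scoring items, i.e. a hit rate (Recall@50) over the batch. A numpy sketch of the same quantity, for intuition only:

import numpy as np

def hit_rate_at_k(logits, labels, k):
    # fraction of rows whose true label is among the k highest scores
    topk = np.argsort(-logits, axis=1)[:, :k]
    hits = [label in row for row, label in zip(topk, labels.reshape(-1))]
    return float(np.mean(hits))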
Example #14
def network(batch_size, items_num, hidden_size, step, rate):
    stdv = 1.0 / math.sqrt(hidden_size)

    items = layers.data(
        name="items",
        shape=[batch_size, -1, 1],
        dtype="int64",
        append_batch_size=False)  #[bs, uniq_max, 1]
    seq_index = layers.data(
        name="seq_index",
        shape=[batch_size, -1],
        dtype="int64",
        append_batch_size=False)  #[-1(seq_max)*batch_size, 1]
    last_index = layers.data(
        name="last_index",
        shape=[batch_size],
        dtype="int64",
        append_batch_size=False)  #[batch_size, 1]
    adj_in = layers.data(
        name="adj_in",
        shape=[batch_size, -1, -1],
        dtype="float32",
        append_batch_size=False)
    adj_out = layers.data(
        name="adj_out",
        shape=[batch_size, -1, -1],
        dtype="float32",
        append_batch_size=False)
    mask = layers.data(
        name="mask",
        shape=[batch_size, -1, 1],
        dtype="float32",
        append_batch_size=False)
    label = layers.data(
        name="label",
        shape=[batch_size, 1],
        dtype="int64",
        append_batch_size=False)

    items_emb = layers.embedding(
        input=items,
        is_sparse=True,
        param_attr=fluid.ParamAttr(
            name="emb",
            learning_rate=rate,
            initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)),
        size=[items_num, hidden_size])  #[batch_size, uniq_max, h]
    data_feed = [items, seq_index, last_index, adj_in, adj_out, mask, label]

    pre_state = items_emb
    for i in range(step):
        pre_state = layers.reshape(
            x=pre_state, shape=[batch_size, -1, hidden_size])
        state_in = layers.fc(
            input=pre_state,
            name="state_in",
            size=hidden_size,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)),
            bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  #[batch_size, uniq_max, h]
        state_out = layers.fc(
            input=pre_state,
            name="state_out",
            size=hidden_size,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)),
            bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  #[batch_size, uniq_max, h]

        state_adj_in = layers.matmul(adj_in,
                                     state_in)  #[batch_size, uniq_max, h]
        state_adj_out = layers.matmul(adj_out,
                                      state_out)  #[batch_size, uniq_max, h]

        gru_input = layers.concat([state_adj_in, state_adj_out], axis=2)

        gru_input = layers.reshape(x=gru_input, shape=[-1, hidden_size * 2])
        gru_fc = layers.fc(input=gru_input,
                           name="gru_fc",
                           size=3 * hidden_size,
                           bias_attr=False)
        pre_state, _, _ = fluid.layers.gru_unit(
            input=gru_fc,
            hidden=layers.reshape(
                x=pre_state, shape=[-1, hidden_size]),
            size=3 * hidden_size)

    final_state = pre_state
    seq_index = layers.reshape(seq_index, shape=[-1])
    seq = layers.gather(final_state, seq_index)  #[batch_size*-1(seq_max), h]
    last = layers.gather(final_state, last_index)  #[batch_size, h]

    seq = layers.reshape(
        seq, shape=[batch_size, -1, hidden_size])  #[batch_size, -1(seq_max), h]
    last = layers.reshape(
        last, shape=[batch_size, hidden_size])  #[batch_size, h]

    seq_fc = layers.fc(
        input=seq,
        name="seq_fc",
        size=hidden_size,
        bias_attr=False,
        act=None,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  #[batch_size, -1(seq_max), h]
    last_fc = layers.fc(input=last,
                        name="last_fc",
                        size=hidden_size,
                        bias_attr=False,
                        act=None,
                        num_flatten_dims=1,
                        param_attr=fluid.ParamAttr(
                            initializer=fluid.initializer.Uniform(
                                low=-stdv, high=stdv)))  #[batch_size, h]

    seq_fc_t = layers.transpose(
        seq_fc, perm=[1, 0, 2])  #[-1(seq_max), batch_size, h]
    add = layers.elementwise_add(seq_fc_t,
                                 last_fc)  #[-1(seq_max), batch_size, h]
    b = layers.create_parameter(
        shape=[hidden_size],
        dtype='float32',
        default_initializer=fluid.initializer.Constant(value=0.0))  #[h]
    add = layers.elementwise_add(add, b)  #[-1(seq_max), batch_size, h]

    add_sigmoid = layers.sigmoid(add)  #[-1(seq_max), batch_size, h] 
    add_sigmoid = layers.transpose(
        add_sigmoid, perm=[1, 0, 2])  #[batch_size, -1(seq_max), h]

    weight = layers.fc(input=add_sigmoid,
                       name="weight_fc",
                       size=1,
                       act=None,
                       num_flatten_dims=2,
                       bias_attr=False,
                       param_attr=fluid.ParamAttr(
                           initializer=fluid.initializer.Uniform(
                               low=-stdv, high=stdv)))  #[batch_size, -1, 1]
    weight *= mask
    weight_mask = layers.elementwise_mul(seq, weight, axis=0)
    global_attention = layers.reduce_sum(weight_mask, dim=1)

    final_attention = layers.concat(
        [global_attention, last], axis=1)  #[batch_size, 2*h]
    final_attention_fc = layers.fc(
        input=final_attention,
        name="fina_attention_fc",
        size=hidden_size,
        bias_attr=False,
        act=None,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  #[batch_size, h]

    all_vocab = layers.create_global_var(
        shape=[items_num - 1, 1],
        value=0,
        dtype="int64",
        persistable=True,
        name="all_vocab")

    all_emb = layers.embedding(
        input=all_vocab,
        is_sparse=True,
        param_attr=fluid.ParamAttr(
            name="emb",
            learning_rate=rate,
            initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)),
        size=[items_num, hidden_size])  #[all_vocab, h]

    logits = layers.matmul(
        x=final_attention_fc, y=all_emb,
        transpose_y=True)  #[batch_size, all_vocab]
    softmax = layers.softmax_with_cross_entropy(
        logits=logits, label=label)  #[batch_size, 1]
    loss = layers.reduce_mean(softmax)  # [1]
    #fluid.layers.Print(loss)
    acc = layers.accuracy(input=logits, label=label, k=20)
    return loss, acc, data_feed, [items_emb, all_emb]
Example #15
    def forward(self, x, cls=None):
        # x is BxTxCxHxW; note this differs from the 2p1d network's input format
        # spatio-temporal video data
        b, t, c, h, w = x.shape
        # need to view it is B*TxCxHxW for 2D CNN
        # important to keep the batch and time axes next to
        # each other, so a simple view without transposing is possible
        # questionable: when torch.dataloader packs a batch, the classes are mixed together,
        # and videos of the same class are not conveniently grouped either, since the
        # representation-layer optical flow still has to be computed
        x = reshape(x, shape=[b * t, c, h, w])

        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)

        # insert the FCF layer

        # res = x  # F.avg_pool2d(x, (3, 1), 1, 0)  # x[:,:,1:-1].contiguous(); F denotes torch.nn.functional
        res = x
        x = self.flow_cmp(x)
        x = self.flow_layer.norm_img(x)

        # compute flow for 0,1,...,T-1
        #        and       1,2,...,T
        b_t, c, h, w = x.shape
        x = reshape(x, shape=[b, -1, c, h, w])  # unpack x back to B,T,C,H,W; the T dimension is operated on next
        # depending on whether x = x + res is used, pick one of the next two lines
        x = pad(x, paddings=[0, 0, 0, 1, 0, 0, 0, 0, 0, 0])
        # t -= 1  # after the Representation Flow op, t has one fewer frame
        u, v = self.flow_layer(reshape(x[:, :-1], shape=[-1, c, h, w]),
                               reshape(x[:, 1:], shape=[-1, c, h, w]))

        x = concat([u, v], axis=1)

        x = self.flow_conv(x)

        # Flow-of-flow
        x = self.flow_cmp2(x)
        x = self.flow_layer.norm_img(x)
        # compute flow for 0,1,...,T-1
        #        and       1,2,...,T
        b_t, c, h, w = x.shape
        x = reshape(x, shape=[b, -1, c, h, w])
        # depending on whether x = x + res is used, pick one of the next two lines
        x = pad(x, paddings=[0, 0, 0, 1, 0, 0, 0, 0, 0, 0])
        # t -= 1  # after the Representation Flow op, t has one fewer frame
        u, v = self.flow_layer2(reshape(x[:, :-1], shape=[-1, c, h, w]),
                                reshape(x[:, 1:], shape=[-1, c, h, w]))
        x = concat([u, v], axis=1)

        x = self.flow_conv2(x)
        x = self.bnf(x)

        x = x + res
        x = leaky_relu(x)

        #

        x = self.layer3(x)
        x = self.layer4(x)

        #print(x.size())
        x = self.avgpool(x)

        x = reshape(x, shape=[x.shape[0], -1])
        x = self.dropout(x)

        # currently making dense, per-frame predictions
        x = self.fc(x)

        # so view as BxTxClass
        x = reshape(x, shape=[b, t, -1])
        # mean-pool over time
        x = reduce_mean(x, dim=1)  # pool over the temporal dimension

        # return BxClass prediction
        if cls is not None:
            acc = float(accuracy(input=x, label=cls))
            return x, acc
        else:
            return x
Example #16
h_conv = layers.conv2d(h_conv, num_filters=64, filter_size=(3, 3), act="relu")
h_pool = layers.pool2d(h_conv, pool_size=(2, 2))
h_dropout = layers.dropout(h_pool, dropout_prob=0.25)
h_flatten = layers.flatten(h_dropout)
h_fc = layers.fc(h_flatten,
                 size=128,
                 act="relu",
                 bias_attr=fluid.param_attr.ParamAttr(name="b_0"))
h_dropout2 = layers.dropout(h_fc, dropout_prob=0.25)
pred = layers.fc(h_dropout2,
                 size=num_classes,
                 act="softmax",
                 bias_attr=fluid.param_attr.ParamAttr(name="b_1"))

loss = layers.reduce_mean(layers.cross_entropy(input=pred, label=Y))
acc = layers.accuracy(input=pred, label=Y)

test_program = fluid.default_main_program().clone(for_test=True)

# define the optimizer
optimizer = fluid.optimizer.Adadelta(learning_rate=1.0, rho=0.95)

optimizer.minimize(loss)

# define the executor

exe = fluid.Executor(fluid.CUDAPlace(0))
exe.run(fluid.default_startup_program())

# define data reader
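
A minimal sketch of how the rest of the script might drive this program, assuming the network's image and label inputs were declared with names "X" and "Y" and that `train_reader` yields `(image, label)` batches; both names are illustrative and not part of the snippet above:

import numpy as np

for epoch in range(5):
    for batch in train_reader():
        images = np.array([item[0] for item in batch]).astype("float32")
        labels = np.array([item[1] for item in batch]).astype("int64").reshape(-1, 1)
        loss_val, acc_val = exe.run(fluid.default_main_program(),
                                    feed={"X": images, "Y": labels},
                                    fetch_list=[loss, acc])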
Example #17
    def net(self, inputs, is_infer=False):
        if is_infer:
            bs = self.evaluate_batch_size
        else:
            bs = self.train_batch_size

        stdv = 1.0 / math.sqrt(self.hidden_size)

        def embedding_layer(input,
                            table_name,
                            emb_dim,
                            initializer_instance=None):
            emb = fluid.embedding(
                input=input,
                size=[self.dict_size, emb_dim],
                param_attr=fluid.ParamAttr(
                    name=table_name, initializer=initializer_instance))
            return emb

        sparse_initializer = fluid.initializer.Uniform(low=-stdv, high=stdv)
        items_emb = embedding_layer(inputs[0], "emb", self.hidden_size,
                                    sparse_initializer)
        pre_state = items_emb
        for i in range(self.step):
            pre_state = layers.reshape(
                x=pre_state, shape=[bs, -1, self.hidden_size])
            state_in = layers.fc(
                input=pre_state,
                name="state_in",
                size=self.hidden_size,
                act=None,
                num_flatten_dims=2,
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Uniform(
                        low=-stdv, high=stdv)),
                bias_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Uniform(
                        low=-stdv, high=stdv)))  # [batch_size, uniq_max, h]
            state_out = layers.fc(
                input=pre_state,
                name="state_out",
                size=self.hidden_size,
                act=None,
                num_flatten_dims=2,
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Uniform(
                        low=-stdv, high=stdv)),
                bias_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Uniform(
                        low=-stdv, high=stdv)))  # [batch_size, uniq_max, h]

            state_adj_in = layers.matmul(inputs[3],
                                         state_in)  # [batch_size, uniq_max, h]
            state_adj_out = layers.matmul(
                inputs[4], state_out)  # [batch_size, uniq_max, h]

            gru_input = layers.concat([state_adj_in, state_adj_out], axis=2)

            gru_input = layers.reshape(
                x=gru_input, shape=[-1, self.hidden_size * 2])
            gru_fc = layers.fc(input=gru_input,
                               name="gru_fc",
                               size=3 * self.hidden_size,
                               bias_attr=False)
            pre_state, _, _ = fluid.layers.gru_unit(
                input=gru_fc,
                hidden=layers.reshape(
                    x=pre_state, shape=[-1, self.hidden_size]),
                size=3 * self.hidden_size)

        final_state = layers.reshape(
            pre_state, shape=[bs, -1, self.hidden_size])
        seq = layers.gather_nd(final_state, inputs[1])
        last = layers.gather_nd(final_state, inputs[2])

        seq_fc = layers.fc(
            input=seq,
            name="seq_fc",
            size=self.hidden_size,
            bias_attr=False,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  # [batch_size, seq_max, h]
        last_fc = layers.fc(input=last,
                            name="last_fc",
                            size=self.hidden_size,
                            bias_attr=False,
                            act=None,
                            num_flatten_dims=1,
                            param_attr=fluid.ParamAttr(
                                initializer=fluid.initializer.Uniform(
                                    low=-stdv, high=stdv)))  # [batch_size, h]

        seq_fc_t = layers.transpose(
            seq_fc, perm=[1, 0, 2])  # [seq_max, batch_size, h]
        add = layers.elementwise_add(seq_fc_t,
                                     last_fc)  # [seq_max, batch_size, h]
        b = layers.create_parameter(
            shape=[self.hidden_size],
            dtype='float32',
            default_initializer=fluid.initializer.Constant(value=0.0))  # [h]
        add = layers.elementwise_add(add, b)  # [seq_max, batch_size, h]

        add_sigmoid = layers.sigmoid(add)  # [seq_max, batch_size, h]
        add_sigmoid = layers.transpose(
            add_sigmoid, perm=[1, 0, 2])  # [batch_size, seq_max, h]

        weight = layers.fc(
            input=add_sigmoid,
            name="weight_fc",
            size=1,
            act=None,
            num_flatten_dims=2,
            bias_attr=False,
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  # [batch_size, seq_max, 1]
        weight *= inputs[5]
        weight_mask = layers.elementwise_mul(
            seq, weight, axis=0)  # [batch_size, seq_max, h]
        global_attention = layers.reduce_sum(
            weight_mask, dim=1)  # [batch_size, h]

        final_attention = layers.concat(
            [global_attention, last], axis=1)  # [batch_size, 2*h]
        final_attention_fc = layers.fc(
            input=final_attention,
            name="final_attention_fc",
            size=self.hidden_size,
            bias_attr=False,
            act=None,
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  # [batch_size, h]

        # all_vocab = layers.create_global_var(
        #     shape=[items_num - 1],
        #     value=0,
        #     dtype="int64",
        #     persistable=True,
        #     name="all_vocab")
        all_vocab = np.arange(1, self.dict_size).reshape((-1)).astype('int32')
        all_vocab = fluid.layers.cast(
            x=fluid.layers.assign(all_vocab), dtype='int64')

        all_emb = fluid.embedding(
            input=all_vocab,
            param_attr=fluid.ParamAttr(
                name="emb",
                initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)),
            size=[self.dict_size, self.hidden_size])  # [all_vocab, h]

        logits = layers.matmul(
            x=final_attention_fc, y=all_emb,
            transpose_y=True)  # [batch_size, all_vocab]
        softmax = layers.softmax_with_cross_entropy(
            logits=logits, label=inputs[6])  # [batch_size, 1]
        self.loss = layers.reduce_mean(softmax)  # [1]
        self.acc = layers.accuracy(input=logits, label=inputs[6], k=20)

        self._cost = self.loss
        if is_infer:
            self._infer_results['acc'] = self.acc
            self._infer_results['loss'] = self.loss
            return

        self._metrics["LOSS"] = self.loss
        self._metrics["train_acc"] = self.acc
Example #18
    def _compute_acc(self, output):
        acc = layers.accuracy(input=output, label=self.label)
        return acc