Example 1
    def forward(self, input):
        in_mean = layers.reduce_mean(input, dim=[2, 3], keep_dim=True)
        in_var = var(input, axis=[2, 3], keep_dim=True)
        debug_print('reduce_mean')
        out_in = (input - in_mean) / layers.sqrt(in_var + self.eps)
        debug_print('sqrt')
        ln_mean = layers.reduce_mean(input, dim=[1, 2, 3], keep_dim=True)
        ln_var = var(input, axis=[1, 2, 3], keep_dim=True)
        debug_print('reduce_mean')
        out_ln = (input - ln_mean) / layers.sqrt(ln_var + self.eps)
        debug_print('sqrt')
        ex_rho = layers.expand(self.rho, [input.shape[0], 1, 1, 1])
        out = ex_rho * out_in + (1 - ex_rho) * out_ln
        debug_print('expand')
        out = out * layers.expand(self.gamma, [input.shape[0], 1, 1, 1]) \
            + layers.expand(self.beta, [input.shape[0], 1, 1, 1])
        debug_print('expand')

        return out
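The forward pass above is the Instance-Layer Norm mixing used in U-GAT-IT-style models: instance statistics over (H, W) and layer statistics over (C, H, W) are blended by a learnable gate rho, then scaled and shifted by gamma and beta. A minimal NumPy sketch of that formula (shapes are hypothetical, and np.var here is the biased estimate rather than the unbiased var helper used above):

import numpy as np

def iln_numpy(x, rho, gamma, beta, eps=1e-5):
    # x: [N, C, H, W]; rho, gamma, beta broadcast over [1, C, 1, 1]
    in_mean = x.mean(axis=(2, 3), keepdims=True)
    in_var = x.var(axis=(2, 3), keepdims=True)
    out_in = (x - in_mean) / np.sqrt(in_var + eps)

    ln_mean = x.mean(axis=(1, 2, 3), keepdims=True)
    ln_var = x.var(axis=(1, 2, 3), keepdims=True)
    out_ln = (x - ln_mean) / np.sqrt(ln_var + eps)

    out = rho * out_in + (1 - rho) * out_ln
    return out * gamma + beta

x = np.random.rand(2, 4, 8, 8).astype("float32")
rho = np.full((1, 4, 1, 1), 0.9, dtype="float32")
gamma = np.ones((1, 4, 1, 1), dtype="float32")
beta = np.zeros((1, 4, 1, 1), dtype="float32")
print(iln_numpy(x, rho, gamma, beta).shape)  # (2, 4, 8, 8)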
Example 2
    def forward(self, input, gamma, beta):

        in_mean = reduce_mean(input, dim=[2, 3], keep_dim=True)
        in_var = my_var(input, dim=[2, 3], keep_dim=True)

        out_in = (input - in_mean) / sqrt(in_var + self.eps)

        ln_mean = reduce_mean(input, dim=[1, 2, 3], keep_dim=True)
        ln_var = my_var(input, dim=[1, 2, 3], keep_dim=True)

        out_ln = (input - ln_mean) / sqrt(ln_var + self.eps)

        ex_rho = expand(self.rho, (input.shape[0], 1, 1, 1))

        out = ex_rho * out_in + (1 - ex_rho) * out_ln

        gamma = unsqueeze(gamma, axes=2)
        gamma = unsqueeze(gamma, axes=3)
        beta = unsqueeze(beta, axes=2)
        beta = unsqueeze(beta, axes=3)
        out = out * gamma + beta

        return out
Example 3
    def forward(self, *args, **kwargs):
        """
        Args:
            start_pos (optional, `Variable` of shape [batch_size]): 
                token index of start of answer span in `context`
            end_pos (optional, `Variable` of shape [batch_size]): 
                token index of end of answer span in `context`
        Returns:
            loss (`Variable` of shape []):
                Cross entropy loss averaged over the batch; positions where label == -100 are ignored.
                Returns None if start_pos/end_pos are not given.
            start_logits (`Variable` of shape [batch_size, seq_len]):
                output logits of the start position; use argmax(start_logits) to get the start index
            end_logits (`Variable` of shape [batch_size, seq_len]):
                output logits of the end position; use argmax(end_logits) to get the end index
        """

        start_pos = kwargs.pop('start_pos', None)
        end_pos = kwargs.pop('end_pos', None)
        pooled, encoded = super(ErnieModelForQuestionAnswering,
                                self).forward(*args, **kwargs)
        encoded = self.dropout(encoded)
        encoded = self.classifier(encoded)
        start_logit, end_logits = L.unstack(encoded, axis=-1)
        if start_pos is not None and end_pos is not None:
            if len(start_pos.shape) == 1:
                start_pos = L.unsqueeze(start_pos, axes=[-1])
            if len(end_pos.shape) == 1:
                end_pos = L.unsqueeze(end_pos, axes=[-1])
            start_loss = L.softmax_with_cross_entropy(start_logit, start_pos)
            end_loss = L.softmax_with_cross_entropy(end_logits, end_pos)
            loss = (L.reduce_mean(start_loss) + L.reduce_mean(end_loss)) / 2.
        else:
            loss = None
        return loss, start_logit, end_logits
Example 4
def var(input, axis=None, keepdim=False, unbiased=True, out=None, name=None):
    dtype = convert_dtype(input.dtype)
    if dtype not in ["float32", "float64"]:
        raise ValueError("Layer tensor.var() only supports floating-point "
                         "dtypes, but received {}.".format(dtype))
    rank = len(input.shape)
    axes = axis if axis is not None and axis != [] else range(rank)
    axes = [e if e >= 0 else e + rank for e in axes]
    inp_shape = input.shape if in_dygraph_mode() else layers.shape(input)
    mean = layers.reduce_mean(input, dim=axis, keep_dim=True, name=name)
    tmp = layers.reduce_mean(
        (input - mean)**2, dim=axis, keep_dim=keepdim, name=name)

    if unbiased:
        n = 1
        for i in axes:
            n *= inp_shape[i]
        if not in_dygraph_mode():
            n = layers.cast(n, dtype)
            zero_const = layers.fill_constant(shape=[1], dtype=dtype, value=0.0)
            factor = where(n > 1.0, n / (n - 1.0), zero_const)
        else:
            factor = n / (n - 1.0) if n > 1.0 else 0.0
        tmp *= factor
    if out:
        layers.assign(input=tmp, output=out)
        return out
    else:
        return tmp
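A quick NumPy check of the unbiased-variance formula the helper above implements: the mean of squared deviations is rescaled by n / (n - 1) (Bessel's correction). The array and axis below are arbitrary.

import numpy as np

x = np.random.rand(3, 5)
axis, n = 1, x.shape[1]

mean = x.mean(axis=axis, keepdims=True)
biased = ((x - mean) ** 2).mean(axis=axis)   # reduce_mean of squared deviations
unbiased = biased * (n / (n - 1.0))          # factor applied when unbiased=True
print(np.allclose(unbiased, x.var(axis=axis, ddof=1)))  # True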
Example 5
def layer_norm(x,
               begin_norm_axis=1,
               epsilon=1e-12,
               param_attr=None,
               bias_attr=None):
    """
    Replace build-in layer_norm op with this function
    """
    helper = LayerHelper('layer_norm', **locals())
    mean = layers.reduce_mean(x, dim=begin_norm_axis, keep_dim=True)
    shift_x = layers.elementwise_sub(x=x, y=mean, axis=0)
    variance = layers.reduce_mean(
        layers.square(shift_x), dim=begin_norm_axis, keep_dim=True)
    r_stdev = layers.rsqrt(variance + epsilon)
    norm_x = layers.elementwise_mul(x=shift_x, y=r_stdev, axis=0)

    param_shape = [reduce(lambda x, y: x * y, norm_x.shape[begin_norm_axis:])]
    param_dtype = norm_x.dtype
    scale = helper.create_parameter(
        attr=param_attr,
        shape=param_shape,
        dtype=param_dtype,
        default_initializer=fluid.initializer.Constant(1.))
    bias = helper.create_parameter(
        attr=bias_attr,
        shape=param_shape,
        dtype=param_dtype,
        is_bias=True,
        default_initializer=fluid.initializer.Constant(0.))

    out = layers.elementwise_mul(x=norm_x, y=scale, axis=-1)
    out = layers.elementwise_add(x=out, y=bias, axis=-1)

    return out
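For the common 2-D case (input of shape [batch, hidden] with begin_norm_axis=1), the replacement helper above reduces to the standard layer-norm formula; a NumPy sketch with hypothetical shapes:

import numpy as np

def layer_norm_numpy(x, scale, bias, epsilon=1e-12):
    mean = x.mean(axis=1, keepdims=True)
    variance = ((x - mean) ** 2).mean(axis=1, keepdims=True)
    norm_x = (x - mean) / np.sqrt(variance + epsilon)
    return norm_x * scale + bias

x = np.random.rand(4, 8).astype("float32")
out = layer_norm_numpy(x, np.ones(8, "float32"), np.zeros(8, "float32"))
print(out.mean(axis=1))  # ~0 for every row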
Example 6
    def _collect_metrics(self, inputs, outputs):
        """ Calculate loss function by using inputs and outputs. """
        metrics = {}

        tgt_len = layers.reduce_sum(
            layers.reduce_sum(inputs["tgt_mask"], dim=1) - 1)
        tgt_len.stop_gradient = True

        label = inputs["tgt_token"][:, 1:]
        if self.label_smooth > 0:
            one_hot_label = layers.one_hot(label, self.num_token_embeddings)
            smooth_label = layers.label_smooth(one_hot_label,
                                               epsilon=self.label_smooth,
                                               dtype=self._dtype)
            nll = layers.cross_entropy(outputs["dec_pred"],
                                       smooth_label,
                                       soft_label=True,
                                       ignore_index=self.padding_idx)
        else:
            nll = layers.cross_entropy(outputs["dec_probs"],
                                       label,
                                       ignore_index=self.padding_idx)
        nll = layers.reduce_sum(nll, dim=1)
        token_nll = layers.reduce_sum(nll) / tgt_len
        nll = layers.reduce_mean(nll)
        metrics["nll"] = nll
        metrics["token_nll"] = token_nll
        loss = nll

        if self.num_latent > 0 and self.with_bow:
            bow_probs = F.unsqueeze(outputs["bow_probs"], [1])
            bow_probs = layers.expand(bow_probs, [1, label.shape[1], 1])
            if self.label_smooth > 0:
                bow = layers.cross_entropy(bow_probs,
                                           smooth_label,
                                           soft_label=True,
                                           ignore_index=self.padding_idx)
            else:
                bow = layers.cross_entropy(bow_probs,
                                           label,
                                           ignore_index=self.padding_idx)
            bow = layers.reduce_sum(bow, dim=1)
            token_bow = layers.reduce_sum(bow) / tgt_len
            bow = layers.reduce_mean(bow)
            metrics["bow"] = bow
            metrics["token_bow"] = token_bow
            loss = loss + bow

        if self.num_latent > 0 and self.use_discriminator:
            dis = 0.0 - (layers.log(outputs["pos_probs"]) +
                         layers.log(1.0 - outputs["neg_probs"]))
            dis = layers.reduce_mean(dis)
            metrics["dis"] = dis
            loss = loss + dis * self.dis_ratio

        metrics["loss"] = loss
        metrics["token_num"] = tgt_len
        return metrics
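In the label-smoothing branch, fluid's layers.label_smooth with the default uniform prior turns a one-hot target into (1 - epsilon) * one_hot + epsilon / K for K classes; a small worked NumPy example with made-up values:

import numpy as np

epsilon, K = 0.1, 4
one_hot = np.array([0., 0., 1., 0.])
smooth = (1 - epsilon) * one_hot + epsilon / K
print(smooth)        # [0.025 0.025 0.925 0.025]
print(smooth.sum())  # 1.0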
Example 7
def create_model(args, config):
    """Create model for given model configuration."""
    logging.info('building model')
    graph_wrapper = GraphWrapper(name="graph",
                                 node_feat=[('atom_type', [None, 1], "int64"),
                                            ('chirality_tag', [None,
                                                               1], "int64")],
                                 edge_feat=[('bond_type', [None, 1], "int64"),
                                            ('bond_direction', [None,
                                                                1], "int64")])

    # NOTE: [num_nodes, num_graphs], bs = num_graphs
    pos_mask = L.data(name='pos_mask',
                      shape=[-1, args.batch_size],
                      dtype='float32')
    neg_mask = L.data(name='neg_mask',
                      shape=[-1, args.batch_size],
                      dtype='float32')

    encoder = GINEncoder(config)
    global_repr, patch_summary = encoder.forward(graph_wrapper)

    global_D = FF(encoder.embedding_dim)
    local_D = FF(encoder.embedding_dim)
    g_enc = global_D.forward(global_repr)
    l_enc = local_D.forward(patch_summary)

    res = L.matmul(l_enc, g_enc, transpose_y=True)
    E_pos = get_positive_expectation(res * pos_mask,
                                     config['measure'],
                                     average=False)
    E_pos = L.reduce_sum(E_pos) / graph_wrapper.num_nodes
    E_neg = get_negative_expectation(res * neg_mask,
                                     config['measure'],
                                     average=False)
    E_neg = L.reduce_sum(E_neg) / (graph_wrapper.num_nodes *
                                   (graph_wrapper.num_graph - 1))
    local_global_loss = E_neg - E_pos

    if config['prior']:
        prior_D = PriorDiscriminator(encoder.embedding_dim)
        prior = L.uniform_random([args.batch_size, encoder.embedding_dim],
                                 min=0.0,
                                 max=1.0)
        term_1 = L.reduce_mean(L.log(prior_D.forward(prior)))
        term_2 = L.reduce_mean(L.log(1.0 - prior_D.forward(global_repr)))
        prior_loss = -(term_1 + term_2) * config['gamma']
    else:
        prior_loss = 0

    total_loss = local_global_loss + prior_loss

    keys = ['loss', 'graph_wrapper', 'encoder', 'graph_emb']
    Agent = namedtuple('Agent', keys)
    return Agent(loss=total_loss,
                 graph_wrapper=graph_wrapper,
                 encoder=encoder,
                 graph_emb=global_repr)
Example 8
    def forward(self):
        """ forward
        """
        src, dst = L.read_file(self.pyreader)

        if self.is_sparse:
            # sparse mode use 2 dims input.
            src = L.reshape(src, [-1, 1])
            dst = L.reshape(dst, [-1, 1])

        src_embed = split_embedding(src, self.num_nodes, self.hidden_size,
                                    self.embed_init, "weight", self.num_part,
                                    self.is_sparse)

        dst_embed = split_embedding(dst, self.num_nodes, self.hidden_size,
                                    self.embed_init, "weight", self.num_part,
                                    self.is_sparse)

        if self.is_sparse:
            src_embed = L.reshape(src_embed,
                                  [-1, 1, self.num_featuers, self.hidden_size])
            dst_embed = L.reshape(
                dst_embed,
                [-1, self.neg_num + 1, self.num_featuers, self.hidden_size])

        src_embed = L.reduce_mean(src_embed, 2)
        dst_embed = L.reduce_mean(dst_embed, 2)

        logits = L.matmul(src_embed, dst_embed,
                          transpose_y=True)  # [batch_size, 1, neg_num+1]

        pos_label = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                    "float32", 1)
        neg_label = L.fill_constant_batch_size_like(logits,
                                                    [-1, 1, self.neg_num],
                                                    "float32", 0)
        label = L.concat([pos_label, neg_label], -1)

        pos_weight = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                     "float32", self.neg_num)
        neg_weight = L.fill_constant_batch_size_like(logits,
                                                     [-1, 1, self.neg_num],
                                                     "float32", 1)
        weight = L.concat([pos_weight, neg_weight], -1)

        weight.stop_gradient = True
        label.stop_gradient = True

        loss = L.sigmoid_cross_entropy_with_logits(logits, label)
        loss = loss * weight
        loss = L.reduce_mean(loss)
        loss = loss * ((self.neg_num + 1) / 2 / self.neg_num)
        loss.persistable = True
        self.loss = loss
        return loss
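The final rescaling by (neg_num + 1) / 2 / neg_num, combined with the weights built above (neg_num on the positive column, 1 on each negative column), makes the positive term and the averaged negative terms count equally: mean(loss * weight) * (neg_num + 1) / (2 * neg_num) = loss_pos / 2 + mean(loss_neg) / 2. A NumPy check with arbitrary numbers:

import numpy as np

neg_num = 5
col_loss = np.random.rand(neg_num + 1)                 # positive column first
weight = np.array([neg_num] + [1] * neg_num, dtype=float)

scaled = (col_loss * weight).mean() * ((neg_num + 1) / 2 / neg_num)
balanced = col_loss[0] / 2 + col_loss[1:].mean() / 2
print(np.allclose(scaled, balanced))  # True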
Example 9
    def forward(self, input):
        rho_ = L.clip(self.rho, min=0, max=1)
        in_mean = L.reduce_mean(input, dim=[2, 3], keep_dim=True)
        in_var = var(input, dim=[2, 3], keepdim=True)
        out_in = (input - in_mean) / L.sqrt(in_var + self.eps)
        ln_mean = L.reduce_mean(input, dim=[1, 2, 3], keep_dim=True)
        ln_var = var(input, dim=[1, 2, 3], keepdim=True)
        out_ln = (input - ln_mean) / L.sqrt(ln_var + self.eps)
        out = rho_ * out_in + (1 - rho_) * out_ln
        out = out * self.gamma + self.beta

        return out
Example 10
    def pointwise_loss(self):
        """point wise model"""
        self.logits = L.reduce_sum(self.query_repr * self.poi_repr, -1)
        self.score = L.sigmoid(self.logits)
        self.loss = L.sigmoid_cross_entropy_with_logits(
            L.reshape(self.logits, [-1, 1]), L.reshape(self.labels, [-1, 1]))

        auc_label = L.cast(self.labels, dtype="int64")
        auc_label.stop_gradient = True
        _, self.batch_auc, _ = L.auc(
            L.reshape(self.score, [-1, 1]), L.reshape(auc_label, [-1, 1]))
        self.metrics = [L.reduce_mean(self.loss), self.batch_auc]
        self.loss = L.reduce_mean(self.loss)
Example 11
    def forward(self, input, gamma, beta):
        rho_ = L.clip(self.rho, min=0, max=1)
        in_mean = L.reduce_mean(input, dim=[2, 3], keep_dim=True)
        in_var = var(input, dim=[2, 3], keepdim=True)
        out_in = (input - in_mean) / L.sqrt(in_var + self.eps)
        ln_mean = L.reduce_mean(input, dim=[1, 2, 3], keep_dim=True)
        ln_var = var(input, dim=[1, 2, 3], keepdim=True)
        out_ln = (input - ln_mean) / L.sqrt(ln_var + self.eps)
        out = rho_ * out_in + (1 - rho_) * out_ln
        out = out * L.unsqueeze(gamma, axes=[2, 3]) + L.unsqueeze(beta,
                                                                  axes=[2, 3])

        return out
Example 12
def var(input, dim=None, keep_dim=True, unbiased=True, name=None):
    rank = len(input.shape)
    dims = dim if dim is not None and dim != [] else range(rank)
    dims = [e if e >= 0 else e + rank for e in dims]
    inp_shape = input.shape
    mean = reduce_mean(input, dim=dim, keep_dim=True, name=name)
    tmp = reduce_mean((input - mean) ** 2, dim=dim, keep_dim=True, name=name)
    if unbiased:
        n = 1
        for i in dims:
            n *= inp_shape[i]
        factor = n / (n - 1.0) if n > 1.0 else 0.0
        tmp *= factor
    return tmp
Example 13
    def forward(self, x):
        """ Forward process of LayerNorm. """
        mean = layers.reduce_mean(
            x,
            dim=list(range(self._begin_norm_axis, len(x.shape))),
            keep_dim=True)
        shift_x = layers.elementwise_sub(x=x, y=mean, axis=0)
        variance = layers.reduce_mean(
            layers.square(shift_x),
            dim=list(range(self._begin_norm_axis, len(x.shape))),
            keep_dim=True)
        r_stdev = layers.rsqrt(variance + self._epsilon)
        norm_x = layers.elementwise_mul(x=shift_x, y=r_stdev, axis=0)
        out = layers.elementwise_mul(x=norm_x, y=self._scale_w, axis=-1)
        out = layers.elementwise_add(x=out, y=self._bias_w, axis=-1)
        return out
Example 14
    def forward(self, input):
        
        in_mean = layers.reduce_mean(input, dim=[2, 3], keep_dim=True)
        in_var = get_var(input, dim=[2, 3], keepdim=True)
        out_in = (input - in_mean) / layers.sqrt(in_var + self.eps)

        ln_mean = layers.reduce_mean(input, dim=[1, 2, 3], keep_dim=True)
        ln_var = get_var(input, dim=[1, 2, 3], keepdim=True)  # layer-norm statistics span C, H, W
        out_ln = (input - ln_mean) / layers.sqrt(ln_var + self.eps)

        ex_rho = fluid.layers.expand(self.rho, [input.shape[0], 1, 1, 1])
        out = ex_rho * out_in + (1 - ex_rho) * out_ln
        out = out * fluid.layers.expand(self.gamma, [input.shape[0], 1, 1, 1]) \
            + fluid.layers.expand(self.beta, [input.shape[0], 1, 1, 1])

        return out
Example 15
    def points2bbox(self, pts, y_first=True):
        """点集转换成包围框.

        :param pts: the input points sets (fields), each points
            set (fields) is represented as 2n scalar.
        :param y_first: if y_first=True, the point set is represented as
            [y1, x1, y2, x2 ... yn, xn], otherwise the point set is
            represented as [x1, y1, x2, y2 ... xn, yn].
        :return: each points set is converting to a bbox [x1, y1, x2, y2].
        """
        pts_reshape = L.reshape(pts, (pts.shape[0], -1, 2, pts.shape[2], pts.shape[3]))
        pts_y = pts_reshape[:, :, 0, :, :] if y_first else pts_reshape[:, :, 1, :, :]
        pts_x = pts_reshape[:, :, 1, :, :] if y_first else pts_reshape[:, :, 0, :, :]
        if self.transform_method == 'minmax':
            # bbox_left = pts_x.min(dim=1, keepdim=True)[0]
            # bbox_right = pts_x.max(dim=1, keepdim=True)[0]
            # bbox_up = pts_y.min(dim=1, keepdim=True)[0]
            # bbox_bottom = pts_y.max(dim=1, keepdim=True)[0]
            # bbox = torch.cat([bbox_left, bbox_up, bbox_right, bbox_bottom],
            #                  dim=1)
            pass
        elif self.transform_method == 'partial_minmax':
            # pts_y = pts_y[:, :4, ...]
            # pts_x = pts_x[:, :4, ...]
            # bbox_left = pts_x.min(dim=1, keepdim=True)[0]
            # bbox_right = pts_x.max(dim=1, keepdim=True)[0]
            # bbox_up = pts_y.min(dim=1, keepdim=True)[0]
            # bbox_bottom = pts_y.max(dim=1, keepdim=True)[0]
            # bbox = torch.cat([bbox_left, bbox_up, bbox_right, bbox_bottom],
            #                  dim=1)
            pass
        elif self.transform_method == 'moment':
            pts_y_mean = L.reduce_mean(pts_y, dim=1, keep_dim=True)
            pts_x_mean = L.reduce_mean(pts_x, dim=1, keep_dim=True)
            pts_y_std = paddle.std(pts_y - pts_y_mean, axis=1, keepdim=True)
            pts_x_std = paddle.std(pts_x - pts_x_mean, axis=1, keepdim=True)
            moment_transfer = (self.moment_transfer * self.moment_mul) + (
                self.moment_transfer.detach() * (1 - self.moment_mul))
            moment_width_transfer = moment_transfer[0]
            moment_height_transfer = moment_transfer[1]
            half_width = pts_x_std * L.exp(moment_width_transfer)
            half_height = pts_y_std * L.exp(moment_height_transfer)
            bbox = L.concat([
                pts_x_mean - half_width, pts_y_mean - half_height,
                pts_x_mean + half_width, pts_y_mean + half_height
            ], axis=1)
        else:
            raise NotImplementedError
        return bbox
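A NumPy sketch of the 'moment' branch above: the box center is the mean of the points and the half extents are the per-axis standard deviations scaled by exp of the learned width/height transfer (fixed to 0 here purely for illustration):

import numpy as np

pts = np.random.rand(16, 2)                            # one point set as (x, y) rows
moment_width_transfer = moment_height_transfer = 0.0   # learned scalars in the model

cx, cy = pts[:, 0].mean(), pts[:, 1].mean()
half_w = pts[:, 0].std() * np.exp(moment_width_transfer)
half_h = pts[:, 1].std() * np.exp(moment_height_transfer)
bbox = [cx - half_w, cy - half_h, cx + half_w, cy + half_h]
print(bbox)  # [x1, y1, x2, y2]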
Example 16
    def forward(self, x):
        x = layers.transpose(x, perm=[0, 2, 1, 3, 4])
        x = fluid.layers.pool3d(x,
                                pool_size=(3, 1, 1),
                                pool_type='avg',
                                pool_stride=(2, 1, 1))
        b, c, t, h, w = x.shape
        x = layers.transpose(x, perm=[0, 2, 1, 3, 4])
        x = layers.reshape(x, shape=[b * t, c, h, w])
        x = self.stem(x)
        #print(self.stem.weight.numpy().sum())
        x = self.bn1(x)
        x = layers.pool2d(x,
                          pool_size=3,
                          pool_type='max',
                          pool_stride=2,
                          pool_padding=1)
        x = self.res2(x)
        x = self.res3(x)
        bt, c, h, w = x.shape
        x = layers.reshape(x, shape=[b, t, c, h, w])
        x = layers.transpose(x, perm=[0, 2, 1, 3, 4])
        x = fluid.layers.pool3d(x,
                                pool_size=(3, 1, 1),
                                pool_type='avg',
                                pool_stride=(2, 1, 1))
        b, c, t, h, w = x.shape
        x = layers.transpose(x, perm=[0, 2, 1, 3, 4])
        res = layers.reshape(x[:, 1:-1], shape=[-1, c, h, w])
        x = layers.reshape(x, shape=[b * t, c, h, w])
        x = self.rep_flow(x)
        x = self.flow_conv(x)
        x = self.rep_flow2(x)
        x = layers.relu(res + x)
        x = self.res4(x)
        x = self.res5(x)

        x = self.dropout(x)
        x = layers.reduce_mean(x, dim=3)
        x = layers.reduce_mean(x, dim=2)

        x = layers.reshape(x, shape=[x.shape[0], -1])
        x = self.classify(x)

        x = layers.reshape(x, shape=[b, -1, self.num_classes])

        x = layers.reduce_mean(x, dim=1)
        return x
Example 17
def node2vec_model(graph, hidden_size=16, neg_num=5):

    pyreader = l.py_reader(
        capacity=70,
        shapes=[[-1, 1, 1], [-1, 1, 1], [-1, neg_num, 1]],
        dtypes=['int64', 'int64', 'int64'],
        lod_levels=[0, 0, 0],
        name='train',
        use_double_buffer=True)

    embed_init = fluid.initializer.UniformInitializer(low=-1.0, high=1.0)
    weight_init = fluid.initializer.TruncatedNormal(scale=1.0 /
                                                    math.sqrt(hidden_size))

    src, pos, negs = l.read_file(pyreader)

    embed_src = l.embedding(
        input=src,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(
            name='content', initializer=embed_init))

    weight_pos = l.embedding(
        input=pos,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(
            name='weight', initializer=weight_init))
    weight_negs = l.embedding(
        input=negs,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(
            name='weight', initializer=weight_init))

    pos_logits = l.matmul(
        embed_src, weight_pos, transpose_y=True)  # [batch_size, 1, 1]
    neg_logits = l.matmul(
        embed_src, weight_negs, transpose_y=True)  # [batch_size, 1, neg_num]

    ones_label = pos_logits * 0. + 1.
    ones_label.stop_gradient = True
    pos_loss = l.sigmoid_cross_entropy_with_logits(pos_logits, ones_label)

    zeros_label = neg_logits * 0.
    zeros_label.stop_gradient = True
    neg_loss = l.sigmoid_cross_entropy_with_logits(neg_logits, zeros_label)
    loss = (l.reduce_mean(pos_loss) + l.reduce_mean(neg_loss)) / 2

    return pyreader, loss
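The two cross-entropy terms above implement the usual skip-gram objective with negative sampling: -log sigmoid(s_pos) for the positive pair and -log sigmoid(-s_neg) for each sampled negative, averaged and then halved. A NumPy sketch with made-up scores:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

pos_score = np.array([2.0])                          # src . pos embedding
neg_scores = np.array([-1.0, 0.5, -2.0, 0.1, -0.3])  # src . neg embeddings

pos_loss = -np.log(sigmoid(pos_score))               # label 1
neg_loss = -np.log(sigmoid(-neg_scores))             # label 0
loss = (pos_loss.mean() + neg_loss.mean()) / 2
print(loss)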
Example 18
def fuse_math_min_mean_neg(x):
    """
    Fuse operation min mean for hinge loss computation of negative samples
    """
    minval = L.clip(-x - 1, -1e8, 0)
    loss = - L.reduce_mean(minval)
    return loss
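Since clip(-x - 1, -1e8, 0) equals min(-x - 1, 0), the function above is the same as mean(relu(1 + x)), the usual hinge term for negative samples; a quick NumPy check:

import numpy as np

x = np.random.randn(1000)
fused = -np.clip(-x - 1, -1e8, 0).mean()
relu_form = np.maximum(1 + x, 0).mean()
print(np.allclose(fused, relu_form))  # True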
Example 19
def link_predict_model(num_nodes,
                       hidden_size=16,
                       name='link_predict_task',
                       binary_op_type="Weighted-L2"):
    pyreader = l.py_reader(capacity=70,
                           shapes=[[-1, 1], [-1, 1], [-1, 1]],
                           dtypes=['int64', 'int64', 'int64'],
                           lod_levels=[0, 0, 0],
                           name=name + '_pyreader',
                           use_double_buffer=True)
    u, v, label = l.read_file(pyreader)
    u_embed = l.embedding(input=u,
                          size=[num_nodes, hidden_size],
                          param_attr=fluid.ParamAttr(name='content'))
    v_embed = l.embedding(input=v,
                          size=[num_nodes, hidden_size],
                          param_attr=fluid.ParamAttr(name='content'))
    u_embed.stop_gradient = True
    v_embed.stop_gradient = True

    edge_embed = binary_op(u_embed, v_embed, binary_op_type)
    logit = l.fc(input=edge_embed, size=1)
    loss = l.sigmoid_cross_entropy_with_logits(logit, l.cast(label, 'float32'))
    loss = l.reduce_mean(loss)

    prob = l.sigmoid(logit)
    return pyreader, loss, prob, label
Example 20
    def forward(self, *args, **kwargs):
        """
        Args:
            labels (optional, `Variable` of shape [batch_size]): 
                ground truth label id for each sentence
        Returns:
            loss (`Variable` of shape []):
                Cross entropy loss averaged over the batch.
                Returns None if labels are not given.
            logits (`Variable` of shape [batch_size, num_labels]):
                output logits of the classifier
        """
        labels = kwargs.pop('labels', None)
        pooled, encoded = super(ErnieModelForSequenceClassification,
                                self).forward(*args, **kwargs)
        hidden = self.dropout(pooled)
        logits = self.classifier(hidden)

        if labels is not None:
            if len(labels.shape) == 1:
                labels = L.reshape(labels, [-1, 1])
            loss = L.softmax_with_cross_entropy(logits, labels)
            loss = L.reduce_mean(loss)
        else:
            loss = None
        return loss, logits
Example 21
    def train_forward(self):
        entity_embedding, relation_embedding, transfer_matrix = self.create_share_variables(
        )
        pos_head = self.lookup_table(self.train_pos_input[:, 0],
                                     entity_embedding)
        pos_tail = self.lookup_table(self.train_pos_input[:, 2],
                                     entity_embedding)
        pos_rel = self.lookup_table(self.train_pos_input[:, 1],
                                    relation_embedding)
        neg_head = self.lookup_table(self.train_neg_input[:, 0],
                                     entity_embedding)
        neg_tail = self.lookup_table(self.train_neg_input[:, 2],
                                     entity_embedding)
        neg_rel = self.lookup_table(self.train_neg_input[:, 1],
                                    relation_embedding)

        rel_matrix = layers.reshape(
            self.lookup_table(self.train_pos_input[:, 1], transfer_matrix),
            [-1, self.hidden_size, self.hidden_size])
        pos_head_trans = self.matmul_with_expend_dims(pos_head, rel_matrix)
        pos_tail_trans = self.matmul_with_expend_dims(pos_tail, rel_matrix)

        rel_matrix_neg = layers.reshape(
            self.lookup_table(self.train_neg_input[:, 1], transfer_matrix),
            [-1, self.hidden_size, self.hidden_size])
        neg_head_trans = self.matmul_with_expend_dims(neg_head, rel_matrix_neg)
        neg_tail_trans = self.matmul_with_expend_dims(neg_tail, rel_matrix_neg)

        pos_score = self._algorithm(pos_head_trans, pos_rel, pos_tail_trans)
        neg_score = self._algorithm(neg_head_trans, neg_rel, neg_tail_trans)
        pos = layers.reduce_sum(layers.abs(pos_score), -1, keep_dim=False)
        neg = layers.reduce_sum(layers.abs(neg_score), -1, keep_dim=False)
        neg = layers.reshape(neg, shape=[-1, 1], inplace=True)
        loss = layers.reduce_mean(layers.relu(pos - neg + self.margin))
        return [loss]
Example 22
    def forward(self, *args, **kwargs):
        """
        Args:
            labels (optional, `Variable` of shape [batch_size, seq_len]): 
                ground truth label id for each token
        Returns:
            loss (`Variable` of shape []):
                Cross entropy loss averaged over batch and time; positions where label == -100 are ignored.
                Returns None if labels are not given.
            logits (`Variable` of shape [batch_size, seq_len, num_labels]):
                output logits of the classifier
        """

        labels = kwargs.pop('labels', None)
        pooled, encoded = super(ErnieModelForTokenClassification, self).forward(*args, **kwargs)
        hidden = self.dropout(encoded) # maybe not?
        logits = self.classifier(hidden)

        if labels is not None:
            if len(labels.shape) == 2:
                labels = L.unsqueeze(labels, axes=[-1])
            loss = L.softmax_with_cross_entropy(logits, labels)
            loss = L.reduce_mean(loss)
        else:
            loss = None
        return loss, logits
Example 23
def create_model(args, config, graph_label):
    """Create model for given model configuration."""
    logging.info('building model')
    graph_wrapper = GraphWrapper(name="graph",
                                 node_feat=[('atom_type', [None, 1], "int64"),
                                            ('chirality_tag', [None,
                                                               1], "int64")],
                                 edge_feat=[('bond_type', [None, 1], "int64"),
                                            ('bond_direction', [None,
                                                                1], "int64")])

    encoder = GINEncoder(config)
    global_repr, patch_summary = encoder.forward(graph_wrapper)

    hid = L.fc(global_repr,
               config['hidden_size'],
               act='relu',
               name='finetune_fc1')
    hid = L.fc(hid, config['hidden_size'], act='relu', name='finetune_fc2')

    logits = L.fc(hid, args.num_tasks, name="finetune_fc3")
    loss = L.sigmoid_cross_entropy_with_logits(x=logits, label=graph_label)
    loss = L.reduce_mean(loss)
    pred = L.sigmoid(logits)

    keys = ['loss', 'graph_wrapper', 'encoder', 'graph_emb', 'pred']
    Agent = namedtuple('Agent', keys)
    return Agent(loss=loss,
                 graph_wrapper=graph_wrapper,
                 encoder=encoder,
                 graph_emb=global_repr,
                 pred=pred)
Example 24
def soft_dice_loss(logits, labels):
    probs = L.softmax(logits, axis=-1)
    one_hot = F.one_hot(labels, depth=probs.shape[-1])
    intersection = L.reduce_sum(probs * one_hot, dim=-1)
    # union = L.reduce_sum(probs, axis=-1) + L.reduce_sum(labels, axis=-1)
    loss = 1 - intersection
    return L.reduce_mean(loss)
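The example keeps only 1 - intersection and comments out the union term. For reference, a hedged NumPy sketch of the full soft Dice variant (not the code above) that uses both:

import numpy as np

def soft_dice_full(probs, one_hot, eps=1e-8):
    # probs, one_hot: [batch, num_classes]
    intersection = (probs * one_hot).sum(axis=-1)
    union = probs.sum(axis=-1) + one_hot.sum(axis=-1)
    return (1.0 - 2.0 * intersection / (union + eps)).mean()

probs = np.array([[0.7, 0.2, 0.1]])
one_hot = np.array([[1.0, 0.0, 0.0]])
print(soft_dice_full(probs, one_hot))  # approximately 0.3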
Example 25
        def __call__(self, msg):
            alpha = msg["alpha"]  # lod-tensor (batch_size, num_heads)
            if attn_drop:
                old_h = alpha
                dropout = F.data(name='attn_drop', shape=[1], dtype="int64")
                u = L.uniform_random(shape=L.cast(L.shape(alpha)[:1], 'int64'),
                                     min=0.,
                                     max=1.)
                keeped = L.cast(u > dropout, dtype="float32")
                self_attn_mask = L.scale(x=keeped,
                                         scale=10000.0,
                                         bias=-1.0,
                                         bias_after_scale=False)
                n_head_self_attn_mask = L.stack(x=[self_attn_mask] * num_heads,
                                                axis=1)
                n_head_self_attn_mask.stop_gradient = True
                alpha = n_head_self_attn_mask + alpha
                alpha = L.lod_reset(alpha, old_h)

            h = msg["v"]
            alpha = paddle_helper.sequence_softmax(alpha)

            self.alpha = alpha
            old_h = h
            h = h * alpha
            h = L.lod_reset(h, old_h)
            h = L.sequence_pool(h, "sum")

            if concat:
                h = L.reshape(h, [-1, num_heads * hidden_size])
            else:
                h = L.reduce_mean(h, dim=1)
            return h
Example 26
def node_classify_model(word2id, num_labels, embed_dim=16):
    """Build node classify model.

    Args:
        word2id(dict): map word(node) to its corresponding index

        num_labels: The number of labels.

        embed_dim: The dimension of embedding.
    """

    nodes = fl.data('nodes', shape=[None, 1], dtype='int64')
    labels = fl.data('labels', shape=[None, 1], dtype='int64')

    embed_nodes = fl.embedding(input=nodes,
                               size=[len(word2id), embed_dim],
                               param_attr=fluid.ParamAttr(name='content'))

    embed_nodes.stop_gradient = True
    probs = fl.fc(input=embed_nodes, size=num_labels, act='softmax')
    predict = fl.argmax(probs, axis=-1)
    loss = fl.cross_entropy(input=probs, label=labels)
    loss = fl.reduce_mean(loss)

    return {
        'loss': loss,
        'probs': probs,
        'predict': predict,
        'labels': labels,
    }
Example 27
    def forward(self, src_ids, *args, **kwargs):
        tgt_labels = kwargs.pop('tgt_labels', None)
        tgt_pos = kwargs.pop('tgt_pos', None)
        encode_only = kwargs.pop('encode_only', False)
        _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs)
        #log.debug('hidden_-1 %r'% L.reduce_mean(info['hiddens'][0]).numpy())
        #log.debug('hidden_0 %r'% L.reduce_mean(info['hiddens'][1]).numpy())
        if encode_only:
            return None, None, info
        elif tgt_labels is None:
            encoded = self.mlm(encoded)
            encoded = self.mlm_ln(encoded)
            logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias
            output_ids = L.argmax(logits, -1)
            return output_ids, logits, info
        else:
            encoded_2d = L.gather_nd(encoded, tgt_pos)
            #log.debug('input shape %s' % repr(src_ids.shape))
            #log.debug(L.gather_nd(src_ids, tgt_pos).numpy())
            encoded_2d = self.mlm(encoded_2d)
            encoded_2d = self.mlm_ln(encoded_2d)
            logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias
            if len(tgt_labels.shape) == 1:
                tgt_labels = L.reshape(tgt_labels, [-1, 1])

            loss = L.reduce_mean(
                L.softmax_with_cross_entropy(
                    logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1)))
            return loss, logits_2d, info
Example 28
    def forward(self):
        """forward"""
        features_list = [self.gw.node_feat["attr"]]

        for i in range(self.num_layers):
            h = gin(self.gw,
                    features_list[i],
                    hidden_size=self.hidden_size,
                    activation="relu",
                    name="gin_%s" % (i),
                    init_eps=0.0,
                    train_eps=self.train_eps)

            h = fl.batch_norm(h)
            h = fl.relu(h)

            features_list.append(h)

        output = 0
        for i, h in enumerate(features_list):
            pooled_h = pgl.layers.graph_pooling(self.gw, h, self.pool_type)
            drop_h = fl.dropout(pooled_h,
                                self.dropout_prob,
                                dropout_implementation="upscale_in_train")
            output += fl.fc(drop_h,
                            size=self.num_class,
                            act=None,
                            param_attr=fluid.ParamAttr(name="final_fc_%s" %
                                                       (i)))

        # calculate loss
        self.loss = fl.softmax_with_cross_entropy(output, self.labels)
        self.loss = fl.reduce_mean(self.loss)
        self.acc = fl.accuracy(fl.softmax(output), self.labels)
Example 29
    def forward(self, is_test=False):
        """
        Build the network.
        """
        graph_wrapper = GraphWrapper(name="graph",
                node_feat=[
                    ('atom_type', [None, 1], "int64"), 
                    ('chirality_tag', [None, 1], "int64")],
                edge_feat=[
                    ('bond_type', [None, 1], "int64"),
                    ('bond_direction', [None, 1], "int64")])
        masked_node_indice = layers.data(name="masked_node_indice", shape=[-1, 1], dtype="int64")
        masked_node_label = layers.data(name="masked_node_label", shape=[-1, 1], dtype="int64")

        node_repr = self.gnn_model.forward(graph_wrapper, is_test=is_test)
        masked_node_repr = layers.gather(node_repr, masked_node_indice)
        logits = layers.fc(masked_node_repr, 
                size=len(CompoundConstants.atom_num_list),
                name="masked_node_logits")

        loss, pred = layers.softmax_with_cross_entropy(
                logits, masked_node_label, return_softmax=True)
        loss = layers.reduce_mean(loss)
        acc = layers.accuracy(pred, masked_node_label)

        self.graph_wrapper = graph_wrapper
        self.loss = loss
Example 30
    def get_node_repr(self):
        if self.config.JK == "last":
            return self.feature_list[-1]
        elif self.config.JK == "mean":
            return L.reduce_mean(self.feature_list, axis=0)
        else:
            return L.reduce_sum(self.feature_list, axis=0)