def forward(self, input):
    in_mean = layers.reduce_mean(input, dim=[2, 3], keep_dim=True)
    in_var = var(input, axis=[2, 3], keep_dim=True)
    debug_print('reduce_mean')
    out_in = (input - in_mean) / layers.sqrt(in_var + self.eps)
    debug_print('sqrt')
    ln_mean = layers.reduce_mean(input, dim=[1, 2, 3], keep_dim=True)
    ln_var = var(input, axis=[1, 2, 3], keep_dim=True)
    debug_print('reduce_mean')
    out_ln = (input - ln_mean) / layers.sqrt(ln_var + self.eps)
    debug_print('sqrt')
    out = layers.expand(self.rho, [input.shape[0], 1, 1, 1]) * out_in + (
        1 - layers.expand(self.rho, [input.shape[0], 1, 1, 1])) * out_ln
    debug_print('expand')
    out = out * layers.expand(self.gamma, [input.shape[0], 1, 1, 1]) + layers.expand(
        self.beta, [input.shape[0], 1, 1, 1])
    debug_print('expand')
    return out
def forward(self, input, gamma, beta):
    in_mean = reduce_mean(input, dim=[2, 3], keep_dim=True)
    in_var = my_var(input, dim=[2, 3], keep_dim=True)
    out_in = (input - in_mean) / sqrt(in_var + self.eps)
    ln_mean = reduce_mean(input, dim=[1, 2, 3], keep_dim=True)
    ln_var = my_var(input, dim=[1, 2, 3], keep_dim=True)
    out_ln = (input - ln_mean) / sqrt(ln_var + self.eps)
    ex_rho = expand(self.rho, (input.shape[0], 1, 1, 1))
    out = ex_rho * out_in + (1 - ex_rho) * out_ln
    gamma = unsqueeze(gamma, axes=2)
    gamma = unsqueeze(gamma, axes=3)
    beta = unsqueeze(beta, axes=2)
    beta = unsqueeze(beta, axes=3)
    out = out * gamma + beta
    return out
def forward(self, *args, **kwargs): """ Args: start_pos (optional, `Variable` of shape [batch_size]): token index of start of answer span in `context` end_pos (optional, `Variable` of shape [batch_size]): token index of end of answer span in `context` Returns: loss (`Variable` of shape []): Cross entropy loss mean over batch and time, ignore positions where label == -100 if labels not set, returns None start_logits (`Variable` of shape [batch_size, hidden_size]): output logits of start position, use argmax(start_logit) to get start index end_logits (`Variable` of shape [batch_size, hidden_size]): output logits of end position, use argmax(end_logit) to get end index """ start_pos = kwargs.pop('start_pos', None) end_pos = kwargs.pop('end_pos', None) pooled, encoded = super(ErnieModelForQuestionAnswering, self).forward(*args, **kwargs) encoded = self.dropout(encoded) encoded = self.classifier(encoded) start_logit, end_logits = L.unstack(encoded, axis=-1) if start_pos is not None and end_pos is not None: if len(start_pos.shape) == 1: start_pos = L.unsqueeze(start_pos, axes=[-1]) if len(end_pos.shape) == 1: end_pos = L.unsqueeze(end_pos, axes=[-1]) start_loss = L.softmax_with_cross_entropy(start_logit, start_pos) end_loss = L.softmax_with_cross_entropy(end_logits, end_pos) loss = (L.reduce_mean(start_loss) + L.reduce_mean(end_loss)) / 2. else: loss = None return loss, start_logit, end_logits
def var(input, axis=None, keepdim=False, unbiased=True, out=None, name=None):
    dtype = convert_dtype(input.dtype)
    if dtype not in ["float32", "float64"]:
        raise ValueError("Layer tensor.var() only supports floating-point "
                         "dtypes, but received {}.".format(dtype))
    rank = len(input.shape)
    axes = axis if axis is not None and axis != [] else range(rank)
    axes = [e if e >= 0 else e + rank for e in axes]
    inp_shape = input.shape if in_dygraph_mode() else layers.shape(input)
    mean = layers.reduce_mean(input, dim=axis, keep_dim=True, name=name)
    tmp = layers.reduce_mean(
        (input - mean)**2, dim=axis, keep_dim=keepdim, name=name)
    if unbiased:
        n = 1
        for i in axes:
            n *= inp_shape[i]
        if not in_dygraph_mode():
            n = layers.cast(n, dtype)
            zero_const = layers.fill_constant(shape=[1], dtype=dtype, value=0.0)
            factor = where(n > 1.0, n / (n - 1.0), zero_const)
        else:
            factor = n / (n - 1.0) if n > 1.0 else 0.0
        tmp *= factor
    if out:
        layers.assign(input=tmp, output=out)
        return out
    else:
        return tmp
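# A minimal NumPy cross-check of the Bessel correction used in var() above.
# Illustrative sketch only, not part of the original code: the biased variance
# is rescaled by n / (n - 1), where n is the number of reduced elements.
import numpy as np

x = np.random.rand(2, 3, 4, 4).astype("float64")
n = x.shape[2] * x.shape[3]  # elements reduced per (N, C) statistic
biased = ((x - x.mean(axis=(2, 3), keepdims=True)) ** 2).mean(axis=(2, 3), keepdims=True)
unbiased = biased * n / (n - 1.0)  # same rescaling as `factor` above
assert np.allclose(unbiased, x.var(axis=(2, 3), keepdims=True, ddof=1))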
def layer_norm(x, begin_norm_axis=1, epsilon=1e-12, param_attr=None, bias_attr=None):
    """
    Replace the built-in layer_norm op with this function.
    """
    helper = LayerHelper('layer_norm', **locals())
    mean = layers.reduce_mean(x, dim=begin_norm_axis, keep_dim=True)
    shift_x = layers.elementwise_sub(x=x, y=mean, axis=0)
    variance = layers.reduce_mean(
        layers.square(shift_x), dim=begin_norm_axis, keep_dim=True)
    r_stdev = layers.rsqrt(variance + epsilon)
    norm_x = layers.elementwise_mul(x=shift_x, y=r_stdev, axis=0)
    param_shape = [reduce(lambda x, y: x * y, norm_x.shape[begin_norm_axis:])]
    param_dtype = norm_x.dtype
    scale = helper.create_parameter(
        attr=param_attr,
        shape=param_shape,
        dtype=param_dtype,
        default_initializer=fluid.initializer.Constant(1.))
    bias = helper.create_parameter(
        attr=bias_attr,
        shape=param_shape,
        dtype=param_dtype,
        is_bias=True,
        default_initializer=fluid.initializer.Constant(0.))
    out = layers.elementwise_mul(x=norm_x, y=scale, axis=-1)
    out = layers.elementwise_add(x=out, y=bias, axis=-1)
    return out
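# A NumPy reference of the normalisation performed by layer_norm() above.
# Illustrative sketch only; it assumes a 2-D input so that begin_norm_axis=1
# covers all trailing dimensions: centre, scale by 1/sqrt(variance + epsilon),
# then apply the learnt per-feature scale and bias.
import numpy as np

def layer_norm_ref(x, scale, bias, epsilon=1e-12):
    mean = x.mean(axis=1, keepdims=True)
    variance = ((x - mean) ** 2).mean(axis=1, keepdims=True)
    norm_x = (x - mean) / np.sqrt(variance + epsilon)
    return norm_x * scale + bias

x = np.random.rand(4, 8)
out = layer_norm_ref(x, scale=np.ones(8), bias=np.zeros(8))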
def _collect_metrics(self, inputs, outputs):
    """ Calculate loss function by using inputs and outputs. """
    metrics = {}

    tgt_len = layers.reduce_sum(
        layers.reduce_sum(inputs["tgt_mask"], dim=1) - 1)
    tgt_len.stop_gradient = True

    label = inputs["tgt_token"][:, 1:]
    if self.label_smooth > 0:
        one_hot_label = layers.one_hot(label, self.num_token_embeddings)
        smooth_label = layers.label_smooth(
            one_hot_label, epsilon=self.label_smooth, dtype=self._dtype)
        nll = layers.cross_entropy(
            outputs["dec_pred"],
            smooth_label,
            soft_label=True,
            ignore_index=self.padding_idx)
    else:
        nll = layers.cross_entropy(
            outputs["dec_probs"], label, ignore_index=self.padding_idx)
    nll = layers.reduce_sum(nll, dim=1)
    token_nll = layers.reduce_sum(nll) / tgt_len
    nll = layers.reduce_mean(nll)
    metrics["nll"] = nll
    metrics["token_nll"] = token_nll
    loss = nll

    if self.num_latent > 0 and self.with_bow:
        bow_probs = F.unsqueeze(outputs["bow_probs"], [1])
        bow_probs = layers.expand(bow_probs, [1, label.shape[1], 1])
        if self.label_smooth > 0:
            bow = layers.cross_entropy(
                bow_probs,
                smooth_label,
                soft_label=True,
                ignore_index=self.padding_idx)
        else:
            bow = layers.cross_entropy(
                bow_probs, label, ignore_index=self.padding_idx)
        bow = layers.reduce_sum(bow, dim=1)
        token_bow = layers.reduce_sum(bow) / tgt_len
        bow = layers.reduce_mean(bow)
        metrics["bow"] = bow
        metrics["token_bow"] = token_bow
        loss = loss + bow

    if self.num_latent > 0 and self.use_discriminator:
        dis = 0.0 - (layers.log(outputs["pos_probs"]) +
                     layers.log(1.0 - outputs["neg_probs"]))
        dis = layers.reduce_mean(dis)
        metrics["dis"] = dis
        loss = loss + dis * self.dis_ratio

    metrics["loss"] = loss
    metrics["token_num"] = tgt_len
    return metrics
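# A toy NumPy sketch (illustrative, not the original pipeline) of the label
# smoothing applied above: each one-hot target is mixed with a uniform
# distribution, y_smooth = (1 - epsilon) * y + epsilon / num_classes.
import numpy as np

num_classes, epsilon = 4, 0.1
one_hot = np.eye(num_classes)[np.array([2])]
smooth_label = one_hot * (1.0 - epsilon) + epsilon / num_classes
# smooth_label -> [[0.025, 0.025, 0.925, 0.025]]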
def create_model(args, config):
    """Create model for given model configuration."""
    logging.info('building model')
    graph_wrapper = GraphWrapper(
        name="graph",
        node_feat=[('atom_type', [None, 1], "int64"),
                   ('chirality_tag', [None, 1], "int64")],
        edge_feat=[('bond_type', [None, 1], "int64"),
                   ('bond_direction', [None, 1], "int64")])

    # NOTE: [num_nodes, num_graphs], bs = num_graphs
    pos_mask = L.data(name='pos_mask', shape=[-1, args.batch_size], dtype='float32')
    neg_mask = L.data(name='neg_mask', shape=[-1, args.batch_size], dtype='float32')

    encoder = GINEncoder(config)
    global_repr, patch_summary = encoder.forward(graph_wrapper)

    global_D = FF(encoder.embedding_dim)
    local_D = FF(encoder.embedding_dim)
    g_enc = global_D.forward(global_repr)
    l_enc = local_D.forward(patch_summary)

    res = L.matmul(l_enc, g_enc, transpose_y=True)
    E_pos = get_positive_expectation(res * pos_mask, config['measure'], average=False)
    E_pos = L.reduce_sum(E_pos) / graph_wrapper.num_nodes
    E_neg = get_negative_expectation(res * neg_mask, config['measure'], average=False)
    E_neg = L.reduce_sum(E_neg) / (graph_wrapper.num_nodes * (graph_wrapper.num_graph - 1))
    local_global_loss = E_neg - E_pos

    if config['prior']:
        prior_D = PriorDiscriminator(encoder.embedding_dim)
        prior = L.uniform_random([args.batch_size, encoder.embedding_dim], min=0.0, max=1.0)
        term_1 = L.reduce_mean(L.log(prior_D.forward(prior)))
        term_2 = L.reduce_mean(L.log(1.0 - prior_D.forward(global_repr)))
        prior_loss = -(term_1 + term_2) * config['gamma']
    else:
        prior_loss = 0

    total_loss = local_global_loss + prior_loss

    keys = ['loss', 'graph_wrapper', 'encoder', 'graph_emb']
    Agent = namedtuple('Agent', keys)
    return Agent(loss=total_loss, graph_wrapper=graph_wrapper,
                 encoder=encoder, graph_emb=global_repr)
def forward(self): """ forward """ src, dst = L.read_file(self.pyreader) if self.is_sparse: # sparse mode use 2 dims input. src = L.reshape(src, [-1, 1]) dst = L.reshape(dst, [-1, 1]) src_embed = split_embedding(src, self.num_nodes, self.hidden_size, self.embed_init, "weight", self.num_part, self.is_sparse) dst_embed = split_embedding(dst, self.num_nodes, self.hidden_size, self.embed_init, "weight", self.num_part, self.is_sparse) if self.is_sparse: src_embed = L.reshape(src_embed, [-1, 1, self.num_featuers, self.hidden_size]) dst_embed = L.reshape( dst_embed, [-1, self.neg_num + 1, self.num_featuers, self.hidden_size]) src_embed = L.reduce_mean(src_embed, 2) dst_embed = L.reduce_mean(dst_embed, 2) logits = L.matmul(src_embed, dst_embed, transpose_y=True) # [batch_size, 1, neg_num+1] pos_label = L.fill_constant_batch_size_like(logits, [-1, 1, 1], "float32", 1) neg_label = L.fill_constant_batch_size_like(logits, [-1, 1, self.neg_num], "float32", 0) label = L.concat([pos_label, neg_label], -1) pos_weight = L.fill_constant_batch_size_like(logits, [-1, 1, 1], "float32", self.neg_num) neg_weight = L.fill_constant_batch_size_like(logits, [-1, 1, self.neg_num], "float32", 1) weight = L.concat([pos_weight, neg_weight], -1) weight.stop_gradient = True label.stop_gradient = True loss = L.sigmoid_cross_entropy_with_logits(logits, label) loss = loss * weight loss = L.reduce_mean(loss) loss = loss * ((self.neg_num + 1) / 2 / self.neg_num) loss.persistable = True self.loss = loss return loss
def forward(self, input):
    rho_ = L.clip(self.rho, min=0, max=1)
    in_mean = L.reduce_mean(input, dim=[2, 3], keep_dim=True)
    in_var = var(input, dim=[2, 3], keepdim=True)
    out_in = (input - in_mean) / L.sqrt(in_var + self.eps)
    ln_mean = L.reduce_mean(input, dim=[1, 2, 3], keep_dim=True)
    ln_var = var(input, dim=[1, 2, 3], keepdim=True)
    out_ln = (input - ln_mean) / L.sqrt(ln_var + self.eps)
    out = rho_ * out_in + (1 - rho_) * out_ln
    out = out * self.gamma + self.beta
    return out
def pointwise_loss(self):
    """Pointwise model."""
    self.logits = L.reduce_sum(self.query_repr * self.poi_repr, -1)
    self.score = L.sigmoid(self.logits)
    self.loss = L.sigmoid_cross_entropy_with_logits(
        L.reshape(self.logits, [-1, 1]), L.reshape(self.labels, [-1, 1]))
    auc_label = L.cast(self.labels, dtype="int64")
    auc_label.stop_gradient = True
    _, self.batch_auc, _ = L.auc(
        L.reshape(self.score, [-1, 1]), L.reshape(auc_label, [-1, 1]))
    self.metrics = [L.reduce_mean(self.loss), self.batch_auc]
    self.loss = L.reduce_mean(self.loss)
def forward(self, input, gamma, beta):
    rho_ = L.clip(self.rho, min=0, max=1)
    in_mean = L.reduce_mean(input, dim=[2, 3], keep_dim=True)
    in_var = var(input, dim=[2, 3], keepdim=True)
    out_in = (input - in_mean) / L.sqrt(in_var + self.eps)
    ln_mean = L.reduce_mean(input, dim=[1, 2, 3], keep_dim=True)
    ln_var = var(input, dim=[1, 2, 3], keepdim=True)
    out_ln = (input - ln_mean) / L.sqrt(ln_var + self.eps)
    out = rho_ * out_in + (1 - rho_) * out_ln
    out = out * L.unsqueeze(gamma, axes=[2, 3]) + L.unsqueeze(beta, axes=[2, 3])
    return out
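# A NumPy sketch of the Adaptive Instance-Layer Normalisation above. Illustrative
# only: it uses the biased variance for brevity, whereas the var() helpers above
# apply a Bessel correction. Instance statistics are taken over (H, W), layer
# statistics over (C, H, W), and rho in [0, 1] interpolates between the two
# normalised results before gamma/beta are applied.
import numpy as np

def adailn_ref(x, rho, gamma, beta, eps=1e-5):
    out_in = (x - x.mean(axis=(2, 3), keepdims=True)) / np.sqrt(x.var(axis=(2, 3), keepdims=True) + eps)
    out_ln = (x - x.mean(axis=(1, 2, 3), keepdims=True)) / np.sqrt(x.var(axis=(1, 2, 3), keepdims=True) + eps)
    out = rho * out_in + (1.0 - rho) * out_ln
    return out * gamma[:, :, None, None] + beta[:, :, None, None]

x = np.random.rand(2, 4, 8, 8)
out = adailn_ref(x, rho=0.9, gamma=np.ones((2, 4)), beta=np.zeros((2, 4)))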
def var(input, dim=None, keep_dim=True, unbiased=True, name=None):
    rank = len(input.shape)
    dims = dim if dim is not None and dim != [] else range(rank)
    dims = [e if e >= 0 else e + rank for e in dims]
    inp_shape = input.shape
    mean = reduce_mean(input, dim=dim, keep_dim=True, name=name)
    tmp = reduce_mean((input - mean) ** 2, dim=dim, keep_dim=True, name=name)
    if unbiased:
        n = 1
        for i in dims:
            n *= inp_shape[i]
        factor = n / (n - 1.0) if n > 1.0 else 0.0
        tmp *= factor
    return tmp
def forward(self, x): """ Forward process of LayerNorm. """ mean = layers.reduce_mean(x, dim=list(range(self._begin_norm_axis, len(x.shape))), keep_dim=True) shift_x = layers.elementwise_sub(x=x, y=mean, axis=0) variance = layers.reduce_mean(layers.square(shift_x), dim=list(range(self._begin_norm_axis, len(x.shape))), keep_dim=True) r_stdev = layers.rsqrt(variance + self._epsilon) norm_x = layers.elementwise_mul(x=shift_x, y=r_stdev, axis=0) out = layers.elementwise_mul(x=norm_x, y=self._scale_w, axis=-1) out = layers.elementwise_add(x=out, y=self._bias_w, axis=-1) return out
def forward(self, input):
    in_mean = layers.reduce_mean(input, dim=[2, 3], keep_dim=True)
    in_var = get_var(input, dim=[2, 3], keepdim=True)
    out_in = (input - in_mean) / layers.sqrt(in_var + self.eps)
    ln_mean = layers.reduce_mean(input, dim=[1, 2, 3], keep_dim=True)
    # Layer statistics must be taken over dims [1, 2, 3] to match ln_mean.
    ln_var = get_var(input, dim=[1, 2, 3], keepdim=True)
    out_ln = (input - ln_mean) / layers.sqrt(ln_var + self.eps)
    rho = fluid.layers.expand(self.rho, [input.shape[0], 1, 1, 1])
    out = rho * out_in + (1 - rho) * out_ln
    out = out * fluid.layers.expand(self.gamma, [input.shape[0], 1, 1, 1]) \
        + fluid.layers.expand(self.beta, [input.shape[0], 1, 1, 1])
    return out
def points2bbox(self, pts, y_first=True):
    """Convert a point set to a bounding box.

    :param pts: the input point sets (fields); each point set is represented
        by 2n scalars.
    :param y_first: if y_first=True, the point set is represented as
        [y1, x1, y2, x2 ... yn, xn], otherwise the point set is represented
        as [x1, y1, x2, y2 ... xn, yn].
    :return: each point set is converted to a bbox [x1, y1, x2, y2].
    """
    pts_reshape = L.reshape(pts, (pts.shape[0], -1, 2, pts.shape[2], pts.shape[3]))
    pts_y = pts_reshape[:, :, 0, :, :] if y_first else pts_reshape[:, :, 1, :, :]
    pts_x = pts_reshape[:, :, 1, :, :] if y_first else pts_reshape[:, :, 0, :, :]
    if self.transform_method == 'minmax':
        # bbox_left = pts_x.min(dim=1, keepdim=True)[0]
        # bbox_right = pts_x.max(dim=1, keepdim=True)[0]
        # bbox_up = pts_y.min(dim=1, keepdim=True)[0]
        # bbox_bottom = pts_y.max(dim=1, keepdim=True)[0]
        # bbox = torch.cat([bbox_left, bbox_up, bbox_right, bbox_bottom],
        #                  dim=1)
        pass
    elif self.transform_method == 'partial_minmax':
        # pts_y = pts_y[:, :4, ...]
        # pts_x = pts_x[:, :4, ...]
        # bbox_left = pts_x.min(dim=1, keepdim=True)[0]
        # bbox_right = pts_x.max(dim=1, keepdim=True)[0]
        # bbox_up = pts_y.min(dim=1, keepdim=True)[0]
        # bbox_bottom = pts_y.max(dim=1, keepdim=True)[0]
        # bbox = torch.cat([bbox_left, bbox_up, bbox_right, bbox_bottom],
        #                  dim=1)
        pass
    elif self.transform_method == 'moment':
        pts_y_mean = L.reduce_mean(pts_y, dim=1, keep_dim=True)
        pts_x_mean = L.reduce_mean(pts_x, dim=1, keep_dim=True)
        pts_y_std = paddle.std(pts_y - pts_y_mean, axis=1, keepdim=True)
        pts_x_std = paddle.std(pts_x - pts_x_mean, axis=1, keepdim=True)
        moment_transfer = (self.moment_transfer * self.moment_mul) + (
            self.moment_transfer.detach() * (1 - self.moment_mul))
        moment_width_transfer = moment_transfer[0]
        moment_height_transfer = moment_transfer[1]
        half_width = pts_x_std * L.exp(moment_width_transfer)
        half_height = pts_y_std * L.exp(moment_height_transfer)
        bbox = L.concat([
            pts_x_mean - half_width, pts_y_mean - half_height,
            pts_x_mean + half_width, pts_y_mean + half_height
        ], axis=1)
    else:
        raise NotImplementedError
    return bbox
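# A small NumPy sketch of the 'moment' branch above, with a single point set and
# hypothetical transfer values (illustrative only): the bbox centre is the mean
# of the points and the half extents are the coordinate standard deviations
# scaled by exp() of the learnt width/height transfer.
import numpy as np

pts_x = np.array([1.0, 3.0, 2.0, 4.0])
pts_y = np.array([2.0, 6.0, 3.0, 5.0])
moment_width_transfer, moment_height_transfer = 0.1, 0.2  # hypothetical learnt values

cx, cy = pts_x.mean(), pts_y.mean()
half_w = pts_x.std(ddof=1) * np.exp(moment_width_transfer)
half_h = pts_y.std(ddof=1) * np.exp(moment_height_transfer)
bbox = [cx - half_w, cy - half_h, cx + half_w, cy + half_h]  # [x1, y1, x2, y2]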
def forward(self, x):
    x = layers.transpose(x, perm=[0, 2, 1, 3, 4])
    x = fluid.layers.pool3d(x, pool_size=(3, 1, 1), pool_type='avg', pool_stride=(2, 1, 1))
    b, c, t, h, w = x.shape
    x = layers.transpose(x, perm=[0, 2, 1, 3, 4])
    x = layers.reshape(x, shape=[b * t, c, h, w])

    x = self.stem(x)
    #print(self.stem.weight.numpy().sum())
    x = self.bn1(x)
    x = layers.pool2d(x, pool_size=3, pool_type='max', pool_stride=2, pool_padding=1)
    x = self.res2(x)
    x = self.res3(x)

    bt, c, h, w = x.shape
    x = layers.reshape(x, shape=[b, t, c, h, w])
    x = layers.transpose(x, perm=[0, 2, 1, 3, 4])
    x = fluid.layers.pool3d(x, pool_size=(3, 1, 1), pool_type='avg', pool_stride=(2, 1, 1))
    b, c, t, h, w = x.shape
    x = layers.transpose(x, perm=[0, 2, 1, 3, 4])

    res = layers.reshape(x[:, 1:-1], shape=[-1, c, h, w])
    x = layers.reshape(x, shape=[b * t, c, h, w])
    x = self.rep_flow(x)
    x = self.flow_conv(x)
    x = self.rep_flow2(x)
    x = layers.relu(res + x)

    x = self.res4(x)
    x = self.res5(x)
    x = self.dropout(x)

    x = layers.reduce_mean(x, dim=3)
    x = layers.reduce_mean(x, dim=2)
    x = layers.reshape(x, shape=[x.shape[0], -1])
    x = self.classify(x)
    x = layers.reshape(x, shape=[b, -1, self.num_classes])
    x = layers.reduce_mean(x, dim=1)
    return x
def node2vec_model(graph, hidden_size=16, neg_num=5):
    pyreader = l.py_reader(
        capacity=70,
        shapes=[[-1, 1, 1], [-1, 1, 1], [-1, neg_num, 1]],
        dtypes=['int64', 'int64', 'int64'],
        lod_levels=[0, 0, 0],
        name='train',
        use_double_buffer=True)

    embed_init = fluid.initializer.UniformInitializer(low=-1.0, high=1.0)
    weight_init = fluid.initializer.TruncatedNormal(scale=1.0 / math.sqrt(hidden_size))

    src, pos, negs = l.read_file(pyreader)

    embed_src = l.embedding(
        input=src,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(name='content', initializer=embed_init))

    weight_pos = l.embedding(
        input=pos,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(name='weight', initializer=weight_init))

    weight_negs = l.embedding(
        input=negs,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(name='weight', initializer=weight_init))

    pos_logits = l.matmul(embed_src, weight_pos, transpose_y=True)  # [batch_size, 1, 1]
    neg_logits = l.matmul(embed_src, weight_negs, transpose_y=True)  # [batch_size, 1, neg_num]

    ones_label = pos_logits * 0. + 1.
    ones_label.stop_gradient = True
    pos_loss = l.sigmoid_cross_entropy_with_logits(pos_logits, ones_label)

    zeros_label = neg_logits * 0.
    zeros_label.stop_gradient = True
    neg_loss = l.sigmoid_cross_entropy_with_logits(neg_logits, zeros_label)

    loss = (l.reduce_mean(pos_loss) + l.reduce_mean(neg_loss)) / 2
    return pyreader, loss
def fuse_math_min_mean_neg(x):
    """
    Fused min-mean operation for the hinge loss of negative samples.
    """
    minval = L.clip(-x - 1, -1e8, 0)
    loss = -L.reduce_mean(minval)
    return loss
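# A NumPy check (illustrative only) of the identity behind the hinge term above:
# -mean(clip(-x - 1, -1e8, 0)) equals mean(max(0, 1 + x)), the standard hinge
# loss for negative (fake) samples.
import numpy as np

x = np.array([-2.0, -0.5, 0.3, 1.5])
loss = -np.clip(-x - 1.0, -1e8, 0.0).mean()
assert np.isclose(loss, np.maximum(0.0, 1.0 + x).mean())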
def link_predict_model(num_nodes, hidden_size=16, name='link_predict_task',
                       binary_op_type="Weighted-L2"):
    pyreader = l.py_reader(
        capacity=70,
        shapes=[[-1, 1], [-1, 1], [-1, 1]],
        dtypes=['int64', 'int64', 'int64'],
        lod_levels=[0, 0, 0],
        name=name + '_pyreader',
        use_double_buffer=True)
    u, v, label = l.read_file(pyreader)

    u_embed = l.embedding(
        input=u,
        size=[num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(name='content'))
    v_embed = l.embedding(
        input=v,
        size=[num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(name='content'))
    u_embed.stop_gradient = True
    v_embed.stop_gradient = True

    edge_embed = binary_op(u_embed, v_embed, binary_op_type)
    logit = l.fc(input=edge_embed, size=1)

    loss = l.sigmoid_cross_entropy_with_logits(logit, l.cast(label, 'float32'))
    loss = l.reduce_mean(loss)

    prob = l.sigmoid(logit)
    return pyreader, loss, prob, label
def forward(self, *args, **kwargs): """ Args: labels (optional, `Variable` of shape [batch_size]): ground truth label id for each sentence Returns: loss (`Variable` of shape []): Cross entropy loss mean over batch if labels not set, returns None logits (`Variable` of shape [batch_size, hidden_size]): output logits of classifier """ labels = kwargs.pop('labels', None) pooled, encoded = super(ErnieModelForSequenceClassification, self).forward(*args, **kwargs) hidden = self.dropout(pooled) logits = self.classifier(hidden) if labels is not None: if len(labels.shape) == 1: labels = L.reshape(labels, [-1, 1]) loss = L.softmax_with_cross_entropy(logits, labels) loss = L.reduce_mean(loss) else: loss = None return loss, logits
def train_forward(self):
    entity_embedding, relation_embedding, transfer_matrix = self.create_share_variables()
    pos_head = self.lookup_table(self.train_pos_input[:, 0], entity_embedding)
    pos_tail = self.lookup_table(self.train_pos_input[:, 2], entity_embedding)
    pos_rel = self.lookup_table(self.train_pos_input[:, 1], relation_embedding)
    neg_head = self.lookup_table(self.train_neg_input[:, 0], entity_embedding)
    neg_tail = self.lookup_table(self.train_neg_input[:, 2], entity_embedding)
    neg_rel = self.lookup_table(self.train_neg_input[:, 1], relation_embedding)

    rel_matrix = layers.reshape(
        self.lookup_table(self.train_pos_input[:, 1], transfer_matrix),
        [-1, self.hidden_size, self.hidden_size])
    pos_head_trans = self.matmul_with_expend_dims(pos_head, rel_matrix)
    pos_tail_trans = self.matmul_with_expend_dims(pos_tail, rel_matrix)

    rel_matrix_neg = layers.reshape(
        self.lookup_table(self.train_neg_input[:, 1], transfer_matrix),
        [-1, self.hidden_size, self.hidden_size])
    neg_head_trans = self.matmul_with_expend_dims(neg_head, rel_matrix_neg)
    neg_tail_trans = self.matmul_with_expend_dims(neg_tail, rel_matrix_neg)

    pos_score = self._algorithm(pos_head_trans, pos_rel, pos_tail_trans)
    neg_score = self._algorithm(neg_head_trans, neg_rel, neg_tail_trans)

    pos = layers.reduce_sum(layers.abs(pos_score), -1, keep_dim=False)
    neg = layers.reduce_sum(layers.abs(neg_score), -1, keep_dim=False)
    neg = layers.reshape(neg, shape=[-1, 1], inplace=True)

    loss = layers.reduce_mean(layers.relu(pos - neg + self.margin))
    return [loss]
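# A toy NumPy sketch (hypothetical distances, simplified to one negative per
# positive) of the margin ranking objective above: relu(pos - neg + margin) is
# zero once each positive-triple distance is smaller than its corrupted-triple
# distance by at least `margin`.
import numpy as np

pos = np.array([0.4, 1.2, 0.9])   # distances of positive triples
neg = np.array([1.5, 1.0, 2.0])   # distances of corrupted triples
margin = 1.0
loss = np.maximum(0.0, pos - neg + margin).mean()  # -> mean([0.0, 1.2, 0.0])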
def forward(self, *args, **kwargs): """ Args: labels (optional, `Variable` of shape [batch_size, seq_len]): ground truth label id for each token Returns: loss (`Variable` of shape []): Cross entropy loss mean over batch and time, ignore positions where label == -100 if labels not set, returns None logits (`Variable` of shape [batch_size, seq_len, hidden_size]): output logits of classifier """ labels = kwargs.pop('labels', None) pooled, encoded = super(ErnieModelForTokenClassification, self).forward(*args, **kwargs) hidden = self.dropout(encoded) # maybe not? logits = self.classifier(hidden) if labels is not None: if len(labels.shape) == 2: labels = L.unsqueeze(labels, axes=[-1]) loss = L.softmax_with_cross_entropy(logits, labels) loss = L.reduce_mean(loss) else: loss = None return loss, logits
def create_model(args, config, graph_label):
    """Create model for given model configuration."""
    logging.info('building model')
    graph_wrapper = GraphWrapper(
        name="graph",
        node_feat=[('atom_type', [None, 1], "int64"),
                   ('chirality_tag', [None, 1], "int64")],
        edge_feat=[('bond_type', [None, 1], "int64"),
                   ('bond_direction', [None, 1], "int64")])

    encoder = GINEncoder(config)
    global_repr, patch_summary = encoder.forward(graph_wrapper)

    hid = L.fc(global_repr, config['hidden_size'], act='relu', name='finetune_fc1')
    hid = L.fc(hid, config['hidden_size'], act='relu', name='finetune_fc2')
    logits = L.fc(global_repr, args.num_tasks, name="finetune_fc3")

    loss = L.sigmoid_cross_entropy_with_logits(x=logits, label=graph_label)
    loss = L.reduce_mean(loss)
    pred = L.sigmoid(logits)

    keys = ['loss', 'graph_wrapper', 'encoder', 'graph_emb', 'pred']
    Agent = namedtuple('Agent', keys)
    return Agent(loss=loss, graph_wrapper=graph_wrapper, encoder=encoder,
                 graph_emb=global_repr, pred=pred)
def soft_dice_loss(logits, labels):
    probs = L.softmax(logits, axis=-1)
    one_hot = F.one_hot(labels, depth=probs.shape[-1])
    intersection = L.reduce_sum(probs * one_hot, dim=-1)
    # union = L.reduce_sum(probs, axis=-1) + L.reduce_sum(labels, axis=-1)
    loss = 1 - intersection
    return L.reduce_mean(loss)
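# A NumPy sketch (toy values, illustrative only) of the simplified dice-style
# loss above: with one-hot labels the "intersection" is just the predicted
# probability of the true class, so the loss is 1 - p_true averaged over tokens.
import numpy as np

probs = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.8, 0.1]])          # softmax outputs
labels = np.array([0, 1])
one_hot = np.eye(probs.shape[-1])[labels]
intersection = (probs * one_hot).sum(axis=-1)
loss = (1.0 - intersection).mean()           # -> 0.25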
def __call__(self, msg):
    alpha = msg["alpha"]  # lod-tensor (batch_size, num_heads)
    if attn_drop:
        old_h = alpha
        dropout = F.data(name='attn_drop', shape=[1], dtype="int64")
        u = L.uniform_random(
            shape=L.cast(L.shape(alpha)[:1], 'int64'), min=0., max=1.)
        keeped = L.cast(u > dropout, dtype="float32")
        self_attn_mask = L.scale(
            x=keeped, scale=10000.0, bias=-1.0, bias_after_scale=False)
        n_head_self_attn_mask = L.stack(x=[self_attn_mask] * num_heads, axis=1)
        n_head_self_attn_mask.stop_gradient = True
        alpha = n_head_self_attn_mask + alpha
        alpha = L.lod_reset(alpha, old_h)

    h = msg["v"]
    alpha = paddle_helper.sequence_softmax(alpha)
    self.alpha = alpha
    old_h = h
    h = h * alpha
    h = L.lod_reset(h, old_h)
    h = L.sequence_pool(h, "sum")
    if concat:
        h = L.reshape(h, [-1, num_heads * hidden_size])
    else:
        h = L.reduce_mean(h, dim=1)
    return h
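# A NumPy sketch (illustrative only) of the attention-dropout trick above: a
# Bernoulli keep mask is mapped to {0, -10000} and added to the attention
# logits, so dropped positions receive ~zero weight after the softmax.
import numpy as np

alpha = np.array([1.0, 2.0, 0.5])                  # attention logits
keeped = (np.random.rand(3) > 0.1).astype("float64")
mask = (keeped - 1.0) * 10000.0                    # kept -> 0, dropped -> -10000
weights = np.exp(alpha + mask) / np.exp(alpha + mask).sum()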
def node_classify_model(word2id, num_labels, embed_dim=16):
    """Build node classify model.

    Args:
        word2id(dict): map word(node) to its corresponding index
        num_labels: The number of labels.
        embed_dim: The dimension of embedding.
    """
    nodes = fl.data('nodes', shape=[None, 1], dtype='int64')
    labels = fl.data('labels', shape=[None, 1], dtype='int64')

    embed_nodes = fl.embedding(
        input=nodes,
        size=[len(word2id), embed_dim],
        param_attr=fluid.ParamAttr(name='content'))
    embed_nodes.stop_gradient = True

    probs = fl.fc(input=embed_nodes, size=num_labels, act='softmax')
    predict = fl.argmax(probs, axis=-1)
    loss = fl.cross_entropy(input=probs, label=labels)
    loss = fl.reduce_mean(loss)

    return {
        'loss': loss,
        'probs': probs,
        'predict': predict,
        'labels': labels,
    }
def forward(self, src_ids, *args, **kwargs):
    tgt_labels = kwargs.pop('tgt_labels', None)
    tgt_pos = kwargs.pop('tgt_pos', None)
    encode_only = kwargs.pop('encode_only', False)
    _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs)
    #log.debug('hidden_-1 %r'% L.reduce_mean(info['hiddens'][0]).numpy())
    #log.debug('hidden_0 %r'% L.reduce_mean(info['hiddens'][1]).numpy())
    if encode_only:
        return None, None, info
    elif tgt_labels is None:
        encoded = self.mlm(encoded)
        encoded = self.mlm_ln(encoded)
        logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias
        output_ids = L.argmax(logits, -1)
        return output_ids, logits, info
    else:
        encoded_2d = L.gather_nd(encoded, tgt_pos)
        #log.debug('input shape %s' % repr(src_ids.shape))
        #log.debug(L.gather_nd(src_ids, tgt_pos).numpy())
        encoded_2d = self.mlm(encoded_2d)
        encoded_2d = self.mlm_ln(encoded_2d)
        logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias
        if len(tgt_labels.shape) == 1:
            tgt_labels = L.reshape(tgt_labels, [-1, 1])
        loss = L.reduce_mean(
            L.softmax_with_cross_entropy(
                logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1)))
        return loss, logits_2d, info
def forward(self): """forward""" features_list = [self.gw.node_feat["attr"]] for i in range(self.num_layers): h = gin(self.gw, features_list[i], hidden_size=self.hidden_size, activation="relu", name="gin_%s" % (i), init_eps=0.0, train_eps=self.train_eps) h = fl.batch_norm(h) h = fl.relu(h) features_list.append(h) output = 0 for i, h in enumerate(features_list): pooled_h = pgl.layers.graph_pooling(self.gw, h, self.pool_type) drop_h = fl.dropout(pooled_h, self.dropout_prob, dropout_implementation="upscale_in_train") output += fl.fc(drop_h, size=self.num_class, act=None, param_attr=fluid.ParamAttr(name="final_fc_%s" % (i))) # calculate loss self.loss = fl.softmax_with_cross_entropy(output, self.labels) self.loss = fl.reduce_mean(self.loss) self.acc = fl.accuracy(fl.softmax(output), self.labels)
def forward(self, is_test=False):
    """
    Build the network.
    """
    graph_wrapper = GraphWrapper(
        name="graph",
        node_feat=[('atom_type', [None, 1], "int64"),
                   ('chirality_tag', [None, 1], "int64")],
        edge_feat=[('bond_type', [None, 1], "int64"),
                   ('bond_direction', [None, 1], "int64")])
    masked_node_indice = layers.data(
        name="masked_node_indice", shape=[-1, 1], dtype="int64")
    masked_node_label = layers.data(
        name="masked_node_label", shape=[-1, 1], dtype="int64")

    node_repr = self.gnn_model.forward(graph_wrapper, is_test=is_test)
    masked_node_repr = layers.gather(node_repr, masked_node_indice)
    logits = layers.fc(masked_node_repr,
                       size=len(CompoundConstants.atom_num_list),
                       name="masked_node_logits")

    loss, pred = layers.softmax_with_cross_entropy(
        logits, masked_node_label, return_softmax=True)
    loss = layers.reduce_mean(loss)
    acc = layers.accuracy(pred, masked_node_label)

    self.graph_wrapper = graph_wrapper
    self.loss = loss
def get_node_repr(self):
    if self.config.JK == "last":
        return self.feature_list[-1]
    elif self.config.JK == "mean":
        return L.reduce_mean(self.feature_list, axis=0)
    else:
        return L.reduce_sum(self.feature_list, axis=0)