def metrics(self, predictions, labels):
    """Build the accuracy metric for this task.

    Both inputs are reduced with an argmax over axis 1 before they are
    compared, so each is presumably a [batch, num_classes] score / one-hot
    tensor (TODO confirm against the caller).

    Args:
        predictions: per-class prediction scores.
        labels: per-class label scores.

    Returns:
        dict: ``{'acc': propeller.metrics.Acc}`` comparing the two argmax
        results.
    """
    predicted_ids = L.argmax(predictions, axis=1)
    gold_ids = L.argmax(labels, axis=1)
    return {'acc': propeller.metrics.Acc(gold_ids, predicted_ids)}
def greedy_search_infilling(model,
                            q_ids,
                            q_sids,
                            sos_id,
                            eos_id,
                            attn_id,
                            max_encode_len=640,
                            max_decode_len=100,
                            tgt_type_id=3):
    """Greedily decode a target sequence with the infilling scheme.

    The source is encoded once; its key/value caches are then extended by
    one position per decoding step. At every step the model is fed two
    tokens per sample, ``[previous token, attn_id]``, and the prediction at
    the second position becomes the next token.

    Args:
        model: callable returning ``(_, logits, info)`` where
            ``info['caches']`` is a ``(keys, values)`` pair of per-layer
            lists.
        q_ids: source token ids, 2-D; id 0 is treated as padding when the
            source length is counted.
        q_sids: source sentence/type ids passed to the encoder call.
        sos_id: id fed as the first "previous token".
        eos_id: id that marks a sample as finished.
        attn_id: id of the placeholder token fed at the second position.
        max_encode_len: unused here; kept for interface compatibility.
        max_decode_len: maximum number of decoding steps.
        tgt_type_id: type id given to every decoder-side token.

    Returns:
        numpy.ndarray: generated ids transposed to [batch, steps]. Decoding
        stops once every sample has emitted ``eos_id``; samples that
        finished earlier still receive tokens until then.
    """
    model.eval()
    _, _, info = model(q_ids, q_sids)
    d_batch, _ = q_ids.shape
    # Number of non-zero source tokens per sample; offsets target positions.
    seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True)
    # `np.bool` was removed in NumPy 1.24; the builtin `bool` is equivalent.
    has_stopped = np.zeros([d_batch], dtype=bool)
    output_ids = []

    past_cache = info['caches']

    cls_ids = L.ones([d_batch], dtype='int64') * sos_id
    attn_ids = L.ones([d_batch], dtype='int64') * attn_id
    ids = L.stack([cls_ids, attn_ids], -1)
    for step in range(max_decode_len):
        bias = gen_bias(q_ids, ids, step)
        pos_ids = D.to_variable(
            np.tile(np.array([[step, step + 1]], dtype=np.int64),
                    [d_batch, 1]))
        pos_ids += seqlen
        _, logits, info = model(ids,
                                L.ones_like(ids) * tgt_type_id,
                                pos_ids=pos_ids,
                                attn_bias=bias,
                                past_cache=past_cache)
        gen_ids = L.argmax(logits, -1)

        # Only the first fed position (the real previous token) is appended
        # to the cache; the placeholder position is discarded.
        past_cached_k, past_cached_v = past_cache
        cached_k, cached_v = info['caches']
        cached_k = [
            L.concat([pk, k[:, :1, :]], 1)
            for pk, k in zip(past_cached_k, cached_k)
        ]
        cached_v = [
            L.concat([pv, v[:, :1, :]], 1)
            for pv, v in zip(past_cached_v, cached_v)
        ]
        past_cache = (cached_k, cached_v)

        # The prediction at the placeholder position is the next token.
        gen_ids = gen_ids[:, 1]
        ids = L.stack([gen_ids, attn_ids], 1)

        gen_ids = gen_ids.numpy()
        has_stopped |= (gen_ids == eos_id)
        output_ids.append(gen_ids.tolist())
        if has_stopped.all():
            break
    output_ids = np.array(output_ids).transpose([1, 0])
    return output_ids
def decode(args, s_arc, s_rel, mask):
    """Turn arc and relation scores into head and relation predictions.

    Heads come from an argmax over ``s_arc``. When ``args.tree`` is set,
    every sequence whose heads fail ``utils.istree`` is re-decoded with
    ``utils.eisner``. Relation ids are the argmax of ``s_rel`` picked at the
    predicted head of each token.

    Args:
        args: options object; only ``args.tree`` is read.
        s_arc: arc scores.
        s_rel: relation scores.
        mask: token mask; its sum over the last axis is used as the
            sequence length.

    Returns:
        tuple: ``(arc_preds, rel_preds)`` as framework variables.
    """
    mask_np = mask.numpy()
    seq_lens = np.sum(mask_np, -1)
    heads = layers.argmax(s_arc, -1).numpy()

    # One flag per sequence: True when the greedy heads are not a tree.
    not_tree = []
    for length, head_seq in zip(seq_lens, heads):
        not_tree.append(not utils.istree(head_seq[:length + 1]))

    if args.tree and any(not_tree):
        heads[not_tree] = utils.eisner(s_arc.numpy()[not_tree],
                                       mask_np[not_tree])

    arc_preds = dygraph.to_variable(heads, zero_copy=False)
    best_rel = layers.argmax(s_rel, axis=-1)
    picked = nn.index_sample(best_rel, layers.unsqueeze(arc_preds, -1))
    rel_preds = layers.squeeze(picked, axes=[-1])
    return arc_preds, rel_preds
def get_metrics(self, inputs, outputs):
    """Compute classification loss, accuracy and confusion statistics.

    Args:
        inputs: mapping that provides ``"label"``.
        outputs: mapping that provides the encoder output ``"enc_out"``.

    Returns:
        dict: ``loss`` and ``cls_loss`` (the same mean cross-entropy),
        ``cls_acc``, and, for binary classification only, the counts
        ``stat_tp`` / ``stat_fp`` / ``stat_tn`` / ``stat_fn`` used to derive
        precision, recall and F1.
    """
    pooled_out = self._get_pooled_output(outputs["enc_out"])
    cls_logits = self._get_classifier_output(
        pooled_out, num_classes=self.num_classes, name="cls")
    cls_loss, cls_softmax = layers.softmax_with_cross_entropy(
        logits=cls_logits, label=inputs["label"], return_softmax=True)
    cls_acc = layers.accuracy(cls_softmax, inputs["label"])
    mean_cls_loss = layers.mean(cls_loss)

    metrics = {
        "loss": mean_cls_loss,
        "cls_loss": mean_cls_loss,
        "cls_acc": cls_acc,
    }

    if self.num_classes != 2:
        return metrics

    # Binary task: count each cell of the confusion matrix.
    pred = layers.argmax(cls_softmax, axis=1)
    label = layers.squeeze(inputs["label"], axes=[1])
    confusion_cells = (
        ("stat_tp", 1, 1),
        ("stat_fp", 1, 0),
        ("stat_tn", 0, 0),
        ("stat_fn", 0, 1),
    )
    for key, pred_value, label_value in confusion_cells:
        in_cell = layers.logical_and(pred == pred_value, label == label_value)
        metrics[key] = layers.reduce_sum(in_cell.astype("float32"))
    return metrics
def create_cam_op(self, predict, class_dim, heatmaps):
    """Build the class activation map (CAM) for the predicted class.

    The classifier weight named ``fc_weights`` is looked up, the row of the
    predicted class is gathered per sample, multiplied into the feature
    maps, and the product is summed over dimension 1.

    Args:
        predict: model output tensor activated by softmax.
        class_dim: size of the multi-class output vector.
        heatmaps: feature maps taken before global pooling.

    Returns:
        The class activation map.

    Raises:
        ValueError: if ``self.main_arch`` is neither a DenseNet model nor
            ``"xception"``.
    """
    if self.main_arch in DenseNetModels:
        feature_dim = 1024
    elif self.main_arch == "xception":
        feature_dim = 2048
    else:
        raise ValueError(
            "Calc CAM of model arch {} is not supported.".format(
                self.main_arch))

    fc_weights = FL.create_parameter(shape=[feature_dim, class_dim],
                                     dtype='float32',
                                     name="fc_weights")
    pred_idx = FL.argmax(predict, 1)
    # Put classes on the first axis so gather selects one weight row each.
    class_major = FL.transpose(fc_weights, perm=[1, 0])
    picked_weights = FL.gather(class_major, index=pred_idx)
    weighted_maps = heatmaps * picked_weights
    return FL.reduce_sum(weighted_maps, dim=1, keep_dim=False)
def should_continue(i, mel_input, outputs, hidden, attention, state, coeffs):
    """Tell the decoding loop whether another step should run.

    Decoding continues while the step counter is below ``MAX_STEP`` and the
    attention peak has not reached the last encoder position. Without
    monotonic attention layers the peak is taken to be position 0, so only
    the step limit matters.

    Only ``i`` and ``coeffs`` are read; the other arguments are accepted to
    match the loop's state signature.
    """
    T_enc = coeffs.shape[-1]
    if num_monotonic_attention_layers > 0:
        attn_peak = F.argmax(coeffs[first_mono_attention_layer, 0, 0])
    else:
        attn_peak = F.fill_constant([1], "int64", value=0)
    return i < MAX_STEP and F.reshape(attn_peak, [1]) < T_enc - 1
def node_classify_model(word2id, num_labels, embed_dim=16):
    """Build the node classification network on top of frozen embeddings.

    The embedding table is registered under the parameter name
    ``'content'`` and its output is excluded from gradient computation, so
    only the softmax classifier layer receives gradients.

    Args:
        word2id(dict): maps each word (node) to its index; its length sets
            the embedding table size.
        num_labels: number of target labels.
        embed_dim: embedding dimension.

    Returns:
        dict: ``loss`` (mean cross-entropy), ``probs``, ``predict`` (argmax
        of ``probs``) and the ``labels`` placeholder.
    """
    nodes = fl.data('nodes', shape=[None, 1], dtype='int64')
    labels = fl.data('labels', shape=[None, 1], dtype='int64')

    table_shape = [len(word2id), embed_dim]
    embed_nodes = fl.embedding(input=nodes,
                               size=table_shape,
                               param_attr=fluid.ParamAttr(name='content'))
    embed_nodes.stop_gradient = True

    probs = fl.fc(input=embed_nodes, size=num_labels, act='softmax')
    predict = fl.argmax(probs, axis=-1)
    loss = fl.reduce_mean(fl.cross_entropy(input=probs, label=labels))

    model = {}
    model['loss'] = loss
    model['probs'] = probs
    model['predict'] = predict
    model['labels'] = labels
    return model
def define_network(self, l_src_ids, l_position_ids, l_sentence_ids,
                   l_input_mask, r_src_ids, r_position_ids, r_sentence_ids,
                   r_input_mask):
    """Build the two-tower ERNIE network and its 11-way classifier head.

    An ``ErnieModel`` is created for the left and for the right input, both
    from the same config. The pooled outputs feed a cosine similarity
    (stored in ``self.confidence``) and a three-layer FC head whose output
    is stored in ``self.layers_out``.

    Returns:
        The argmax of ``self.layers_out`` over axis 1.
    """
    conf = ErnieConfig(self.conf_path)

    towers = (
        (l_src_ids, l_position_ids, l_sentence_ids, l_input_mask),
        (r_src_ids, r_position_ids, r_sentence_ids, r_input_mask),
    )
    pooled = []
    for src_ids, position_ids, sentence_ids, input_mask in towers:
        ernie = ErnieModel(src_ids,
                           position_ids,
                           sentence_ids,
                           task_ids=None,
                           input_mask=input_mask,
                           config=conf)
        pooled.append(ernie.get_pooled_output())
    l_pool_feature, r_pool_feature = pooled

    # self.clock decides whether gradients stop at the pooled features.
    l_pool_feature.stop_gradient = self.clock
    r_pool_feature.stop_gradient = self.clock

    self.confidence = layers.cos_sim(l_pool_feature, r_pool_feature)

    hidden = layers.fc([l_pool_feature, r_pool_feature], 128)
    hidden = layers.fc(hidden, 32)
    self.layers_out = layers.fc(hidden, 11, name="kea_out")
    return layers.argmax(self.layers_out, axis=1)
def forward(self, src_ids, *args, **kwargs):
    """Run the encoder and, depending on the keyword arguments, the MLM head.

    Args:
        src_ids: input token ids, forwarded to ``ErnieModel.forward``.
        *args: further positional arguments for ``ErnieModel.forward``.
        **kwargs: forwarded to ``ErnieModel.forward`` after these keys are
            removed:
            tgt_labels: target ids (1-D, or last dimension 1) or a soft
                label distribution (last dimension != 1).
            tgt_pos: indices of the target positions, used with
                ``gather_nd`` on the encoder output.
            encode_only(bool): if set, skip the MLM head entirely.

    Returns:
        tuple: one of
        - ``(None, None, info)`` when ``encode_only`` is set;
        - ``(output_ids, logits, info)`` when ``tgt_labels`` or ``tgt_pos``
          is missing, with ``output_ids`` the argmax of ``logits``;
        - ``(loss, logits_2d, info)`` otherwise, with ``loss`` the mean
          softmax cross-entropy over the gathered target positions.
    """
    tgt_labels = kwargs.pop('tgt_labels', None)
    tgt_pos = kwargs.pop('tgt_pos', None)
    encode_only = kwargs.pop('encode_only', False)
    _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs)
    if encode_only:
        return None, None, info

    # The loss branch needs both labels and their positions. If either is
    # missing, fall back to plain logits instead of failing inside
    # gather_nd with a None index.
    if tgt_labels is None or tgt_pos is None:
        encoded = self.mlm(encoded)
        encoded = self.mlm_ln(encoded)
        logits = L.matmul(
            encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias
        output_ids = L.argmax(logits, -1)
        return output_ids, logits, info

    encoded_2d = L.gather_nd(encoded, tgt_pos)
    encoded_2d = self.mlm(encoded_2d)
    encoded_2d = self.mlm_ln(encoded_2d)
    logits_2d = L.matmul(
        encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias
    if len(tgt_labels.shape) == 1:
        tgt_labels = L.reshape(tgt_labels, [-1, 1])

    # A trailing dimension other than 1 means a distribution (soft label).
    loss = L.reduce_mean(
        L.softmax_with_cross_entropy(
            logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1)))
    return loss, logits_2d, info
def build_model(self):
    """Assemble the GCN + SAG-pool graph classifier.

    Three stacked GCN layers are concatenated, pooled by ``sag_pool``, read
    out with sum (divided by ``ratio_length``) and max sequence pooling,
    and classified by a three-layer FC head.

    Side effects:
        Sets ``self.labels`` (cast to float32), ``self.loss`` (mean sigmoid
        cross-entropy), ``self.pred`` (argmax of the sigmoid outputs) and
        ``self.correct`` (number of predictions equal to
        ``self.labels_1dim``).
    """
    hidden = self.graph_wrapper.node_feat["feat"]
    layer_outputs = []
    for layer_name in ("gcn_layer_1", "gcn_layer_2", "gcn_layer_3"):
        hidden = self.gcn(gw=self.graph_wrapper,
                          feature=hidden,
                          hidden_size=self.hidden_size,
                          activation="relu",
                          norm=self.graph_wrapper.node_feat["norm"],
                          name=layer_name)
        layer_outputs.append(hidden)
    stacked = L.concat(input=layer_outputs, axis=-1)

    pooled, ratio_length = sag_pool(gw=self.graph_wrapper,
                                    feature=stacked,
                                    ratio=self.pooling_ratio,
                                    graph_id=self.graph_id,
                                    dataset=self.args.dataset_name,
                                    name="sag_pool_1")
    pooled = L.lod_reset(pooled, self.graph_wrapper.graph_lod)

    # Readout: sum pooling divided by ratio_length, next to max pooling.
    sum_readout = L.sequence_pool(pooled, "sum")
    ratio_length = L.cast(ratio_length, dtype="float32")
    mean_readout = L.elementwise_div(sum_readout, ratio_length, axis=-1)
    max_readout = L.sequence_pool(pooled, "max")
    readout = L.concat(input=[max_readout, mean_readout], axis=-1)

    head = L.fc(readout, size=self.hidden_size, act="relu")
    head = L.dropout(head, dropout_prob=self.dropout_ratio)
    head = L.fc(head, size=self.hidden_size // 2, act="relu")
    logits = L.fc(head,
                  size=self.num_classes,
                  act=None,
                  param_attr=fluid.ParamAttr(name="final_fc"))

    self.labels = L.cast(self.labels, dtype="float32")
    per_sample_loss = L.sigmoid_cross_entropy_with_logits(x=logits,
                                                          label=self.labels)
    self.loss = L.mean(per_sample_loss)

    self.pred = L.argmax(x=L.sigmoid(logits), axis=-1)
    is_correct = L.cast(L.equal(self.pred, self.labels_1dim), dtype="int32")
    self.correct = L.reduce_sum(is_correct)
def evaluate_student(model, dataset):
    """Compute the macro F1 of the student model on ``dataset``.

    The model is put in eval mode with the tracer in non-training mode for
    the pass, and is switched back to train mode before returning.

    Args:
        model: student model; called with the student ids only and expected
            to return ``(_, logits)``.
        dataset: object whose ``start()`` yields 4-tuples
            ``(ids_student, ids, _, labels)``.

    Returns:
        Macro-averaged F1 score over all batches.
    """
    predictions, references = [], []
    with D.base._switch_tracer_mode_guard_(is_train=False):
        model.eval()
        for ids_student, _teacher_ids, _, labels in dataset.start():
            _, logits = model(ids_student)
            predictions.extend(L.argmax(logits, -1).numpy())
            references.extend(labels.numpy())
        macro_f1 = f1_score(references, predictions, average='macro')
        model.train()
    return macro_f1
def forward(self,
            q,
            k,
            v,
            lengths,
            speaker_embed,
            start_index,
            force_monotonic=False,
            prev_coeffs=None,
            window=None):
    """Compute position-augmented attention between queries and keys.

    Args:
        q: decoder queries; unpacked as (batch, T_dec, _) in the monotonic
            branch.
        k: encoder keys; ``k.shape[1]`` is used as the encoder length.
        v: encoder values.
        lengths: valid encoder lengths, used for the padding mask in
            training and for the context normalization.
        speaker_embed: speaker embedding; only read when ``self.has_bias``.
        start_index: position offset passed to the query positional
            encoding.
        force_monotonic: outside training, restrict attention to a window
            around the previous attention peak.
        prev_coeffs: previous attention coefficients; when None the
            previous peak is taken to be position 0.
        window: ``(backward_step, forward_step)``; required when
            ``force_monotonic`` is used.

    Returns:
        tuple: ``(contexts, coefficients)``, the projected context vectors
        and the attention coefficients after dropout.
    """
    # add position encoding as an inductive bias
    if self.has_bias:  # multi-speaker model
        # Per-speaker position rates derived from the speaker embedding.
        omega_q = 2 * F.sigmoid(
            F.squeeze(self.q_pos_affine(speaker_embed), axes=[-1]))
        omega_k = 2 * self.omega_initial * F.sigmoid(F.squeeze(
            self.k_pos_affine(speaker_embed), axes=[-1]))
    else:  # single-speaker case
        batch_size = q.shape[0]
        omega_q = F.ones((batch_size, ), dtype="float32")
        omega_k = F.ones((batch_size, ), dtype="float32") * self.omega_default
    q += self.position_encoding_weight * positional_encoding(q, start_index,
                                                             omega_q)
    k += self.position_encoding_weight * positional_encoding(k, 0, omega_k)

    q, k, v = self.q_affine(q), self.k_affine(k), self.v_affine(v)
    # Scaled dot-product scores.
    activations = F.matmul(q, k, transpose_y=True)
    activations /= np.sqrt(self.attention_dim)

    if self.training:
        # mask the <pad> parts from the encoder
        mask = F.sequence_mask(lengths, dtype="float32")
        attn_bias = F.scale(1. - mask, -1000)
        activations += F.unsqueeze(attn_bias, [1])
    elif force_monotonic:
        assert window is not None
        backward_step, forward_step = window
        T_enc = k.shape[1]
        batch_size, T_dec, _ = q.shape

        # actually T_dec = 1 here
        alpha = F.fill_constant((batch_size, T_dec), value=0, dtype="int64") \
            if prev_coeffs is None \
            else F.argmax(prev_coeffs, axis=-1)
        # XOR of the two prefix masks keeps only the positions between
        # alpha - backward_step and alpha + forward_step.
        backward = F.sequence_mask(alpha - backward_step,
                                   maxlen=T_enc,
                                   dtype="bool")
        forward = F.sequence_mask(alpha + forward_step,
                                  maxlen=T_enc,
                                  dtype="bool")
        mask = F.cast(F.logical_xor(backward, forward), "float32")
        attn_bias = F.scale(1. - mask, -1000)
        activations += attn_bias
    # NOTE(review): outside training and without force_monotonic no mask is
    # applied at all - confirm this is intended.

    # softmax
    coefficients = F.softmax(activations, axis=-1)
    # context vector
    coefficients = F.dropout(coefficients,
                             1. - self.keep_prob,
                             dropout_implementation='upscale_in_train')
    contexts = F.matmul(coefficients, v)
    # context normalization: scale by sqrt of the encoder length
    enc_lengths = F.cast(F.unsqueeze(lengths, axes=[1, 2]), "float32")
    contexts *= F.sqrt(enc_lengths)
    # out affine
    contexts = self.out_affine(contexts)
    return contexts, coefficients
def metrics(self, predictions, label):
    """Build accuracy, F1 and MRR metrics for a ranking/classification task.

    Args:
        predictions: pair ``(qid, logits)``.
        label: gold labels.

    Returns:
        dict: ``acc`` and ``f1`` computed on the argmax class, and ``mrr``
        computed from the score in column 1 of ``logits`` grouped by
        ``qid``.
    """
    qid, logits = predictions

    # Column 1 of the logits is used as the positive-class score.
    positive_scores = L.slice(logits, axes=[1], starts=[1], ends=[2])
    mrr_metric = propeller.metrics.Mrr(qid, label, positive_scores)

    predicted_ids = L.unsqueeze(L.argmax(logits, axis=1), axes=[1])
    f1_metric = propeller.metrics.F1(label, predicted_ids)
    acc_metric = propeller.metrics.Acc(label, predicted_ids)

    return {'acc': acc_metric, 'f1': f1_metric, 'mrr': mrr_metric}
def _select_column(condition,
                   inputs,
                   column_enc,
                   column_len,
                   ptr_net,
                   grammar,
                   column2table_mask,
                   name=None):
    """Score columns with the pointer network and place them in vocab space.

    The column scores are padded with ``-INF`` blocks on both sides so they
    sit in the full output vocabulary, then both outputs are multiplied by
    ``condition`` along axis 0.

    Args:
        condition: per-sample switch, cast to float32 before use.
        inputs (Variable): shape = [batch_size, max_len, hidden_size];
            max_len is always 1 during inference.
        column_enc: column encodings given to the pointer network.
        column_len: number of valid columns, used to build the column mask.
        ptr_net: pointer network; its ``forward`` is called.
        grammar: provides ``MAX_COLUMN``, ``MAX_TABLE``, ``MAX_VALUE`` and
            ``grammar_size``.
        column2table_mask (Variable): gathered at the predicted column ids.
        name (str): unused.

    Returns:
        tuple: ``(final_output, valid_table_mask)``, both gated by
        ``condition``.
    """
    gate = layers.cast(condition, dtype='float32')

    column_mask = layers.reshape(
        layers.sequence_mask(column_len,
                             maxlen=grammar.MAX_COLUMN,
                             dtype='float32'), [-1, grammar.MAX_COLUMN])
    predicts = ptr_net.forward(inputs, column_enc, column_mask)

    pred_ids = layers.argmax(predicts, axis=-1)
    valid_table_mask = nn_utils.batch_gather(column2table_mask, pred_ids)

    # Pad with -INF so column scores land in their slice of the vocabulary.
    left_width = grammar.grammar_size + grammar.MAX_TABLE
    pad_left = tensor.fill_constant_batch_size_like(predicts,
                                                    shape=[-1, left_width],
                                                    dtype='float32',
                                                    value=-INF)
    pad_right = tensor.fill_constant_batch_size_like(
        predicts, shape=[-1, grammar.MAX_VALUE], dtype='float32', value=-INF)
    full_scores = tensor.concat([pad_left, predicts, pad_right], axis=-1)

    gated_scores = layers.elementwise_mul(full_scores, gate, axis=0)
    gated_table_mask = layers.elementwise_mul(valid_table_mask, gate, axis=0)
    return gated_scores, gated_table_mask
def loss_cardinality(self, outputs, targets, indices, num_boxes):
    """Compute the cardinality error of the predictions.

    This is the absolute error between the number of predicted non-empty
    boxes and the number of target boxes, per image. It is not really a
    loss: it is intended for logging only and does not propagate
    gradients.

    Args:
        outputs: mapping with ``"pred_logits"`` of shape
            [bs, num_queries, num_classes].
        targets: sequence of per-image mappings with a ``"labels"`` entry.
        indices: unused; kept for the common loss signature.
        num_boxes: unused; kept for the common loss signature.

    Returns:
        dict: ``{"cardinality_error": l1 loss between predicted and target
        box counts}``.
    """
    with dg.no_grad():
        pred_logits = outputs["pred_logits"]
        tgt_lengths = dg.to_variable([len(v["labels"])
                                      for v in targets]).astype("float32")
        # "no-object" is the last class; everything else is a real box.
        no_object_id = pred_logits.shape[-1] - 1
        is_object = (L.argmax(pred_logits, -1) !=
                     no_object_id).astype("float32")
        # Reduce over the query axis only, so each image gets its own count
        # to compare with its own target count. Reducing over every axis
        # would compare the batch total against each target.
        card_pred = L.reduce_sum(is_object, dim=1)
        card_err = F.loss.l1_loss(card_pred, tgt_lengths)
    losses = {"cardinality_error": card_err}
    return losses
def forward(self, *args, **kwargs):
    """Run the encoder and, depending on the keyword arguments, the MLM head.

    Args:
        tgt_labels(`Variable` of shape [batch_size, seqlen] or [batch, seqlen, vocab_size]):
            ground truth target sequence ids (hard label) or distribution
            (soft label).
        tgt_pos(`Variable` of shape [n_targets, 2]):
            index of tgt_labels in `src_ids`, can be obtained from
            `fluid.layers.where(src_ids==mask_id)`.
        encode_only(Bool):
            if set, the MLM head is skipped and no loss/logits are returned.
        Every other argument is forwarded to `ErnieModel.forward`.

    Returns:
        When `encode_only` is set: `(None, None, info)`.
        When `tgt_labels` or `tgt_pos` is missing:
            `(output_ids, logits, info)`, with `output_ids` the argmax of
            the logits at every position.
        Otherwise:
            loss(`Variable` of shape []):
                cross entropy loss mean over every target label.
            logits(`Variable` of shape [n_targets, vocab_size]):
                logits for every target.
            info(Dictionary): see `ErnieModel`.
    """
    tgt_labels = kwargs.pop('tgt_labels', None)
    tgt_pos = kwargs.pop('tgt_pos', None)
    encode_only = kwargs.pop('encode_only', False)

    _, encoded, info = ErnieModel.forward(self, *args, **kwargs)
    if encode_only:
        return None, None, info

    if tgt_labels is None or tgt_pos is None:
        # No targets: score every position against the vocabulary.
        hidden = self.mlm_ln(self.mlm(encoded))
        logits = L.matmul(
            hidden, self.word_emb.weight, transpose_y=True) + self.mlm_bias
        return L.argmax(logits, -1), logits, info

    # Targets given: only the gathered target positions are scored.
    hidden_2d = self.mlm_ln(self.mlm(L.gather_nd(encoded, tgt_pos)))
    logits_2d = L.matmul(
        hidden_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias
    if len(tgt_labels.shape) == 1:
        tgt_labels = L.reshape(tgt_labels, [-1, 1])
    is_soft_label = tgt_labels.shape[-1] != 1
    per_target_loss = L.softmax_with_cross_entropy(logits_2d,
                                                   tgt_labels,
                                                   soft_label=is_soft_label)
    return L.reduce_mean(per_target_loss), logits_2d, info
def build_model(self, enc_input, dec_input, tgt_label, label_weights):
    """Build the encoder-decoder graph with its loss and accuracy counters.

    Args:
        enc_input: inputs for ``self.encode``.
        dec_input: inputs for ``self.decode``.
        tgt_label: gold target token ids.
        label_weights: per-token weights; they cancel the contribution of
            padding positions to both the loss and the token count.

    Returns:
        dict: ``loss`` (weighted cost divided by the weighted token count),
        ``sum_correct`` (weighted number of correct tokens) and
        ``token_num``. All three variables are marked persistable.
    """
    enc_word_output, enc_sen_output = self.encode(enc_input)
    dec_output = self.decode(dec_input, enc_word_output, enc_sen_output)

    # Token-level accuracy counter (no gradient flows through it).
    predicted_ids = layers.reshape(layers.argmax(dec_output, axis=-1),
                                   shape=[-1, 1])
    is_correct = layers.cast(layers.equal(tgt_label, predicted_ids),
                             dtype='float32')
    sum_correct = layers.reduce_sum(
        layers.elementwise_mul(x=is_correct, y=label_weights, axis=0))
    sum_correct.stop_gradient = True

    # With label smoothing the hard ids become a smoothed distribution.
    if self._label_smooth_eps:
        one_hot_label = layers.one_hot(input=tgt_label, depth=self.voc_size)
        tgt_label = layers.label_smooth(label=one_hot_label,
                                        epsilon=self._label_smooth_eps)

    cost = layers.softmax_with_cross_entropy(
        logits=dec_output,
        label=tgt_label,
        soft_label=bool(self._label_smooth_eps))
    sum_cost = layers.reduce_sum(
        layers.elementwise_mul(x=cost, y=label_weights, axis=0))

    token_num = layers.reduce_sum(label_weights)
    token_num.stop_gradient = True

    graph_vars = {
        "loss": sum_cost / token_num,
        "sum_correct": sum_correct,
        "token_num": token_num,
    }
    for var in graph_vars.values():
        var.persistable = True
    return graph_vars
def model_fn(features, mode, params, run_config):
    """Build the propeller model spec for train, eval or predict mode.

    Args:
        features: ``(src_ids, sent_ids)`` in PREDICT mode, otherwise
            ``(src_ids, sent_ids, labels)``.
        mode: a ``propeller.RunMode`` value.
        params: hyper-parameters; reads ``warmup_proportion``,
            ``learning_rate``, ``weight_decay`` and ``use_fp16``.
        run_config: run configuration; ``max_steps`` is read.

    Returns:
        propeller.ModelSpec: carries the loss (None in PREDICT), the
        metrics (accuracy in EVAL, otherwise None) and the predictions
        (argmax ids in EVAL, raw logits otherwise).
    """
    ernie = ErnieModelForSequenceClassification(params, name='')
    # The run mode lives in `mode`, not `params`. Testing `params` against
    # RunMode.TRAIN was always false, which put the model in eval mode
    # (dropout disabled) during training as well.
    if mode is not propeller.RunMode.TRAIN:
        ernie.eval()

    metrics, loss = None, None
    if mode is propeller.RunMode.PREDICT:
        src_ids, sent_ids = features
        _, logits = ernie(src_ids, sent_ids)
        predictions = [logits, ]
    else:
        src_ids, sent_ids, labels = features
        if mode is propeller.RunMode.EVAL:
            loss, logits = ernie(src_ids, sent_ids, labels=labels)
            pred = L.argmax(logits, axis=1)
            acc = propeller.metrics.Acc(labels, pred)
            metrics = {'acc': acc}
            predictions = [pred]
        else:
            loss, logits = ernie(src_ids, sent_ids, labels=labels)
            scheduled_lr, _ = optimization(
                loss=loss,
                warmup_steps=int(run_config.max_steps *
                                 params['warmup_proportion']),
                num_train_steps=run_config.max_steps,
                learning_rate=params['learning_rate'],
                train_program=F.default_main_program(),
                startup_prog=F.default_startup_program(),
                use_fp16=params.use_fp16,
                weight_decay=params['weight_decay'],
                scheduler="linear_warmup_decay", )
            propeller.summary.scalar('lr', scheduled_lr)
            predictions = [logits, ]

    return propeller.ModelSpec(loss=loss,
                               mode=mode,
                               metrics=metrics,
                               predictions=predictions)
def node_classify_model(config):
    """Build the node classification network on top of frozen embeddings.

    The embedding table is registered under the parameter name ``'weight'``
    and its output is excluded from gradient computation, so only the
    softmax classifier layer receives gradients.

    Args:
        config: provides ``num_nodes``, ``embed_dim`` and ``num_labels``.

    Returns:
        dict: ``loss`` (mean cross-entropy), ``probs``, ``predict`` (argmax
        of ``probs``) and the ``labels`` placeholder.
    """
    nodes = fl.data('nodes', shape=[None, 1], dtype='int64')
    labels = fl.data('labels', shape=[None, 1], dtype='int64')

    table_shape = [config.num_nodes, config.embed_dim]
    embed_nodes = fl.embedding(input=nodes,
                               size=table_shape,
                               param_attr=fluid.ParamAttr(name='weight'))
    embed_nodes.stop_gradient = True

    probs = fl.fc(input=embed_nodes, size=config.num_labels, act='softmax')
    predict = fl.argmax(probs, axis=-1)
    loss = fl.reduce_mean(fl.cross_entropy(input=probs, label=labels))

    model = {}
    model['loss'] = loss
    model['probs'] = probs
    model['predict'] = predict
    model['labels'] = labels
    return model
def infer(model,
          infer_data,
          max_seq_len=300,
          is_tensor=True,
          logits_softmax=True):
    """Predict class ids with a dygraph model.

    The model runs in eval mode (dropout off) with the tracer in
    non-training mode, and is switched back to train mode afterwards.

    Args:
        model: dygraph model; called as
            ``model(data, logits_softmax=logits_softmax)``.
        infer_data: data to predict. Either a tensor the framework accepts
            directly, or raw data that is converted through
            ``numpy.array``.
        max_seq_len: unused here; kept for interface compatibility.
        is_tensor: True when ``infer_data`` is already a tensor.
        logits_softmax: forwarded to the model; True requests
            softmax-normalized logits.

    Returns:
        numpy array holding the argmax over the last axis of the model
        output.
    """
    with D.base._switch_tracer_mode_guard_(is_train=False):
        model.eval()
        if is_tensor:
            model_input = infer_data
        else:
            model_input = D.to_variable(np.array(infer_data))
        logits = model(model_input, logits_softmax=logits_softmax)
        # TODO: also return the score of the predicted class.
        pred = L.argmax(logits, -1).numpy()
        model.train()
    return pred
def forward(self, outputs, target_sizes):
    """Convert raw model outputs into per-image detection results.

    Parameters:
        outputs: raw outputs of the model; ``"pred_logits"`` and
            ``"pred_boxes"`` are read.
        target_sizes: tensor of dimension [batch_size x 2] containing the
            (height, width) of each image.
            For evaluation, this must be the original image size (before
            any data augmentation).
            For visualization, this should be the image size after data
            augmentation, but before padding.

    Returns:
        list of dict: one ``{'scores', 'labels', 'boxes'}`` entry per
        image, holding numpy arrays; boxes are absolute
        [x0, y0, x1, y1] coordinates.
    """
    out_logits, out_bbox = outputs["pred_logits"], outputs["pred_boxes"]

    assert len(out_logits) == len(target_sizes)
    assert target_sizes.shape[1] == 2

    prob = L.softmax(out_logits, -1)  # [bs, num_queries, num_classes + 1]
    # NOTE(review): the full slice keeps every class, so the trailing
    # "+ 1" class can win the argmax/max - confirm this is intended.
    labels = L.argmax(prob[:, :, :], axis=-1)  # [bs, num_queries]
    scores = L.reduce_max(prob, dim=-1)  # [bs, num_queries]

    # (cx, cy, w, h) -> (x0, y0, x1, y1), done on a flat list of boxes.
    bs, num_queries, _ = out_bbox.shape
    flat_boxes = L.reshape(out_bbox, (-1, 4))
    boxes = L.reshape(box_ops.box_cxcywh_to_xyxy(flat_boxes),
                      (bs, num_queries, 4))

    # Relative [0, 1] coordinates -> absolute pixel coordinates.
    img_h, img_w = target_sizes[:, 0], target_sizes[:, 1]
    scale_fct = L.stack([img_w, img_h, img_w, img_h], 1)  # [bs, 4]
    scale_fct = L.expand(L.unsqueeze(scale_fct, [1]), (1, num_queries, 1))
    boxes = boxes * scale_fct

    results = []
    per_image = zip(scores.numpy(), labels.numpy(), boxes.numpy())
    for image_scores, image_labels, image_boxes in per_image:
        results.append({
            'scores': image_scores,
            'labels': image_labels,
            'boxes': image_boxes
        })
    return results
def metrics(self, predictions, label):
    """Build the chunk-level F1 metric for sequence labelling.

    Args:
        predictions: pair ``(pred, seqlen)``, where ``pred`` holds the
            per-token class scores and ``seqlen`` the sequence lengths.
        label: gold tag ids.

    Returns:
        dict: ``{'f1': propeller.metrics.ChunkF1}``.
    """
    scores, seqlen = predictions
    tag_ids = L.unsqueeze(L.argmax(scores, axis=-1), axes=[-1])
    chunk_f1 = propeller.metrics.ChunkF1(label, tag_ids, seqlen,
                                         self.num_label)
    return {'f1': chunk_f1}
def call(self, global_img_feat, p_img_feat, embedding_fn, words=None):
    """Run the attention decoder with a sentinel gate inside a While loop.

    In 'eval' mode the loop feeds back its own argmax prediction, starting
    from ``config.data['start_idx']``. Otherwise it reads the next input
    word from ``words`` (teacher forcing).

    Args:
        global_img_feat: global image feature, concatenated to every word
            embedding.
        p_img_feat: regional image features attended over.
        embedding_fn: callable mapping word ids to embeddings.
        words: ground-truth word ids; transposed to time-major here and
            only used outside 'eval' mode.

    Returns:
        Batch-major output: predicted word ids (stored as float32) in
        'eval' mode, per-step vocabulary scores in 'train' mode.
    """
    # Image features
    img_feat = layers.fc(p_img_feat, self.hid_size, num_flatten_dims=2, act='tanh')  # [batch, k, hid]
    img_feat_emb = layers.fc(p_img_feat, self.hid_size, num_flatten_dims=2)

    if self.mode == 'eval':
        # First input word: the start token for every sample.
        word = layers.fill_constant_batch_size_like(global_img_feat, [-1],
                                                    dtype='int64',
                                                    value=config.data['start_idx'])
    else:
        words = layers.transpose(words, [1, 0])  # [seq, batch]
        words.stop_gradient = True
    # LSTM state initialization
    hid, cell = create_zero_state(global_img_feat), create_zero_state(global_img_feat)

    # While loop parameter initialization
    mx = decoder_config['sentence_length'] - 1 if self.mode == 'train' else decoder_config['infer_max_length']
    if self.mode == 'eval':
        mx = decoder_config['infer_max_length']
        # NOTE(review): the array is created as int64 but float32 values
        # are written to it below - confirm the intended dtype.
        while_op_output = layers.create_array('int64')
    else:
        while_op_output = layers.create_array('float32')
    max_step = layers.fill_constant(shape=[1], dtype='int64', value=mx)
    step = layers.fill_constant(shape=[1], dtype='int64', value=0)
    cond = layers.less_than(step, max_step)
    while_op = layers.While(cond)

    with while_op.block():
        if self.mode == 'train':
            # Teacher forcing: take the ground-truth word of this step.
            st = layers.cast(step, 'int32')
            word = layers.slice(words, axes=[0], starts=st, ends=st + 1)
            word = layers.squeeze(word, [0])
            word.stop_gradient = True

        word_emb = embedding_fn(word)
        # Using + here instead of concat might work better?
        xt = layers.concat([word_emb, global_img_feat], axis=-1)  # [batch, feat]
        h, c = layers.lstm_unit(xt, hid, cell, param_attr=fluid.ParamAttr('lstm_w'),
                                bias_attr=fluid.ParamAttr('lstm_b'))
        # Sentinel gate, computed from the input and the previous hidden
        # state (hid is only overwritten by the assign below).
        p_word_emb = layers.fc(xt, size=self.hid_size)
        p_hidden = layers.fc(hid, size=self.hid_size)
        sentinel_gate = layers.sigmoid(p_word_emb + p_hidden)  # [batch, hidden]
        sentinel = layers.elementwise_mul(sentinel_gate, layers.tanh(c))  # [batch, hidden]

        # Write the new LSTM state back into the loop-carried variables.
        layers.assign(h, hid)
        layers.assign(c, cell)

        k = layers.shape(p_img_feat)[1]

        p_hid = layers.fc(h, self.hid_size, act='tanh')
        # Attention part
        # alpha
        hid_emb = layers.fc(p_hid, self.hid_size)  # [batch, hidden]
        exp_hid_emb = layers.expand(layers.unsqueeze(hid_emb, 1), [1, k + 1, 1])  # [batch, k+1, hidden]
        sentinel_emb = layers.unsqueeze(layers.fc(sentinel, self.hid_size), axes=1)  # [batch, 1, hidden]
        feat_emb = layers.concat([img_feat_emb, sentinel_emb], axis=1)  # [batch, k+1, hidden]
        z = layers.tanh(feat_emb + exp_hid_emb)  # presumably [batch, k+1, hidden] - TODO confirm
        # NOTE(review): the fc output has size 1, so act='softmax' may
        # normalize over a single element - confirm the softmax axis.
        alpha = layers.fc(z, size=1, num_flatten_dims=2, act='softmax')  # [batch, k+1, 1]

        # context vector
        context = layers.concat([img_feat, layers.unsqueeze(sentinel, axes=1)], axis=1)  # [batch, k+1, hidden]
        context = layers.elementwise_mul(context, alpha, axis=0)
        context = layers.reduce_mean(context, dim=1)  # [batch, hidden]

        out = layers.fc(context + p_hid, self.hid_size, act='tanh')

        word_pred = weight_tying_fc(out)  # [batch, vocab]

        if self.mode == 'eval':
            # Feed the greedy prediction back as the next input word.
            next_word = layers.argmax(word_pred, axis=-1)
            layers.assign(next_word, word)
            next_word = layers.cast(next_word, 'float32')
            layers.array_write(next_word, step, array=while_op_output)
        else:
            layers.array_write(word_pred, step, array=while_op_output)
        # Advance the step counter and refresh the loop condition in place.
        layers.increment(step)
        layers.less_than(step, max_step, cond=cond)
    if self.mode == 'train':
        output_time_major, _ = layers.tensor_array_to_tensor(while_op_output, axis=0, use_stack=True)
        output = layers.transpose(output_time_major, [1, 0, 2])
    else:
        output_time_major = layers.tensor_array_to_tensor(while_op_output,
                                                          axis=0, use_stack=True)[0]
        output = layers.transpose(output_time_major, [1, 0])
    return output
def _forward(self, inputs, is_training):
    """Run the real forward pass for either mode (train/test).

    The last target position is dropped from every ``tgt_*`` input before
    embedding. When latent variables are enabled, the latent is drawn with
    Gumbel-softmax in training and taken as the one-hot argmax of the
    posterior logits otherwise.

    Args:
        inputs: mapping with ``src_*`` and ``tgt_*`` entries for ``token``,
            ``mask``, ``pos``, ``type`` and ``turn``.
        is_training: selects sampling (True) or argmax (False) for the
            latent variable.

    Returns:
        dict: always ``dec_probs``; with latent variables also
        ``post_logits`` and ``latent_embed``; with the discriminator
        ``pos_probs`` and ``neg_probs``; with the bag-of-words head
        ``bow_probs``.
    """
    outputs = {}

    src_token, src_mask = inputs["src_token"], inputs["src_mask"]
    src_pos, src_type = inputs["src_pos"], inputs["src_type"]
    src_turn = inputs["src_turn"]

    # Drop the last target position from every target-side input.
    tgt_token = inputs["tgt_token"][:, :-1]
    tgt_mask = inputs["tgt_mask"][:, :-1]
    tgt_pos = inputs["tgt_pos"][:, :-1]
    tgt_type = inputs["tgt_type"][:, :-1]
    tgt_turn = inputs["tgt_turn"][:, :-1]

    input_mask = layers.concat([src_mask, tgt_mask], axis=1)
    input_mask.stop_gradient = True

    src_embed = self.embedder(src_token, src_pos, src_type, src_turn)
    tgt_embed = self.embedder(tgt_token, tgt_pos, tgt_type, tgt_turn)
    embed = self.embed_layer_norm(
        layers.concat([src_embed, tgt_embed], axis=1))

    batch_size, src_len = src_token.shape[0], src_token.shape[1]
    tgt_len = tgt_token.shape[1]

    latent_embed = None
    if self.num_latent > 0:
        post_embed, post_probs, post_logits = self._posteriori_network(
            input_mask, embed, batch_size, src_len, tgt_len)
        outputs["post_logits"] = post_logits

        if self.use_discriminator:
            pos_probs, neg_probs = self._discriminator_network(
                input_mask, embed, batch_size, src_len, tgt_len, post_embed)
            outputs["pos_probs"] = pos_probs
            outputs["neg_probs"] = neg_probs

        if is_training:
            z = F.gumbel_softmax(post_logits, self.tau)
        else:
            best_latent = layers.argmax(post_logits, axis=1)
            z = layers.one_hot(F.unsqueeze(best_latent, [1]),
                               self.num_latent)
        latent_embed = layers.matmul(z, self.latent_embeddings)
        outputs["latent_embed"] = latent_embed

    latent_embed, dec_probs = self._generation_network(
        input_mask, embed, batch_size, src_len, tgt_len, latent_embed)
    outputs["dec_probs"] = dec_probs

    if self.num_latent > 0 and self.with_bow:
        bow_input = latent_embed
        if self.two_layer_predictor:
            bow_input = self.pre_bow_predictor(bow_input)
        outputs["bow_probs"] = layers.softmax(self.bow_predictor(bow_input))

    return outputs
model.clear_gradients() if global_step % args.save_steps == 0: F.save_dygraph(model.state_dict(), args.save_dir + '_%s' % global_step) if global_step % args.eval_steps == 0 and step > 0: y_true, y_pred = [], [] with FD.base._switch_tracer_mode_guard_(is_train=False): model.eval() for step, d in enumerate( tqdm(dev_batch_data, desc='evaluating %d' % epoch)): ids, sids, labels = d ids, sids, labels = FD.to_variable( ids), FD.to_variable(sids), FD.to_variable( labels) loss, logits = model(ids, sids, labels=labels) #print('\n'.join(map(str, logits.numpy().tolist()))) y_pred += L.argmax(logits, -1).numpy().tolist() y_true += labels.numpy().tolist() model.train() if args.debug: print(y_pred[:10], y_true[:10]) f1 = f1_score(y_true, y_pred) print('f1 %.5f' % f1) print(classification_report(y_true, y_pred)) if f1 > bst_f1: F.save_dygraph(model.state_dict(), args.save_dir) bst_f1 = f1 print('saving model with best f1: %.3f' % bst_f1)
def _sampling(self, logits):
    """Greedy sampling: return the argmax of ``logits`` along axis 1."""
    return layers.argmax(logits, axis=1)
tea_acc = [] with FD.base._switch_tracer_mode_guard_( is_train=False): ofa_model.model.eval() for step, d in enumerate( tqdm(dev_ds.start(place), desc='evaluating %d' % epoch)): ids, sids, label = d [loss, logits, _], [_, tea_logits, _] = ofa_model( ids, sids, labels=label, num_layers=model_cfg[ 'num_hidden_layers']) a = L.argmax(logits, -1) == label acc.append(a.numpy()) ta = L.argmax(tea_logits, -1) == label tea_acc.append(ta.numpy()) ofa_model.model.train() print( 'width_mult: %f, depth_mult: %f: acc %.5f, teacher acc %.5f' % (width_mult, depth_mult, np.concatenate(acc).mean(), np.concatenate(tea_acc).mean())) if args.save_dir is not None: if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) F.save_dygraph(ofa_model.model.state_dict(), args.save_dir)
def forward(self, ref_image, ref_label, label, k):
    """Encode the reference image to get features for weight generation.

    Args:
        ref_image ((NxK)x3xHxW): Reference images.
        ref_label ((NxK)xCxHxW): Reference labels.
        label (NxCxHxW): Target label.
        k (int): Number of reference images.

    Returns:
        (tuple)
          - x (NxC2xH2xW2): Encoded features from reference images for
            the main branch (as input to the decoder).
          - encoded_ref (list of Variable): Encoded features from
            reference images for the weight generation branch, ordered
            level0 -> level4.
          - attention (Nx(KxH1xW1)x(H1xW1)): Attention maps; None when the
            attention module is not reached.
          - atn_vis (1x1xH1xW1): Visualization for attention scores.
          - ref_idx (Nx1): Index for which image to use from the
            reference image.
    """
    # First layer: image and label encoded jointly, in parallel, or the
    # image alone.
    if self.concat_ref_label:
        x = self.ref_img_first(L.concat([ref_image, ref_label], axis=1))
    else:
        x = self.ref_img_first(ref_image)
        if self.mul_ref_label:
            x_label = self.ref_label_first(ref_label)

    atn = atn_vis = ref_idx = None
    for level in range(self.num_downsamples):
        x = getattr(self, 'ref_img_down_' + str(level))(x)
        if self.mul_ref_label:
            x_label = getattr(self, 'ref_label_down_' + str(level))(x_label)

        # Attention only runs with several references, at one chosen level.
        if not (k > 1 and level == self.num_downsample_atn - 1):
            continue
        x, atn, atn_vis = self.attention_module(x, label, ref_label)
        if self.mul_ref_label:
            x_label, _, _ = self.attention_module(x_label, None, None, atn)

        # Total attention mass per reference decides which one to use.
        per_ref = L.reshape(atn, (label.shape[0], k, -1))  # [b, k, h*w*h*w]
        ref_idx = L.argmax(L.reduce_sum(per_ref, dim=2), axis=1)

    # Collect every decoder level; these generate the weights of the
    # corresponding layers.
    encoded_image_ref = [x]
    if self.mul_ref_label:
        encoded_ref_label = [x_label]

    for level in reversed(range(self.num_downsamples)):  # 4 -> 0
        up_image = getattr(self, 'ref_img_up_' + str(level))
        encoded_image_ref.append(up_image(encoded_image_ref[-1]))
        if self.mul_ref_label:
            up_label = getattr(self, 'ref_label_up_' + str(level))
            encoded_ref_label.append(up_label(encoded_ref_label[-1]))

    if self.mul_ref_label:
        encoded_ref = []
        for conv, conv_label in zip(encoded_image_ref, encoded_ref_label):
            b, c, h, w = conv.shape
            label_weight = L.reshape(L.softmax(conv_label, axis=1),
                                     (b, 1, c, h * w))
            image_feat = L.reshape(conv, (b, c, 1, h * w))
            # Broadcast to (b, c, c, h * w), then sum out the spatial axis.
            outer = image_feat * label_weight
            encoded_ref.append(L.reduce_sum(outer, dim=3, keep_dim=True))
    else:
        encoded_ref = encoded_image_ref

    encoded_ref = encoded_ref[::-1]  # level0 -> level4
    return x, encoded_ref, atn, atn_vis, ref_idx
log.debug('train loss %.5f lr %.3e' % (loss.numpy(), opt.current_step_lr())) opt.minimize(loss) model.clear_gradients() if step % 100 == 0: acc = [] with FD.base._switch_tracer_mode_guard_( is_train=False): model.eval() for step, d in enumerate( tqdm(dev_ds.start(place), desc='evaluating %d' % epoch)): ids, sids, label = d loss, logits = model(ids, sids, labels=label) #print('\n'.join(map(str, logits.numpy().tolist()))) a = L.argmax(logits, -1) == label acc.append(a.numpy()) model.train() log.debug('acc %.5f' % np.concatenate(acc).mean()) if args.save_dir is not None: F.save_dygraph(model.state_dict(), args.save_dir) else: feature_column = propeller.data.FeatureColumns([ propeller.data.TextColumn('seg_a', unk_id=tokenizer.unk_id, vocab_dict=tokenizer.vocab, tokenizer=tokenizer.tokenize), ]) assert args.save_dir is not None sd, _ = FD.load_dygraph(args.save_dir)
loss.backward() if step % 10 == 0: log.debug('train loss %.5f lr %.3e' % (loss.numpy(), opt.current_step_lr())) opt.minimize(loss) model.clear_gradients() with FD.base._switch_tracer_mode_guard_(is_train=False): model.eval() FP = 0 TP = 0 FN = 0 TN = 0 for step, d in enumerate(tqdm(dev_ds.start(place), desc='evaluating %d' % epoch)): ids, sids, label = d loss, logits = model(ids, sids, labels=label) #print('\n'.join(map(str, logits.numpy().tolist()))) a = L.argmax(logits, -1).numpy() label = label.numpy() length = a.shape[0] label = np.reshape(label,[length]) for i in range(length): if a[i] == label[i] and a[i] == 1: TP += 1 elif a[i] == label[i] and a[i] == 0: TN += 1 elif a[i] != label[i] and a[i] == 1: FP += 1 elif a[i] != label[i] and a[i] == 0: FN += 1 mcc.append((TP*TN-FP*FN)/math.sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN))) print('mcc:',mcc[-1]) if args.save_dir is not None: