def __init__(self, image_model, fusion_method, id_to_vec, emb_size, vocab_size, config, device='cuda:0'):
    super(TextImageTransformerEncoder, self).__init__()
    self.hidden_size = config.hidden_size
    self.fusion_method = fusion_method
    # Text-image fusion module
    if fusion_method == 'concat':
        self.fc = nn.Linear(self.hidden_size * 2, self.hidden_size)
    elif fusion_method == 'mcb':
        self.fusion = fusions.MCB([self.hidden_size, self.hidden_size], self.hidden_size)
    elif fusion_method == 'mlb':
        self.fusion = fusions.MLB([self.hidden_size, self.hidden_size], self.hidden_size)
    elif fusion_method == 'mutan':
        self.fusion = fusions.Mutan([self.hidden_size, self.hidden_size], self.hidden_size)
    elif fusion_method == 'block':
        self.fusion = fusions.Block([self.hidden_size, self.hidden_size], self.hidden_size)
    # Image encoder backbone
    if image_model == 'vgg':
        from model.vgg import VggEncoder
        self.image_encoder = VggEncoder(self.hidden_size)
    elif image_model == 'resnet':
        from model.resnet import ResNetEncoder
        self.image_encoder = ResNetEncoder(self.hidden_size)
    # Transformer encoders for context and response
    from model.transformer import TransformerEncoder
    self.context_encoder = TransformerEncoder(id_to_vec, emb_size, vocab_size, config, device)
    self.response_encoder = TransformerEncoder(id_to_vec, emb_size, vocab_size, config, device)
    # Bilinear matching matrix, Xavier-initialized
    M = torch.FloatTensor(self.hidden_size, self.hidden_size)
    init.xavier_normal_(M)
    self.M = nn.Parameter(M, requires_grad=True)
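# Hedged usage sketch (not from the original source): one plausible forward pass for
# TextImageTransformerEncoder, showing how the fusion module and the bilinear matrix M
# defined above could be combined. The encoder call signatures, the returned vector
# shapes, and the scoring formula are assumptions for illustration only.
def forward_sketch(self, context_ids, image, response_ids):
    context_vec = self.context_encoder(context_ids)     # assumed (batch, hidden)
    image_vec = self.image_encoder(image)                # assumed (batch, hidden)
    response_vec = self.response_encoder(response_ids)   # assumed (batch, hidden)
    if self.fusion_method == 'concat':
        fused = self.fc(torch.cat([context_vec, image_vec], dim=-1))
    else:
        fused = self.fusion([context_vec, image_vec])    # MCB / MLB / Mutan / Block take a list of inputs
    # bilinear matching score: score_b = fused_b^T M response_b
    return torch.einsum('bi,ij,bj->b', fused, self.M, response_vec)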
def __init__(self, args, device='cpu'):
    super().__init__()
    self.args = args
    self.device = device
    self.epoch = 0
    self.dropout = nn.Dropout(self.args.dropout)
    # Entailment Tracking
    # roberta_model_path = '/research/king3/ik_grp/yfgao/pretrain_models/huggingface/roberta-base'
    roberta_model_path = args.pretrained_lm_path
    roberta_config = RobertaConfig.from_pretrained(roberta_model_path, cache_dir=None)
    self.roberta = RobertaModel.from_pretrained(roberta_model_path, cache_dir=None, config=roberta_config)
    encoder_layer = TransformerEncoderLayer(self.args.bert_hidden_size, 12, 4 * self.args.bert_hidden_size)
    encoder_norm = nn.LayerNorm(self.args.bert_hidden_size)
    self.transformer_encoder = TransformerEncoder(encoder_layer, args.trans_layer, encoder_norm)
    self._reset_transformer_parameters()
    self.w_entail = nn.Linear(self.args.bert_hidden_size, 3, bias=True)
    # Logic Reasoning
    self.entail_emb = nn.Parameter(torch.rand(3, self.args.bert_hidden_size))
    nn.init.normal_(self.entail_emb)
    self.w_selfattn = nn.Linear(self.args.bert_hidden_size * 2, 1, bias=True)
    self.w_output = nn.Linear(self.args.bert_hidden_size * 2, 4, bias=True)
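# Hedged sketch (an assumption, not the original forward method): one common way the
# modules defined above fit together for entailment tracking and logic reasoning --
# span representations from the transformer encoder are mixed with soft entailment
# embeddings, pooled by the self-attention scorer w_selfattn, and classified by
# w_output. Argument names, shapes, and masking are illustrative only.
def reasoning_sketch(self, span_repr, span_mask):
    # span_repr: (num_spans, batch, hidden) output of self.transformer_encoder
    # span_mask: (num_spans, batch) bool, True for real (non-padded) spans
    entail_logits = self.w_entail(span_repr)                               # (num_spans, batch, 3)
    entail_state = torch.softmax(entail_logits, dim=-1) @ self.entail_emb  # (num_spans, batch, hidden)
    fused = torch.cat([span_repr, entail_state], dim=-1)                   # (num_spans, batch, 2*hidden)
    attn = self.w_selfattn(self.dropout(fused)).squeeze(-1)                # (num_spans, batch)
    attn = attn.masked_fill(~span_mask, float('-inf')).softmax(dim=0)
    pooled = (attn.unsqueeze(-1) * fused).sum(dim=0)                       # (batch, 2*hidden)
    return entail_logits, self.w_output(self.dropout(pooled))              # per-span entailment + 4-way decision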
def __init__(self, id_to_vec, emb_size, vocab_size, config, device='cuda:0'):
    super(TextTransformerEncoder, self).__init__()
    from model.transformer import TransformerEncoder
    self.encoder = TransformerEncoder(id_to_vec, emb_size, vocab_size, config, device)
    self.hidden_size = config.hidden_size
    M = torch.FloatTensor(self.hidden_size, self.hidden_size)
    init.xavier_normal_(M)
    self.M = nn.Parameter(M, requires_grad=True)
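# Hedged usage sketch (assumption): the matrix M above is typically used as a bilinear
# match between an encoded context c and an encoded response r, i.e. score = c^T M r.
# The encoder call signature and output shape are assumptions.
def forward_sketch(self, context_ids, response_ids):
    c = self.encoder(context_ids)                     # assumed (batch, hidden)
    r = self.encoder(response_ids)                    # assumed (batch, hidden)
    return torch.einsum('bi,ij,bj->b', c, self.M, r)  # (batch,) matching scores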
def detection_branch(Fm, Ftd, fq, out_filters, training, num_layers=0):
    # bottom-up branch
    # down sampling
    Fm_mid = pool_proj_cat(Ftd[1], Ftd[0], K.int_shape(Fm)[-1] // 2)
    # down sampling
    Fm_top = pool_proj_cat(Fm_mid, Fm, K.int_shape(Fm)[-1] // 2)
    # projection
    Fm_top = DarknetConv2D_BN_Leaky(K.int_shape(Fm)[-1] // 2, (1, 1))(Fm_top)
    b, h, w, c = K.int_shape(Fm_top)
    if num_layers > 0:
        pos_emb = PositionEmbeddingSine(num_pos_feats=c)(Fm_top)
        Fm_top = K.reshape(Fm_top, (-1, h * w, c))
        pos_emb = K.reshape(pos_emb, (-1, h * w, c))
        Fm_top = TransformerEncoder(embed_dim=c, num_layers=num_layers)(Fm_top, training=training)
        Fm_top = K.reshape(Fm_top, (-1, h, w, c))
    # GARAN unit
    Fm_top, Att_det = global_attentive_reason_unit(Fm_top, fq)
    # detection
    E = DarknetConv2D(out_filters, (1, 1))(Fm_top)
    return E, Att_det
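# Hedged sketch (assumption): pool_proj_cat is not defined in this snippet. Judging from
# its call sites above ("down sampling" a finer feature map and merging it with a coarser
# one), a plausible implementation max-pools the first input, projects it with a 1x1
# DarknetConv2D_BN_Leaky, and concatenates it with the second input. The helper in the
# actual repository may differ.
from tensorflow.keras.layers import Concatenate, MaxPooling2D

def pool_proj_cat_sketch(x_fine, x_coarse, num_filters):
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(x_fine)  # halve the spatial resolution
    x = DarknetConv2D_BN_Leaky(num_filters, (1, 1))(x)          # 1x1 channel projection
    return Concatenate()([x, x_coarse])                         # channel-wise merge with the coarser map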
def build_base_model(model_opt, gpu, tokenizer, checkpoint=None, gpu_id=None):
    """Build a model from opts.

    Args:
        model_opt: the option loaded from checkpoint. It's important that
            the opts have been updated and validated. See
            :class:`onmt.utils.parse.ArgumentParser`.
        gpu (bool): whether to use gpu.
        tokenizer: tokenizer used to build the embedding layer. If
            model_opt.share_tokenizer is true it is a single EasyTokenizer
            instance, otherwise it is a dict with 'src' and 'tgt' entries.
        checkpoint: the model generated by the train phase, or a resumed
            snapshot model from a stopped training.
        gpu_id (int or NoneType): Which GPU to use.

    Returns:
        the NMTModel.
    """
    # Build source embeddings.
    if model_opt.share_tokenizer:
        src_emb = build_embeddings(model_opt, tokenizer, src_field)
    else:
        src_emb = build_embeddings(model_opt, tokenizer['src'], src_field)

    # Build encoder.
    encoder = TransformerEncoder.from_opt(model_opt, src_emb)

    # Build target embeddings.
    if model_opt.share_tokenizer:
        tgt_emb = build_embeddings(model_opt, tokenizer, for_encoder=False)
    else:
        tgt_emb = build_embeddings(model_opt, tokenizer['tgt'], for_encoder=False)

    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        if not model_opt.share_tokenizer:
            # src/tgt vocab should be the same if `-share_vocab` is specified.
            assert src_field.base_field.vocab == tgt_field.base_field.vocab, \
                "preprocess with -share_vocab if you use share_embeddings"
        tgt_emb.word_lut.weight = src_emb.word_lut.weight

    # Build decoder.
    decoder = TransformerDecoder.from_opt(model_opt, tgt_emb)

    # Build TransformerModel (= encoder + decoder).
    model = TransformerModel(encoder, decoder)

    # Build generator.
    # copy attention is a technique proposed in a different paper
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_dim_size,
                      len(tokenizer.vocab) if model_opt.share_tokenizer else len(tokenizer['tgt'].vocab)),
            Cast(torch.float32),
            gen_func
        )
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        tgt_base_field = fields["tgt"].base_field
        vocab_size = len(tgt_base_field.vocab)
        pad_idx = tgt_base_field.vocab.stoi[tgt_base_field.pad_token]
        generator = CopyGenerator(model_opt.dec_dim_size, vocab_size, pad_idx)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward compatibility for models using custom layernorm
        # (see the key-renaming demo after this function).
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s

        checkpoint['model'] = {fix_key(k): v for k, v in checkpoint['model'].items()}
        # end of patch for backward compatibility
        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        # Decide how to initialize the parameters
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        # Xavier (Glorot) initialization
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
        # Load pretrained word-embedding vectors if available
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(model_opt.pre_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(model_opt.pre_word_vecs_dec)

    # Attach the generator
    model.generator = generator

    # Move the model to the requested device
    if gpu and gpu_id is not None:
        device = torch.device("cuda", gpu_id)
    elif gpu and not gpu_id:
        device = torch.device("cuda")
    elif not gpu:
        device = torch.device("cpu")
    model.to(device)
    if model_opt.model_dtype == 'fp16':
        model.half()
    return model
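# Illustrative demo (the parameter names below are assumptions; the regexes are copied
# from fix_key above): the backward-compatibility patch renames legacy layer-norm
# parameters 'a_2'/'b_2' to the current 'weight'/'bias' before load_state_dict is called.
import re

def fix_key_demo(s):
    s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2', r'\1.layer_norm\2.bias', s)
    s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2', r'\1.layer_norm\2.weight', s)
    return s

assert fix_key_demo('encoder.transformer.0.layer_norm.a_2') == 'encoder.transformer.0.layer_norm.weight'
assert fix_key_demo('decoder.transformer.3.layer_norm_2.b_2') == 'decoder.transformer.3.layer_norm_2.bias'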
def main(logger, args):
    df_train, _ = load_data(INPUT_DIR, logger)

    logger.info('Preprocess text')
    if args['debug']:
        df_train = df_train.iloc[:200000]
    else:
        df_train = preprocess_text(df_train)
    seq_train, tokenizer = tokenize_text(df_train, logger)

    logger.info('Pad train text data')
    seq_train = pad_sequences(seq_train, maxlen=PADDING_LENGTH, padding='post', truncating='post')
    # Position ids: 1-based, zeroed at padding positions
    # (see the short standalone demo after main()).
    pos_train = np.repeat([np.arange(PADDING_LENGTH) + 1], seq_train.shape[0], axis=0)
    pos_train = pos_train * np.not_equal(seq_train, 0)
    label_train = df_train['target'].values.reshape(-1, 1)

    if args['debug']:
        embedding_matrix = np.random.rand(len(tokenizer.word_index) + 1, 300).astype(np.float32)
    else:
        logger.info('Load multiple embeddings')
        embedding_matrices = load_multiple_embeddings(
            tokenizer.word_index, embed_types=[0, 2], max_workers=args['max_workers'])
        embedding_matrix = np.array(embedding_matrices).mean(0)

    # ===== training and evaluation loop ===== #
    device_ids = args['device_ids']
    output_device = device_ids[0]
    torch.cuda.set_device(device_ids[0])
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    batch_size = args['batch_size'] * len(device_ids)
    epochs = EPOCHS

    logger.info('Start training and evaluation loop')
    model_specs = [
        {'num_head': 4, 'k_dim': 8, 'num_layers': 2, 'dropout': 0.25},
        {'num_head': 4, 'k_dim': 16, 'num_layers': 2, 'dropout': 0.25},
        {'num_head': 8, 'k_dim': 8, 'num_layers': 2, 'dropout': 0.25},
        {'num_head': 8, 'k_dim': 16, 'num_layers': 2, 'dropout': 0.25},
        {'num_head': 8, 'k_dim': 16, 'num_layers': 2, 'dropout': 0.50},
        {'num_head': 4, 'k_dim': 8, 'num_layers': 3, 'dropout': 0.25},
        {'num_head': 8, 'k_dim': 8, 'num_layers': 3, 'dropout': 0.25},
        {'num_head': 8, 'k_dim': 16, 'num_layers': 3, 'dropout': 0.5},
    ]
    model_name_base = 'StackedRNNFM'

    for spec_id, spec in enumerate(model_specs):
        model_name = model_name_base + f'_specId={spec_id}_numhead={spec["num_head"]}_kdim={spec["k_dim"]}'
        model_name += f'_numlayers={spec["num_layers"]}_dropout={spec["dropout"]}'

        skf = StratifiedKFold(n_splits=KFOLD, shuffle=True, random_state=SEED)
        oof_preds_optimized = np.zeros(seq_train.shape[0])
        oof_preds_majority = np.zeros(seq_train.shape[0])
        results = []

        for fold, (index_train, index_valid) in enumerate(skf.split(label_train, label_train)):
            logger.info(f'Fold {fold + 1} / {KFOLD} - create dataloader and build model')
            x_train = {'sequence': seq_train[index_train].astype(int),
                       'position': pos_train[index_train].astype(int)}
            x_valid = {'sequence': seq_train[index_valid].astype(int),
                       'position': pos_train[index_valid].astype(int)}
            y_train = label_train[index_train].astype(np.float32)
            y_valid = label_train[index_valid].astype(np.float32)

            model = TransformerEncoder(embedding_matrix,
                                       PADDING_LENGTH,
                                       num_layers=spec['num_layers'],
                                       num_head=spec['num_head'],
                                       k_dim=spec['k_dim'],
                                       v_dim=spec['k_dim'],
                                       inner_dim=spec['k_dim'] * spec['num_head'] * 4,
                                       dropout=spec['dropout'],
                                       out_drop=0.5,
                                       out_hidden_dim=64)

            if args['debug']:
                step_size = 100
                scheduler_trigger_steps = 300
            else:
                step_size = 1200
                scheduler_trigger_steps = 4000

            config = {
                'epochs': epochs,
                'batch_size': batch_size,
                'output_device': output_device,
                'criterion_type': 'bce',
                'criteria_weights': [0.5, 0.5],
                'criterion_gamma': 2.0,
                'criterion_alpha': 0.75,
                'optimizer': 'adam',
                'optimizer_lr': 0.0005,
                'num_snapshots': NUM_SNAPSHOTS,
                'scheduler_type': 'cyclic',
                'base_lr': 0.00001,
                'max_lr': 0.0005,
                'step_size': step_size,
                'scheduler_mode': 'triangular',
                'scheduler_gamma': 0.9,
                'scheduler_trigger_steps': scheduler_trigger_steps,
                'sampler_type': 'normal',
                'seed': SEED
            }

            trainer = Trainer(model, logger, config)
            eval_results = trainer.train_and_eval_fold(x_train, y_train, x_valid, y_valid, fold)

            oof_preds_majority[index_valid] = np.array(
                [res['preds_binary'] for res in eval_results]).mean(0) > 0.5
            oof_majority_f1 = f1_score(
                label_train.reshape(-1, )[index_valid], oof_preds_majority[index_valid])

            oof_preds_proba = np.array([res['preds_proba'] for res in eval_results]).mean(0)
            oof_threshold_mean: float = np.mean([res['best_threshold'] for res in eval_results])
            oof_preds_optimized[index_valid] = oof_preds_proba > oof_threshold_mean
            oof_optimized_f1 = f1_score(
                label_train.reshape(-1, )[index_valid], oof_preds_optimized[index_valid])

            message = f'Fold {fold + 1} / {KFOLD} has been done.\n'
            message += f'Score: majority voting - {oof_majority_f1:.6f}, optimized threshold - {oof_optimized_f1:.6f}'
            logger.post(message)

            post_to_snapshot_spreadsheet(
                logger, SPREADSHEET_SNAPSHOT_URL, eval_type='SNAPSHOT', tag='SCORE',
                script_name=SCRIPT_NAME, model_name=model_name, fold=fold,
                snapshot_info=[res['f1'] for res in eval_results])
            post_to_snapshot_spreadsheet(
                logger, SPREADSHEET_SNAPSHOT_URL, eval_type='SNAPSHOT', tag='THRESHOLD',
                script_name=SCRIPT_NAME, model_name=model_name, fold=fold,
                snapshot_info=[res['best_threshold'] for res in eval_results])
            post_to_main_spreadsheet(
                logger, SPREADSHEET_MAIN_URL, eval_type='SNAPSHOT', script_name=SCRIPT_NAME,
                model_name=model_name, fold=fold, f1_majority=oof_majority_f1,
                f1_optimized=oof_optimized_f1, threshold=oof_threshold_mean)

            results.append({
                'f1_majority': oof_majority_f1,
                'f1_optimized': oof_optimized_f1,
                'threshold': oof_threshold_mean
            })

        f1_majority_mean = np.mean([res['f1_majority'] for res in results])
        f1_majority_std = np.std([res['f1_majority'] for res in results])
        f1_optimized_mean = np.mean([res['f1_optimized'] for res in results])
        f1_optimized_std = np.std([res['f1_optimized'] for res in results])
        threshold_mean = np.mean([res['threshold'] for res in results])

        total_metrics = [
            f1_majority_mean, f1_majority_std, f1_optimized_mean, f1_optimized_std, threshold_mean
        ]
        post_to_main_spreadsheet(
            logger, SPREADSHEET_MAIN_URL, eval_type='SNAPSHOT', script_name=SCRIPT_NAME,
            model_name=model_name, fold=-1, f1_majority=-1, f1_optimized=-1, threshold=-1,
            others=total_metrics)

        message = 'KFold training and evaluation has been done.\n'
        message += f'F1 majority voting - Avg: {f1_majority_mean}, Std: {f1_majority_std}\n'
        message += f'F1 optimized - Avg: {f1_optimized_mean}, Std: {f1_optimized_std}\n'
        message += f'Threshold - Avg: {threshold_mean}'
        logger.post(message)
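# Small self-contained demo (illustration only) of the position-id trick used in main()
# above: positions are 1-based, and every padded slot (token id 0) gets position id 0.
import numpy as np

seq = np.array([[5, 9, 3, 0, 0],
                [7, 0, 0, 0, 0]])
pos = np.repeat([np.arange(seq.shape[1]) + 1], seq.shape[0], axis=0)
pos = pos * np.not_equal(seq, 0)
print(pos)
# [[1 2 3 0 0]
#  [1 0 0 0 0]]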