Example 1
    def __init__(self, image_model, fusion_method, id_to_vec, emb_size, vocab_size, config, device='cuda:0'):
        super(TextImageTransformerEncoder, self).__init__()

        self.hidden_size = config.hidden_size
        self.fusion_method = fusion_method
        if fusion_method == 'concat':
            self.fc = nn.Linear(self.hidden_size*2, self.hidden_size)
        elif fusion_method == 'mcb':
            self.fusion = fusions.MCB([self.hidden_size, self.hidden_size], self.hidden_size)
        elif fusion_method == 'mlb':
            self.fusion = fusions.MLB([self.hidden_size, self.hidden_size], self.hidden_size)
        elif fusion_method == 'mutan':
            self.fusion = fusions.Mutan([self.hidden_size, self.hidden_size], self.hidden_size)
        elif fusion_method == 'block':
            self.fusion = fusions.Block([self.hidden_size, self.hidden_size], self.hidden_size)

        if image_model == 'vgg':
            from model.vgg import VggEncoder
            self.image_encoder = VggEncoder(self.hidden_size)
        elif image_model == 'resnet':
            from model.resnet import ResNetEncoder
            self.image_encoder = ResNetEncoder(self.hidden_size)

        from model.transformer import TransformerEncoder
        self.context_encoder = TransformerEncoder(id_to_vec, emb_size, vocab_size, config, device)
        self.response_encoder = TransformerEncoder(id_to_vec, emb_size, vocab_size, config, device)
        M = torch.FloatTensor(self.hidden_size, self.hidden_size)
        init.xavier_normal_(M)
        self.M = nn.Parameter(M, requires_grad=True)
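
The forward pass is not shown in this example. Below is a minimal sketch of how a bilinear matrix like M is commonly used to score a context-response pair, assuming `context` and `response` are pooled encoder outputs of shape (batch, hidden_size); the method name and shapes are assumptions, not code from the original repository.

    # Hypothetical sketch, not part of the original class.
    def score(self, context, response):
        # context, response: (batch, hidden_size) pooled encoder states
        # bilinear matching: s = c^T M r, computed row-wise over the batch
        projected = context.mm(self.M)            # (batch, hidden_size)
        return (projected * response).sum(dim=1)  # (batch,)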
Example 2
    def __init__(self, args, device='cpu'):
        super().__init__()
        self.args = args
        self.device = device
        self.epoch = 0
        self.dropout = nn.Dropout(self.args.dropout)

        # Entailment Tracking
        # roberta_model_path = '/research/king3/ik_grp/yfgao/pretrain_models/huggingface/roberta-base'
        roberta_model_path = args.pretrained_lm_path
        roberta_config = RobertaConfig.from_pretrained(roberta_model_path,
                                                       cache_dir=None)
        self.roberta = RobertaModel.from_pretrained(roberta_model_path,
                                                    cache_dir=None,
                                                    config=roberta_config)
        encoder_layer = TransformerEncoderLayer(self.args.bert_hidden_size, 12,
                                                4 * self.args.bert_hidden_size)
        encoder_norm = nn.LayerNorm(self.args.bert_hidden_size)
        self.transformer_encoder = TransformerEncoder(encoder_layer,
                                                      args.trans_layer,
                                                      encoder_norm)
        self._reset_transformer_parameters()
        self.w_entail = nn.Linear(self.args.bert_hidden_size, 3, bias=True)

        # Logic Reasoning
        self.entail_emb = nn.Parameter(
            torch.rand(3, self.args.bert_hidden_size))
        nn.init.normal_(self.entail_emb)
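        # note: nn.init.normal_ re-initializes entail_emb in place, so the
        # torch.rand values above are immediately overwritten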

        self.w_selfattn = nn.Linear(self.args.bert_hidden_size * 2,
                                    1,
                                    bias=True)
        self.w_output = nn.Linear(self.args.bert_hidden_size * 2, 4, bias=True)
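
Only the constructor is shown. A hedged sketch of the additive self-attention pooling that `w_selfattn` suggests (one score per position, softmax over the sequence, weighted sum); the method name, the `mask` argument, and the shapes are assumptions.

    # Hypothetical sketch inferred from the layer shapes; not the original forward.
    def self_attn_pool(self, hidden, mask):
        # hidden: (batch, seq_len, 2 * bert_hidden_size); mask: (batch, seq_len) bool
        scores = self.w_selfattn(self.dropout(hidden)).squeeze(-1)  # (batch, seq_len)
        scores = scores.masked_fill(~mask, float('-inf'))
        weights = torch.softmax(scores, dim=-1).unsqueeze(-1)       # (batch, seq_len, 1)
        return (weights * hidden).sum(dim=1)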
Example 3
    def __init__(self,
                 id_to_vec,
                 emb_size,
                 vocab_size,
                 config,
                 device='cuda:0'):
        super(TextTransformerEncoder, self).__init__()

        from model.transformer import TransformerEncoder
        self.encoder = TransformerEncoder(id_to_vec, emb_size, vocab_size,
                                          config, device)
        self.hidden_size = config.hidden_size
        M = torch.FloatTensor(self.hidden_size, self.hidden_size)
        init.xavier_normal_(M)
        self.M = nn.Parameter(M, requires_grad=True)
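
This is the text-only variant of Example 1 and shares the same bilinear matching head, so the scoring sketch above applies here as well. An illustrative instantiation, where `id_to_vec` and `config` are placeholders from the surrounding training script:

    encoder = TextTransformerEncoder(id_to_vec, emb_size=300,
                                     vocab_size=len(id_to_vec),
                                     config=config, device='cuda:0')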
Example 4
def detection_branch(Fm, Ftd, fq, out_filters, training, num_layers=0):
    # bottom-up branch
    # downsampling
    Fm_mid = pool_proj_cat(Ftd[1], Ftd[0], K.int_shape(Fm)[-1] // 2)
    # downsampling
    Fm_top = pool_proj_cat(Fm_mid, Fm, K.int_shape(Fm)[-1] // 2)
    # projection
    Fm_top = DarknetConv2D_BN_Leaky(K.int_shape(Fm)[-1] // 2, (1, 1))(Fm_top)
    b, h, w, c = K.int_shape(Fm_top)
    if num_layers > 0:
        pos_emb = PositionEmbeddingSine(num_pos_feats=c)(Fm_top)
        Fm_top = K.reshape(Fm_top, (-1, h * w, c))
        pos_emb = K.reshape(pos_emb, (-1, h * w, c))
        Fm_top = TransformerEncoder(embed_dim=c,
                                    num_layers=num_layers)(Fm_top,
                                                           training=training)
        Fm_top = K.reshape(Fm_top, (-1, h, w, c))
    # GARAN unit (global attentive reasoning)
    Fm_top, Att_det = global_attentive_reason_unit(Fm_top, fq)
    # detection
    E = DarknetConv2D(out_filters, (1, 1))(Fm_top)
    return E, Att_det
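
`pool_proj_cat` is not defined in this snippet. A plausible sketch reconstructed from its call sites (downsample the finer feature map, project its channels, concatenate with the coarser map); the pooling choice and the layer order are assumptions.

    # Hypothetical helper, inferred from how detection_branch calls it.
    from keras.layers import MaxPooling2D, Concatenate

    def pool_proj_cat(x, y, num_filters):
        x = MaxPooling2D(pool_size=(2, 2))(x)               # downsample by 2
        x = DarknetConv2D_BN_Leaky(num_filters, (1, 1))(x)  # channel projection
        return Concatenate()([x, y])                        # merge with the skip feature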
Example 5
def build_base_model(model_opt, gpu, tokenizer, checkpoint=None, gpu_id=None):
    """Build a model from opts.

    Args:
        model_opt: the option loaded from checkpoint. It's important that
            the opts have been updated and validated. See
            :class:`onmt.utils.parse.ArgumentParser`.
        gpu (bool): whether to use gpu.
        tokenizer: tokenizer used to build the embedding layer. If
            model_opt.share_tokenizer is true, tokenizer is an EasyTokenizer
            instance; otherwise it is a dict containing {'src', 'tgt'}
            tokenizers.
        checkpoint: the model generated by the training phase, or a resumed
                    snapshot of a stopped training run.
        gpu_id (int or NoneType): Which GPU to use.

    Returns:
        the NMTModel.
    """

    # Build source embeddings.
    if model_opt.share_tokenizer:
        src_emb = build_embeddings(model_opt, tokenizer, src_field)
    else:
        src_emb = build_embeddings(model_opt, tokenizer['src'], src_field)
    # Build encoder.
    encoder = TransformerEncoder.from_opt(model_opt, src_emb)

    # Build target embeddings.
    if model_opt.share_tokenizer:
        tgt_emb = build_embeddings(model_opt, tokenizer, for_encoder=False)
    else:
        tgt_emb = build_embeddings(model_opt, tokenizer['tgt'], for_encoder=False)
    # Share the embedding matrix - preprocess with share_vocab required.
    if model_opt.share_embeddings:
        if not model_opt.share_tokenizer:
            # src/tgt vocab should be the same if `-share_vocab` is specified.
            assert src_field.base_field.vocab == tgt_field.base_field.vocab, \
                "preprocess with -share_vocab if you use share_embeddings"
        tgt_emb.word_lut.weight = src_emb.word_lut.weight
    # Build decoder.
    decoder = TransformerDecoder.from_opt(model_opt, tgt_emb)

    # Build TransformerModel(= encoder + decoder).
    model = TransformerModel(encoder, decoder)

    # Build Generator.
    # copy attention is a technique proposed in another paper
    if not model_opt.copy_attn:
        if model_opt.generator_function == "sparsemax":
            gen_func = onmt.modules.sparse_activations.LogSparsemax(dim=-1)
        else:
            gen_func = nn.LogSoftmax(dim=-1)
        generator = nn.Sequential(
            nn.Linear(model_opt.dec_dim_size,
                      len(tokenizer.vocab) if model_opt.share_tokenizer else len(tokenizer['tgt'].vocab)),
            Cast(torch.float32),
            gen_func
        )
        if model_opt.share_decoder_embeddings:
            generator[0].weight = decoder.embeddings.word_lut.weight
    else:
        tgt_base_field = fields["tgt"].base_field
        vocab_size = len(tgt_base_field.vocab)
        pad_idx = tgt_base_field.vocab.stoi[tgt_base_field.pad_token]
        generator = CopyGenerator(model_opt.dec_dim_size, vocab_size, pad_idx)

    # Load the model states from checkpoint or initialize them.
    if checkpoint is not None:
        # This preserves backward compatibility for models using custom layernorm
        def fix_key(s):
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.b_2',
                       r'\1.layer_norm\2.bias', s)
            s = re.sub(r'(.*)\.layer_norm((_\d+)?)\.a_2',
                       r'\1.layer_norm\2.weight', s)
            return s
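        # e.g. 'encoder.layer_norm_1.b_2' -> 'encoder.layer_norm_1.bias'
        #      'decoder.layer_norm.a_2'   -> 'decoder.layer_norm.weight'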

        checkpoint['model'] = {fix_key(k): v
                               for k, v in checkpoint['model'].items()}
        # end of patch for backward compatibility

        model.load_state_dict(checkpoint['model'], strict=False)
        generator.load_state_dict(checkpoint['generator'], strict=False)
    else:
        # decide how to initialize the parameters
        if model_opt.param_init != 0.0:
            for p in model.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
            for p in generator.parameters():
                p.data.uniform_(-model_opt.param_init, model_opt.param_init)
        # Xavier (Glorot) initialization
        if model_opt.param_init_glorot:
            for p in model.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
            for p in generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
        # load pretrained word-embedding weights
        if hasattr(model.encoder, 'embeddings'):
            model.encoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_enc)
        if hasattr(model.decoder, 'embeddings'):
            model.decoder.embeddings.load_pretrained_vectors(
                model_opt.pre_word_vecs_dec)
    # attach the generator
    model.generator = generator

    if gpu and gpu_id is not None:
        device = torch.device("cuda", gpu_id)
    elif gpu and not gpu_id:
        device = torch.device("cuda")
    elif not gpu:
        device = torch.device("cpu")
    model.to(device)
    if model_opt.model_dtype == 'fp16':
        model.half()

    return model
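
An illustrative call, assuming the surrounding script has already parsed and validated the options; `model_opt` and `tokenizer` are placeholders here:

    model = build_base_model(model_opt, gpu=True, tokenizer=tokenizer,
                             checkpoint=None, gpu_id=0)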
Example 6
def main(logger, args):
    df_train, _ = load_data(INPUT_DIR, logger)
    logger.info('Preprocess text')
    if args['debug']:
        df_train = df_train.iloc[:200000]
    else:
        df_train = preprocess_text(df_train)
    seq_train, tokenizer = tokenize_text(df_train, logger)

    logger.info('Pad train text data')
    seq_train = pad_sequences(seq_train,
                              maxlen=PADDING_LENGTH,
                              padding='post',
                              truncating='post')
    pos_train = np.repeat([np.arange(PADDING_LENGTH) + 1],
                          seq_train.shape[0],
                          axis=0)
    pos_train = pos_train * np.not_equal(seq_train, 0)
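    # e.g. a padded row [5, 9, 0, 0] gets positions [1, 2, 3, 4] * [1, 1, 0, 0]
    # = [1, 2, 0, 0], so PAD tokens receive position index 0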

    label_train = df_train['target'].values.reshape(-1, 1)

    if args['debug']:
        embedding_matrix = np.random.rand(len(tokenizer.word_index) + 1,
                                          300).astype(np.float32)
    else:
        logger.info('Load multiple embeddings')
        embedding_matrices = load_multiple_embeddings(
            tokenizer.word_index,
            embed_types=[0, 2],
            max_workers=args['max_workers'])
        embedding_matrix = np.array(embedding_matrices).mean(0)

    # ===== training and evaluation loop ===== #
    device_ids = args['device_ids']
    output_device = device_ids[0]
    torch.cuda.set_device(device_ids[0])
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = True

    batch_size = args['batch_size'] * len(device_ids)
    epochs = EPOCHS

    logger.info('Start training and evaluation loop')

    model_specs = [{
        'num_head': 4,
        'k_dim': 8,
        'num_layers': 2,
        'dropout': 0.25
    }, {
        'num_head': 4,
        'k_dim': 16,
        'num_layers': 2,
        'dropout': 0.25
    }, {
        'num_head': 8,
        'k_dim': 8,
        'num_layers': 2,
        'dropout': 0.25
    }, {
        'num_head': 8,
        'k_dim': 16,
        'num_layers': 2,
        'dropout': 0.25
    }, {
        'num_head': 8,
        'k_dim': 16,
        'num_layers': 2,
        'dropout': 0.50
    }, {
        'num_head': 4,
        'k_dim': 8,
        'num_layers': 3,
        'dropout': 0.25
    }, {
        'num_head': 8,
        'k_dim': 8,
        'num_layers': 3,
        'dropout': 0.25
    }, {
        'num_head': 8,
        'k_dim': 16,
        'num_layers': 3,
        'dropout': 0.5
    }]

    model_name_base = 'StackedRNNFM'

    for spec_id, spec in enumerate(model_specs):
        model_name = model_name_base + f'_specId={spec_id}_numhead={spec["num_head"]}_kdim={spec["k_dim"]}'
        model_name += f'_numlayers={spec["num_layers"]}_dropout={spec["dropout"]}'

        skf = StratifiedKFold(n_splits=KFOLD, shuffle=True, random_state=SEED)
        oof_preds_optimized = np.zeros(seq_train.shape[0])
        oof_preds_majority = np.zeros(seq_train.shape[0])
        results = []
        for fold, (index_train, index_valid) in enumerate(
                skf.split(label_train, label_train)):
            logger.info(
                f'Fold {fold + 1} / {KFOLD} - create dataloader and build model'
            )
            x_train = {
                'sequence': seq_train[index_train].astype(int),
                'position': pos_train[index_train].astype(int)
            }
            x_valid = {
                'sequence': seq_train[index_valid].astype(int),
                'position': pos_train[index_valid].astype(int)
            }
            y_train, y_valid = label_train[index_train].astype(
                np.float32), label_train[index_valid].astype(np.float32)

            model = TransformerEncoder(embedding_matrix,
                                       PADDING_LENGTH,
                                       num_layers=spec['num_layers'],
                                       num_head=spec['num_head'],
                                       k_dim=spec['k_dim'],
                                       v_dim=spec['k_dim'],
                                       inner_dim=spec['k_dim'] *
                                       spec['num_head'] * 4,
                                       dropout=0.3,
                                       out_drop=0.5,
                                       out_hidden_dim=64)

            if args['debug']:
                step_size = 100
                scheduler_trigger_steps = 300
            else:
                step_size = 1200
                scheduler_trigger_steps = 4000

            config = {
                'epochs': epochs,
                'batch_size': batch_size,
                'output_device': output_device,
                'criterion_type': 'bce',
                'criteria_weights': [0.5, 0.5],
                'criterion_gamma': 2.0,
                'criterion_alpha': 0.75,
                'optimizer': 'adam',
                'optimizer_lr': 0.0005,
                'num_snapshots': NUM_SNAPSHOTS,
                'scheduler_type': 'cyclic',
                'base_lr': 0.00001,
                'max_lr': 0.0005,
                'step_size': step_size,
                'scheduler_mode': 'triangular',
                'scheduler_gamma': 0.9,
                'scheduler_trigger_steps': scheduler_trigger_steps,
                'sampler_type': 'normal',
                'seed': SEED
            }

            trainer = Trainer(model, logger, config)
            eval_results = trainer.train_and_eval_fold(x_train, y_train,
                                                       x_valid, y_valid, fold)

            oof_preds_majority[index_valid] = np.array(
                [res['preds_binary'] for res in eval_results]).mean(0) > 0.5
            oof_majority_f1 = f1_score(
                label_train.reshape(-1, )[index_valid],
                oof_preds_majority[index_valid])

            oof_preds_proba = np.array(
                [res['preds_proba'] for res in eval_results]).mean(0)
            oof_threshold_mean: float = np.mean(
                [res['best_threshold'] for res in eval_results])
            oof_preds_optimized[
                index_valid] = oof_preds_proba > oof_threshold_mean
            oof_optimized_f1 = f1_score(
                label_train.reshape(-1, )[index_valid],
                oof_preds_optimized[index_valid])

            message = f'Fold {fold + 1} / {KFOLD} has been done.\n'
            message += f'Score: majority voting - {oof_majority_f1:.6f}, optimized threshold - {oof_optimized_f1:.6f}'
            logger.post(message)

            post_to_snapshot_spreadsheet(
                logger,
                SPREADSHEET_SNAPSHOT_URL,
                eval_type='SNAPSHOT',
                tag='SCORE',
                script_name=SCRIPT_NAME,
                model_name=model_name,
                fold=fold,
                snapshot_info=[res['f1'] for res in eval_results])

            post_to_snapshot_spreadsheet(
                logger,
                SPREADSHEET_SNAPSHOT_URL,
                eval_type='SNAPSHOT',
                tag='THRESHOLD',
                script_name=SCRIPT_NAME,
                model_name=model_name,
                fold=fold,
                snapshot_info=[res['best_threshold'] for res in eval_results])

            post_to_main_spreadsheet(logger,
                                     SPREADSHEET_MAIN_URL,
                                     eval_type='SNAPSHOT',
                                     script_name=SCRIPT_NAME,
                                     model_name=model_name,
                                     fold=fold,
                                     f1_majority=oof_majority_f1,
                                     f1_optimized=oof_optimized_f1,
                                     threshold=oof_threshold_mean)

            results.append({
                'f1_majority': oof_majority_f1,
                'f1_optimized': oof_optimized_f1,
                'threshold': oof_threshold_mean
            })

        f1_majority_mean = np.mean([res['f1_majority'] for res in results])
        f1_majority_std = np.std([res['f1_majority'] for res in results])
        f1_optimized_mean = np.mean([res['f1_optimized'] for res in results])
        f1_optimized_std = np.std([res['f1_optimized'] for res in results])
        threshold_mean = np.mean([res['threshold'] for res in results])
        total_metrics = [
            f1_majority_mean, f1_majority_std, f1_optimized_mean,
            f1_optimized_std, threshold_mean
        ]

        post_to_main_spreadsheet(logger,
                                 SPREADSHEET_MAIN_URL,
                                 eval_type='SNAPSHOT',
                                 script_name=SCRIPT_NAME,
                                 model_name=model_name,
                                 fold=-1,
                                 f1_majority=-1,
                                 f1_optimized=-1,
                                 threshold=-1,
                                 others=total_metrics)

        message = 'KFold training and evaluation has been done.\n'
        message += f'F1 majority voting - Avg: {f1_majority_mean}, Std: {f1_majority_std}\n'
        message += f'F1 optimized - Avg: {f1_optimized_mean}, Std: {f1_optimized_std}\n'
        message += f'Threshold - Avg: {threshold_mean}'
        logger.post(message)