예제 #1
0
    def init_parameters(self, pretrain_path=None, device=None):
        """Restore selected LM parameters from a checkpoint and freeze them.

        Args:
            pretrain_path: path to a saved state dict; when ``None`` this
                method is a no-op.
            device: forwarded to ``torch.load`` as ``map_location``.
        """
        if pretrain_path is None:
            return

        INFO("Loading pretrained parameters for LM from {}".format(
            pretrain_path))

        # Only parameters whose names start with one of these prefixes
        # are restored from the checkpoint.
        need_pretrain_params_prefix = [
            'decoder.embeddings', 'decoder.cell', 'decoder.linear_hidden',
            'generator'
        ]
        pretrain_params = torch.load(pretrain_path, map_location=device)
        for name, params in pretrain_params.items():
            if any(name.startswith(pp) for pp in need_pretrain_params_prefix):
                INFO("Loading param: {}...".format(name))
                try:
                    # ``strict=False`` tolerates keys absent from this model.
                    self.load_state_dict({name: params}, strict=False)
                except Exception as e:
                    # BUGFIX: report the actual exception class; the original
                    # logged ``str(Exception)``, which always prints the
                    # literal ``Exception`` type and hides the real error.
                    WARN("{}: {}".format(type(e).__name__, e))

        INFO("Pretrained model loaded.")

        # Freeze the restored embedding and output-projection weights so
        # they are not updated during subsequent training.
        self.decoder.embeddings.embeddings.weight.requires_grad = False
        self.generator.proj.weight.requires_grad = False
예제 #2
0
    def load_pretrain_model(self, pretrain_path=None, device=None):
        """Restore encoder/embedding/generator parameters from a checkpoint.

        Args:
            pretrain_path: path to a saved state dict; ``None`` means no-op.
            device: forwarded to ``torch.load`` as ``map_location``.
        """
        if pretrain_path is None:
            return

        INFO("Loading pretrained parameters for LM from {}".format(
            pretrain_path))

        # Only parameters under these prefixes are restored.
        need_pretrain_params_prefix = [
            'encoder',
            'decoder.embeddings',
            'generator',
        ]

        # Names actually restored; kept for the (currently disabled)
        # freezing step below.
        pretrained_params_name = []
        pretrain_params = torch.load(pretrain_path, map_location=device)
        for name, param in pretrain_params.items():
            if any(name.startswith(pp) for pp in need_pretrain_params_prefix):
                INFO("Loading param: {}...".format(name))
                try:
                    # ``strict=False`` tolerates keys absent from this model.
                    self.load_state_dict({name: param}, strict=False)
                except Exception as e:
                    # BUGFIX: log the real exception class instead of the
                    # constant ``str(Exception)``.
                    WARN("{}: {}".format(type(e).__name__, e))
                pretrained_params_name.append(name)

        # Freezing the restored parameters is intentionally disabled:
        # for name, param in self.named_parameters():
        #     if name in pretrained_params_name:
        #         param.requires_grad = False

        INFO("Pretrained model loaded.")
예제 #3
0
def shuffle(*path):
    """Jointly shuffle the lines of several parallel files.

    All files are read in lockstep (the first file drives the length;
    shorter companions yield empty strings), the aligned tuples are
    shuffled with one permutation, and each column is written to an
    anonymous temporary file.

    Args:
        *path: paths of the parallel text files.

    Returns:
        A tuple of open temp-file handles, one per input path, rewound to
        the start so callers can read the shuffled data immediately.
    """
    f_handles = [open(p) for p in path]
    try:
        # Read all the data, keeping corresponding lines together.
        lines = []
        for l in f_handles[0]:
            line = [l.strip()] + [ff.readline().strip()
                                  for ff in f_handles[1:]]
            lines.append(line)
    finally:
        # BUGFIX: close the read handles even if reading raises; the
        # original used a side-effect list comprehension with no cleanup
        # on error.
        for f in f_handles:
            f.close()

    # random shuffle the data
    INFO('Shuffling data...')
    random.shuffle(lines)
    INFO('Done.')

    # Set up temp files, one per input, named after the source file.
    out_handles = []
    for p in path:
        _, filename = os.path.split(p)
        out_handles.append(tempfile.TemporaryFile(
            prefix=filename + '.shuf', dir="/tmp/", mode="a+"))

    for line in lines:
        for ii, f in enumerate(out_handles):
            print(line[ii], file=f)

    # release memory before returning
    del lines

    # Rewind so callers read from the beginning.
    for f in out_handles:
        f.seek(0)

    return tuple(out_handles)
예제 #4
0
def test_all():
    """Run the DL4MT train/inference smoke tests under ./tmp."""

    test_dir = "./tmp"

    if not os.path.exists(test_dir):
        os.makedirs(test_dir, exist_ok=True)

    # NOTE: the transformer train/inference smoke tests are currently
    # disabled in this suite.

    clean_tmp_dir(test_dir)

    sections = (
        ("Test DL4MT training...", test_dl4mt_train),
        ("Test DL4MT inference...", test_dl4mt_inference),
    )
    for banner, case in sections:
        INFO("=" * 20)
        INFO(banner)
        case(test_dir)
        INFO("Done.")
        INFO("=" * 20)

    rm_tmp_dir(test_dir)
예제 #5
0
def build_model(model: str, **kwargs):
    """Look up ``model`` in the registry and construct it with ``kwargs``.

    Raises:
        ValueError: if ``model`` is not a key of ``MODEL_CLS``.
    """
    if model in MODEL_CLS:
        INFO("model_name:  " + model)
        return MODEL_CLS[model](**kwargs)
    raise ValueError(
        "Invalid model class '{}' provided. Only {} are supported now.".format(
            model, list(MODEL_CLS.keys())))
예제 #6
0
    def __init__(self,
                 d_model,
                 d_inner_hid,
                 n_head,
                 dim_per_head,
                 dropout=0.1,
                 dim_capsule=100,
                 num_capsules=0,
                 null_capsule=False):
        """Decoder block: self-attention, FFN, and a contextual capsule layer.

        Args:
            d_model: model (hidden) dimensionality.
            d_inner_hid: hidden size of the position-wise feed-forward layer.
            n_head: number of attention heads.
            dim_per_head: dimensionality of each attention head.
            dropout: dropout probability.
            dim_capsule: total capsule dimensionality; must be divisible by
                ``num_capsules``.
            num_capsules: number of output capsules; must be positive.
            null_capsule: when True, add extra "null" capsules that attract
                irrelevant routing.

        Raises:
            ValueError: if ``num_capsules`` is not positive or does not
                divide ``dim_capsule``.
        """
        super(DecoderBlock, self).__init__()

        self.slf_attn = MultiHeadedAttention(head_count=n_head,
                                             model_dim=d_model,
                                             dropout=dropout,
                                             dim_per_head=dim_per_head)
        # Cross-attention (ctx_attn) is intentionally disabled in this block.
        self.pos_ffn = PositionwiseFeedForward(size=d_model,
                                               hidden_size=d_inner_hid)

        self.layer_norm_1 = nn.LayerNorm(d_model)
        self.layer_norm_2 = nn.LayerNorm(d_model)

        self.dropout = nn.Dropout(dropout)

        # contextual capsule layer (pre-capsule layer norm disabled)
        self.apply_capsule = True

        # BUGFIX: the original ``assert dim_capsule % num_capsules == 0`` is
        # stripped under ``python -O`` and raises ZeroDivisionError for the
        # default ``num_capsules=0``; validate explicitly instead.
        if num_capsules <= 0:
            raise ValueError(
                "num_capsules must be positive, got {}".format(num_capsules))
        if dim_capsule % num_capsules != 0:
            raise ValueError(
                "dim_capsule ({}) must be divisible by num_capsules ({})".format(
                    dim_capsule, num_capsules))
        self.dim_per_cap = dim_capsule // num_capsules
        # NOTE(review): the three-way split assumes dim_capsule is divisible
        # by 3 — confirm with callers.
        dim_per_part = dim_capsule // 3
        total_num_capsules = num_capsules

        self.null_caps = null_capsule
        if null_capsule:
            INFO("Using Null Capsules to attract irrelevant routing.")
            # One extra null capsule per three regular capsules.
            total_num_capsules += num_capsules // 3

        self.capsule_layer = ContextualCapsuleLayer(
            num_out_caps=total_num_capsules,
            num_in_caps=None,
            dim_in_caps=d_model,
            dim_out_caps=self.dim_per_cap,
            dim_context=d_model,
            num_iterations=3,
            share_route_weights_for_in_caps=True)

        self.out_and_cap_ffn = MultiInputPositionwiseFeedForward(
            size=d_model,
            hidden_size=d_inner_hid,
            dropout=dropout,
            inp_sizes=[dim_per_part, dim_per_part, dim_per_part])
예제 #7
0
    def load_external_lm(self, pretrain_path=None, device=None):
        """Restore decoder-cell parameters from an external LM checkpoint.

        Args:
            pretrain_path: path to a saved state dict; ``None`` means no-op.
            device: forwarded to ``torch.load`` as ``map_location``.
        """
        if pretrain_path is None:
            return

        INFO("Loading pretrained parameters for LM from {}".format(
            pretrain_path))

        # Only the recurrent cell and its hidden projection are restored.
        need_pretrain_params_prefix = [
            'decoder.cell',
            'decoder.linear_hidden',
        ]
        pretrain_params = torch.load(pretrain_path, map_location=device)
        for name, params in pretrain_params.items():
            if any(name.startswith(pp) for pp in need_pretrain_params_prefix):
                INFO("Loading param: {}...".format(name))
                try:
                    # ``strict=False`` tolerates keys absent from this model.
                    self.load_state_dict({name: params}, strict=False)
                except Exception as e:
                    # BUGFIX: log the real exception class instead of the
                    # constant ``str(Exception)``.
                    WARN("{}: {}".format(type(e).__name__, e))

        INFO("Pretrained model loaded.")
예제 #8
0
def get_teacher_model(training_configs, model_configs, vocab_src, vocab_tgt, flags):
    """Build (and optionally restore) the teacher model used for ODC.

    Returns:
        ``(teacher_model, teacher_model_path)``; ``(None, '')`` when
        ``training_configs['use_odc']`` is falsy.
    """
    if not training_configs['use_odc']:
        return None, ''

    INFO('Building teacher model...')

    teacher = build_model(n_src_vocab=vocab_src.max_n_words,
                          n_tgt_vocab=vocab_tgt.max_n_words,
                          padding_idx=vocab_src.pad,
                          vocab_src=vocab_src,
                          **model_configs)
    if Constants.USE_GPU:
        teacher.cuda()

    path = training_configs.get('teacher_model_path', '')
    if path != '':
        # A checkpoint was supplied: warm-start the teacher from it.
        teacher.load_state_dict(
            torch.load(path, map_location=Constants.CURRENT_DEVICE),
            strict=False)
    else:
        # No checkpoint: derive where the teacher will be saved.
        path = os.path.join(flags.saveto, flags.model_name + '.teacher.pth')
    INFO('Done.')

    return teacher, path
예제 #9
0
                           beam_size=beam_size,
                           model_path=model_path,
                           use_gpu=False,
                           config_path=config_path,
                           saveto=saveto,
                           max_steps=20)


if __name__ == '__main__':

    test_dir = "./tmp"

    if not os.path.exists(test_dir):
        os.makedirs(test_dir, exist_ok=True)

    def _section(banner, case):
        # Open a banner, run one smoke test, and report completion.
        INFO("=" * 20)
        INFO(banner)
        case(test_dir)
        INFO("Done.")

    _section("Test transformer training...", test_transformer_train)
    INFO("=" * 20)

    _section("Test transformer inference...", test_transformer_inference)
    INFO("=" * 20)

    _section("Test ensemble inference...", test_transformer_ensemble_inference)
예제 #10
0
                           config_path=config_path,
                           saveto=saveto,
                           max_steps=20, alpha=alpha)


if __name__ == '__main__':

    test_dir = "./tmp"

    parser = test_utils.build_test_argparser()
    args = parser.parse_args()

    if not os.path.exists(test_dir):
        os.makedirs(test_dir, exist_ok=True)

    def _section(banner, case):
        # Open a banner, run one smoke test on the CLI-selected device,
        # and report completion.
        INFO("=" * 20)
        INFO(banner)
        case(test_dir, use_gpu=args.use_gpu)
        INFO("Done.")

    _section("Test transformer training...", test_transformer_train)
    INFO("=" * 20)

    # Training a second time exercises resuming from the checkpoint.
    _section("Test resuming from training...", test_transformer_train)
    INFO("=" * 20)

    _section("Test transformer inference...", test_transformer_inference)
예제 #11
0
                  model_path=model_path,
                  use_gpu=use_gpu,
                  config_path=config_path,
                  saveto=saveto,
                  max_steps=20)


if __name__ == '__main__':

    parser = test_utils.build_test_argparser()
    args = parser.parse_args()

    test_dir = "./tmp"

    if not os.path.exists(test_dir):
        os.makedirs(test_dir, exist_ok=True)

    cases = (
        ("Test training with BPE...", test_transformer_train),
        ("Test inference with BPE...", test_transformer_inference),
    )
    for banner, case in cases:
        INFO("=" * 20)
        INFO(banner)
        case(test_dir, use_gpu=args.use_gpu)
        INFO("Done.")
        INFO("=" * 20)

    test_utils.rm_tmp_dir(test_dir)
예제 #12
0
                  model_path=model_path,
                  use_gpu=use_gpu,
                  config_path=config_path,
                  saveto=saveto,
                  max_steps=20)


if __name__ == '__main__':

    parser = test_utils.build_test_argparser()
    args = parser.parse_args()

    test_dir = "./tmp"

    if not os.path.exists(test_dir):
        os.makedirs(test_dir, exist_ok=True)

    cases = (
        ("Test DL4MT training...", test_dl4mt_train),
        ("Test DL4MT inference...", test_dl4mt_inference),
    )
    for banner, case in cases:
        INFO("=" * 20)
        INFO(banner)
        case(test_dir, use_gpu=args.use_gpu)
        INFO("Done.")
        INFO("=" * 20)

    test_utils.rm_tmp_dir(test_dir)
예제 #13
0
 def INFO(self):
     """Log the configured criteria (one line each, if set)."""
     # NOTE: the bare INFO(...) calls below resolve to the module-level
     # INFO logger, not this method (methods are only reachable via self).
     for criterion in (self.nmt_criterion, self.wordKD_criterion):
         if criterion is not None:
             INFO(criterion)
예제 #14
0
    log_path = os.path.join(test_dir, "log")
    valid_path = os.path.join(test_dir, "valid")

    train.run(model_name=model_name,
              config_path=config_path,
              saveto=saveto,
              log_path=log_path,
              valid_path=valid_path,
              debug=True)

if __name__ == '__main__':

    test_dir = "./tmp"

    if not os.path.exists(test_dir):
        os.makedirs(test_dir, exist_ok=True)

    # Running training twice: the second run reloads the latest checkpoint.
    for banner in ("Test transformer training...",
                   "Test reloading from latest checkpoint..."):
        INFO("=" * 20)
        INFO(banner)
        test_transformer_train(test_dir)
        INFO("Done.")
        INFO("=" * 20)

    test_utils.rm_tmp_dir(test_dir)