コード例 #1
0
def main(args):
    """Dump the k nearest target-language words for every source word.

    The embeddings and the mapping come from ``build_model``. The mapped
    source embeddings are matched against the target embeddings through
    ``get_word_translations`` and one line per (source word, target word)
    pair is written to ``<src_lang>-<tgt_lang>.txt`` in the current working
    directory. When ``args.output_scores`` is truthy each line also carries
    the match score formatted with four decimals.
    """
    assert os.path.exists(args.src_emb)
    assert os.path.exists(args.tgt_emb)
    src_emb, tgt_emb, mapping, _ = build_model(args, False)

    # Source vectors go through the mapping; target vectors are used as-is.
    mapped_src = mapping(src_emb.weight).data
    raw_tgt = tgt_emb.weight.data

    # Invert the word -> id dictionaries so ids can be turned back into words.
    src_words = {idx: word for word, idx in args.src_dico.word2id.items()}
    tgt_words = {idx: word for word, idx in args.tgt_dico.word2id.items()}

    matches = get_word_translations(mapped_src, raw_tgt, args.knn)

    output_file = '%s-%s.txt' % (args.src_lang, args.tgt_lang)
    print('Writing to %s...' % output_file)
    with open(output_file, 'w', encoding='utf-8') as out:
        for src_id, (tgt_ids, tgt_scores) in enumerate(matches):
            for tgt_id, score in zip(tgt_ids, tgt_scores):
                if args.cuda:
                    # .numpy() below requires CPU tensors.
                    tgt_id, score = tgt_id.cpu(), score.cpu()
                src_word = src_words[src_id]
                tgt_word = tgt_words[int(tgt_id.numpy())]
                if args.output_scores:
                    line = '%s %s %.4f\n' % (
                        src_word, tgt_word, float(score.numpy()))
                else:
                    line = '%s %s\n' % (src_word, tgt_word)
                out.write(line)
コード例 #2
0
def main(cl_arguments):
    """Load a trained CoLA model and run inference on it.

    ``cl_arguments`` is parsed by ``handle_arguments``; the experiment
    parameters are then read from the config file plus overrides. Only the
    "cola" target task is accepted. ``inference_mode`` selects between an
    interactive REPL ("repl") and corpus inference ("corpus"); any other
    value raises ``KeyError``.
    """
    # --- Argument handling ---
    cl_args = handle_arguments(cl_arguments)
    args = config.params_from_file(cl_args.config_file, cl_args.overrides)
    check_arg_name(args)
    assert args.target_tasks == "cola", \
        "Currently only supporting CoLA. ({})".format(args.target_tasks)

    gpu_requested = args.cuda >= 0
    if gpu_requested:
        try:
            cuda_ready = torch.cuda.is_available()
            if not cuda_ready:
                raise EnvironmentError("CUDA is not available, or not detected"
                                       " by PyTorch.")
            log.info("Using GPU %d", args.cuda)
            torch.cuda.set_device(args.cuda)
        except Exception:
            # Any failure while selecting the GPU (including the error raised
            # just above) downgrades the run to CPU instead of aborting.
            log.warning("GPU access failed. You might be using a CPU-only"
                        " installation of PyTorch. Falling back to CPU.")
            args.cuda = -1

    # --- Data ---
    _, target_tasks, vocab, word_embs = build_tasks(args)
    tasks = sorted(set(target_tasks), key=lambda t: t.name)

    # --- Model: build, then restore weights from the given checkpoint ---
    model = build_model(args, vocab, word_embs, tasks)
    log.info("Loading existing model from %s...", cl_args.model_file_path)
    load_model_state(
        model, cl_args.model_file_path, args.cuda, [], strict=False)

    # --- Inference setup ---
    model.eval()
    # The vocabulary is re-read from the experiment directory for inference.
    vocab = Vocabulary.from_files(os.path.join(args.exp_dir, 'vocab'))
    indexers = build_indexers(args)
    task = take_one(tasks)

    # --- Run inference ---
    mode = cl_args.inference_mode
    if mode == "repl":
        # The REPL reads and prints interactively, so file paths are rejected.
        assert cl_args.input_path is None
        assert cl_args.output_path is None
        print("Running REPL for task: {}".format(task.name))
        run_repl(model, vocab, indexers, task, args)
    elif mode == "corpus":
        run_corpus_inference(
            model, vocab, indexers, task, args,
            cl_args.input_path,
            cl_args.input_format,
            cl_args.output_path,
            cl_args.eval_output_path,
        )
    else:
        raise KeyError(mode)
コード例 #3
0
def eval_model(cfg: CfgNode) -> Dict[str, float]:
    """Score a checkpoint on the test split with a threshold tuned on validation.

    The model is built from ``cfg`` and its weights are loaded from
    ``cfg.TEST.CHECKPOINT``. The decision threshold that maximises F1 on the
    validation split is selected and then applied to the test-split scores.

    Args:
        cfg: experiment configuration; reads ``TEST.CHECKPOINT``,
            ``TEST.DEVICE``, ``DATASET.NAME`` and ``MODEL.USE_IOU``.

    Returns:
        Dict with the test-split ``"f1"``, ``"precision"`` and ``"recall"``.
    """
    model = build_model(cfg)
    chainer.serializers.load_npz(cfg.TEST.CHECKPOINT, model)

    converter = get_converter(data_name=cfg.DATASET.NAME,
                              use_iou=cfg.MODEL.USE_IOU)

    val_iterator = build_dataloader("val", cfg)[0]
    test_iterator = build_dataloader("test", cfg)[0]

    device_id = cfg.TEST.DEVICE

    # --- Threshold selection on the validation split ---
    val_scores = get_predicted_scores(model, val_iterator, converter,
                                      device_id)
    val_label = val_iterator.dataset.label
    val_precision, val_recall, thresholds = precision_recall_curve(
        val_label, val_scores)

    # precision_recall_curve appends a final (precision=1, recall=0) point
    # that has no matching threshold. Drop it so every F1 value lines up with
    # an entry of `thresholds` and the argmax can never index past the end.
    val_precision = val_precision[:-1]
    val_recall = val_recall[:-1]
    # 0/0 yields NaN where precision + recall == 0; nanargmax skips those.
    val_f1 = 2 * (val_precision * val_recall) / (val_precision + val_recall)
    best_threshold = thresholds[np.nanargmax(val_f1)]

    # --- Evaluation on the test split ---
    test_scores = get_predicted_scores(model, test_iterator, converter,
                                       device_id)
    test_label = test_iterator.dataset.label
    # The curve is computed for predictions `score >= threshold`, so the same
    # inclusive comparison must be used when applying the chosen threshold.
    pred_label = test_scores >= best_threshold

    return {
        "f1": f1_score(test_label, pred_label),
        "precision": precision_score(test_label, pred_label),
        "recall": recall_score(test_label, pred_label),
    }
コード例 #4
0
def load_model(model_filename, use_cuda):
    """Restore a model, its discriminator and both optimizers from a checkpoint.

    The checkpoint is a dict holding the hyper-parameters needed to rebuild
    the networks (``rnn_size``, ``output_size``, ...) together with the state
    dicts stored under ``'model'``, ``'discriminator'``, ``'main_optimizer'``
    and ``'discriminator_optimizer'``.

    Args:
        model_filename: path of the checkpoint file to read.
        use_cuda: when truthy the networks are moved to the GPU; otherwise
            everything is loaded onto the CPU.

    Returns:
        Tuple ``(model, discriminator, main_optimizer,
        discriminator_optimizer)``.
    """
    # A checkpoint written on a GPU machine stores CUDA tensors. Without a
    # map_location, torch.load tries to restore them onto CUDA and fails on a
    # CPU-only host, so remap every storage to CPU when CUDA is not in use.
    map_location = None if use_cuda else (lambda storage, loc: storage)
    checkpoint = torch.load(model_filename, map_location=map_location)

    model, discriminator = build_model(
        rnn_size=checkpoint['rnn_size'],
        output_size=checkpoint['output_size'],
        encoder_n_layers=checkpoint['encoder_n_layers'],
        decoder_n_layers=checkpoint['decoder_n_layers'],
        dropout=checkpoint['dropout'],
        discriminator_hidden_size=checkpoint['discriminator_hidden_size'],
        max_length=checkpoint['max_length'],
        enable_embedding_training=checkpoint['enable_embedding_training'],
        use_cuda=use_cuda,
        bidirectional=checkpoint['bidirectional'],
        use_attention=checkpoint['attention'])
    model.load_state_dict(checkpoint['model'])
    discriminator.load_state_dict(checkpoint['discriminator'])
    model = model.cuda() if use_cuda else model
    discriminator = discriminator.cuda() if use_cuda else discriminator

    # The optimizers are created after the networks sit on their final device
    # and only then receive their saved state.
    main_optimizer, discriminator_optimizer = init_optimizers(
        model, discriminator)
    main_optimizer.load_state_dict(checkpoint['main_optimizer'])
    discriminator_optimizer.load_state_dict(
        checkpoint['discriminator_optimizer'])

    return model, discriminator, main_optimizer, discriminator_optimizer
コード例 #5
0
def main():
    """Entry point: set up the process group, build the model and train it.

    The configuration is read from the file named on the command line;
    ``--work_dir`` (when given) overrides the configured work directory.
    Progress markers are printed around each setup stage. A
    ``KeyboardInterrupt`` during training tears down the process group.
    """
    torch.manual_seed(12345)
    args = parse_args()
    cfg = Config.fromfile(args.config)

    # A work dir given on the command line wins over the one in the config.
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    init_logger(cfg.work_dir, 'INFO').info(cfg)

    print('before init_process')
    init_process(cfg.dist_config)
    print('after init_process')

    print('before build_model')
    model = build_model(cfg.model)
    print('after build_model')

    print('before train_dataloader')
    train_loader = get_dataloader(
        cfg.data.train_data, cfg.data.train_dataloader)
    print('after train_dataloader')

    # The training loader doubles as the validation loader.
    loaders = {'train': train_loader, 'val': train_loader}
    try:
        train_model(model, loaders, cfg)
    except KeyboardInterrupt:
        print('KeyboardInterrupt')
        dist.destroy_process_group()
コード例 #6
0
ファイル: filter.py プロジェクト: wangqi1996/njunmt
    def __init__(self, config_path, model_path, model_type):
        """Load configs, vocabularies and a trained NMT model onto the GPU.

        The three arguments are echoed to stdout. The model is built from the
        ``model_configs`` section of the config, its parameters are read from
        ``model_path`` (mapped to CPU first), and it is then moved to CUDA
        and switched to eval mode.
        """
        for value in (config_path, model_path, model_type):
            print(value)
        self.model_type = model_type

        configs = prepare_configs(config_path)
        data_configs = configs['data_configs']
        model_configs = configs['model_configs']

        # Entry 0 of the vocabulary list is the source side, entry 1 the target.
        vocab_specs = data_configs['vocabularies']
        vocab_src = Vocabulary.build_from_file(**vocab_specs[0])
        vocab_tgt = Vocabulary.build_from_file(**vocab_specs[1])

        nmt_model = build_model(
            n_src_vocab=vocab_src.max_n_words,
            n_tgt_vocab=vocab_tgt.max_n_words,
            padding_idx=vocab_src.pad,
            **model_configs
        )

        # Weights are read onto the CPU and the model is moved to the GPU
        # afterwards; CUDA is used unconditionally here.
        nmt_model.load_state_dict(
            load_model_parameters(model_path, map_location="cpu"))
        nmt_model.cuda()
        nmt_model.eval()

        self.model = nmt_model
        self.data_configs = data_configs
        self.model_configs = model_configs
        self.vocab_src = vocab_src
        self.vocab_tgt = vocab_tgt
コード例 #7
0
def build_translate_model(
    victim_config,
    victim_model_path,
    vocab_src,
    vocab_trg,
    device,
):
    """
    Build the victim NMT model and load its trained parameters.

    :param victim_config: victim configs; the "model_configs" entry is
        forwarded to ``build_model`` as keyword arguments
    :param victim_model_path: path handed to ``load_translate_model``
    :param vocab_src: source vocabulary (its ``max_n_words`` sizes the model)
    :param vocab_trg: target vocabulary (its ``max_n_words`` sizes the model)
    :param device: map location (cpu or cuda:*)
    :return: the NMT model, moved to ``device`` with the parameters loaded
    """
    model_kwargs = victim_config["model_configs"]

    # Create the network first, place it on the device, then fill in weights.
    victim_model = build_model(
        n_src_vocab=vocab_src.max_n_words,
        n_tgt_vocab=vocab_trg.max_n_words,
        **model_kwargs
    )
    victim_model.to(device)

    INFO("load embedding params to device %s" % device)
    loaded_params = load_translate_model(
        victim_model_path, map_location=device)
    victim_model.load_state_dict(loaded_params)

    INFO("finished building translation model for environment on %s" % device)
    return victim_model
コード例 #8
0
def main():
    """Entry point: merge CLI options into the config and run one phase.

    After the config has been completed with the command-line options, the
    model named by ``cfg.model['type']`` is built and passed, together with
    the config and a logger, to the handler selected by ``args.phase``.
    """
    args = parse_args()
    cfg = Config.fromfile(args.config)

    # CUDA is used only when it is both allowed and available.
    cfg.cuda = (not args.no_cuda) and torch.cuda.is_available()

    # Optional cuDNN switches taken from the config.
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    if cfg.get('cudnn_deterministic', False):
        torch.backends.cudnn.deterministic = True

    # The work dir is filled in only when the config does not define one:
    # first choice is the CLI value, otherwise a folder named after the config.
    if not hasattr(cfg, 'work_dir'):
        if args.work_dir is None:
            cfg_name = rm_suffix(os.path.basename(args.config))
            cfg.work_dir = os.path.join('./data/work_dir', cfg_name)
        else:
            cfg.work_dir = args.work_dir
    mkdir_if_no_exists(cfg.work_dir, is_folder=True)

    # Options copied verbatim from the command line onto the config.
    copied_options = (
        'load_from', 'resume_from',
        'gpus', 'distributed',
        'random_conns', 'eval_interim', 'save_output', 'force',
    )
    for option in copied_options:
        setattr(cfg, option, getattr(args, option))

    for split in ('train_data', 'test_data'):
        if not hasattr(cfg, split):
            continue
        cfg[split].eval_interim = cfg.eval_interim

        graph_on_disk = hasattr(cfg[split], 'knn_graph_path') and \
            os.path.isfile(cfg[split].knn_graph_path)
        if graph_on_disk:
            continue
        # No kNN graph file is available for this split, so pass along the
        # settings stored at the top level of the config.
        cfg[split].prefix = cfg.prefix
        cfg[split].knn = cfg.knn
        cfg[split].knn_method = cfg.knn_method
        name_key = 'train_name' if split == 'train_data' else 'test_name'
        cfg[split].name = cfg[name_key]

    logger = create_logger()

    # Seed the RNGs only when a seed was requested.
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model_type = cfg.model['type']
    model = build_model(model_type, **cfg.model['kwargs'])
    handler = build_handler(args.phase, model_type)

    handler(model, cfg, logger)
コード例 #9
0
ファイル: train.py プロジェクト: saeedesm/UNMT_AH
def init_zero_supervised(vocabulary, save_file, use_cuda):
    """Create the initial translator from a supervised model.

    A fresh model/discriminator pair is built from the module-level ``opt``
    settings. When ``opt.sv_load_from`` is set, that pair is replaced by the
    one restored through ``load_model`` and the restored optimizers are
    attached to the trainer; otherwise the fresh pair is trained on the
    bilingual files. The resulting model has all parameters frozen and is
    returned wrapped in a ``Translator``.
    """
    model, discriminator = build_model(
        max_length=opt.max_length,
        output_size=vocabulary.size(),
        rnn_size=opt.rnn_size,
        encoder_n_layers=opt.layers,
        decoder_n_layers=opt.layers,
        dropout=opt.dropout,
        use_cuda=use_cuda,
        enable_embedding_training=bool(opt.sv_embedding_training),
        discriminator_hidden_size=opt.discriminator_hidden_size,
        bidirectional=bool(opt.bidirectional),
        use_attention=bool(opt.attention))

    # Embedding loading is keyed on the source embeddings option only.
    if opt.src_embeddings is not None:
        load_embeddings(model,
                        src_embeddings_filename=opt.src_embeddings,
                        tgt_embeddings_filename=opt.tgt_embeddings,
                        vocabulary=vocabulary)
    if use_cuda:
        model = model.cuda()
        discriminator = discriminator.cuda()
    print_summary(model)

    trainer = Trainer(
        vocabulary,
        max_length=opt.max_length,
        use_cuda=use_cuda,
        discriminator_lr=opt.discriminator_lr,
        main_lr=opt.sv_learning_rate,
        main_betas=(opt.adam_beta1, 0.999),
    )

    if not opt.sv_load_from:
        # No checkpoint given: train the freshly built pair on the single
        # (source, target) bilingual file pair.
        trainer.train_supervised(
            model,
            discriminator,
            [(opt.train_src_bi, opt.train_tgt_bi)],
            vocabulary,
            num_words_in_batch=opt.sv_num_words_in_batch,
            max_length=opt.max_length,
            save_file=save_file,
            big_epochs=opt.supervised_epochs,
            print_every=opt.print_every,
            save_every=opt.save_every,
            max_batch_count=opt.n_supervised_batches)
    else:
        # A checkpoint replaces the pair built above; no training happens.
        restored = load_model(opt.sv_load_from, use_cuda)
        model, discriminator, main_optimizer, discriminator_optimizer = restored
        trainer.main_optimizer = main_optimizer
        trainer.discriminator_optimizer = discriminator_optimizer

    # Freeze the weights before wrapping the model in a Translator.
    for weight in model.parameters():
        weight.requires_grad = False
    return Translator(model, vocabulary, use_cuda)
コード例 #10
0
def main():
    """Entry point for training with separate base-model and top-model ranks.

    Only ranks listed in ``cfg.base_model_ranks`` create a data loader; all
    other ranks receive ``None`` loaders. A ``KeyboardInterrupt`` during
    training tears down the process group.
    """
    torch.manual_seed(0)
    args = parse_args()
    cfg = Config.fromfile(args.config)

    # A work dir given on the command line wins over the one in the config.
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    init_logger(cfg.work_dir, 'INFO').info(cfg)

    print('before init_process')
    init_process(cfg.dist_config)

    rank = dist.get_rank()
    print('rank={}'.format(rank))
    print('world_size={}'.format(dist.get_world_size()))

    print('after init_process')
    print('before build_model')
    model = build_model(cfg.model)
    print('after build_model')
    print('before train_dataloader')
    if rank in cfg.base_model_ranks:
        # The training loader doubles as the validation loader.
        loader = get_dataloader(
            cfg.data.train_data, cfg.data.train_dataloader)
        dataloaders = {'train': loader, 'val': loader}
    else:
        dataloaders = {'train': None, 'val': None}

    # Batches per base rank. 5822653 is the fallback sample count used when
    # the config gives none (NOTE(review): presumably the size of the default
    # training set -- confirm).
    num_samples = cfg.data.train_num_samples or 5822653
    cfg.data.dataloader_lens = (
        num_samples // len(cfg.base_model_ranks) // cfg.data.batch_size)

    # Unless load_top is set, top-model ranks start without a checkpoint.
    if not cfg.load_top and rank in cfg.top_model_ranks:
        cfg.load_from = None

    try:
        train_nbase_mtop_model(model, dataloaders, cfg)
    except KeyboardInterrupt:
        print('KeyboardInterrupt')
        dist.destroy_process_group()
コード例 #11
0
def main(config: DictConfig, ckpt: dict, show_dir: Optional[str] = None):
    """Run the test routine for a model restored from a checkpoint dict.

    Args:
        config: experiment configuration; reads ``SEED``, ``DEVICE`` and
            ``TEST.THRESHOLD_EDGE``.
        ckpt: checkpoint dict; its ``'model_state_dict'`` entry is passed to
            ``build_model``.
        show_dir: forwarded unchanged to ``test``.
    """
    # Seeding is optional: skipped when the config leaves SEED unset.
    seed = config.SEED
    if seed is not None:
        make_deterministic(seed=seed)

    # The loader is created before the model, as in the training-free path.
    loader = get_test_loader(config)
    net = build_model(config, model_state_dict=ckpt['model_state_dict'])

    test(
        model=net,
        data_loader=loader,
        device=config.DEVICE,
        threshold_edge=config.TEST.THRESHOLD_EDGE,
        show_dir=show_dir,
    )
コード例 #12
0
ファイル: exp.py プロジェクト: j-luo93/MUSE
def get_models(params):
    """Validate ``params`` and return ``(evaluator, trainer)``.

    The trainer is built around the embeddings, mapping and discriminator
    produced by ``build_model`` and has ``reload_best()`` called on it before
    the evaluator is created.

    NOTE: the parameter checks are plain ``assert`` statements, so they are
    skipped when Python runs with ``-O``.
    """
    # --- sanity checks on the parameters ---
    assert not params.cuda or torch.cuda.is_available()
    assert 0 <= params.dis_dropout < 1
    assert 0 <= params.dis_input_dropout < 1
    assert 0 <= params.dis_smooth < 0.5
    assert params.dis_lambda > 0 and params.dis_steps > 0
    assert 0 < params.lr_shrink <= 1
    assert os.path.isfile(params.src_emb)
    assert os.path.isfile(params.tgt_emb)
    assert params.dico_eval == 'default' or os.path.isfile(params.dico_eval)
    assert params.export in ["", "txt", "pth"]

    # --- experiment setup; the returned logger is not needed here ---
    initialize_exp(params)
    components = build_model(params, True)
    src_emb, tgt_emb, mapping, discriminator = components

    trainer = Trainer(src_emb, tgt_emb, mapping, discriminator, params)
    trainer.reload_best()

    return Evaluator(trainer), trainer
コード例 #13
0
ファイル: run_muse.py プロジェクト: krylea/MUSE
def run_model(params, runid):
    """Run one adversarial experiment and collect its headline results.

    ``params`` is updated in place: a default experiment name (source plus
    target language) is filled in when none is set, a seed is drawn from
    ``np.random.randint(10000, 20000)``, ``exp_id`` becomes ``str(runid)``
    and ``exp_path`` is reset to the empty string.

    Returns:
        ``(logger, trainer, evaluator, outputs)`` where ``outputs`` holds the
        run id, the seed and the two values returned by ``_adversarial``.
    """
    if params.exp_name is None:
        params.exp_name = params.src_lang + params.tgt_lang

    seed = np.random.randint(10000, 20000)
    params.seed = seed
    params.exp_id = str(runid)
    params.exp_path = ''

    # build model / trainer / evaluator
    logger = initialize_exp(params)
    src_emb, tgt_emb, mapping, discriminator = build_model(params, True)
    trainer = Trainer(src_emb, tgt_emb, mapping, discriminator, params)
    evaluator = Evaluator(trainer)

    base_nn, base_csls = _adversarial(params, logger, trainer, evaluator)

    outputs = dict(run=runid, seed=seed, base_nn=base_nn, base_csls=base_csls)
    return logger, trainer, evaluator, outputs
コード例 #14
0
ファイル: odc_util.py プロジェクト: wangqi1996/njunmt
def get_teacher_model(training_configs, model_configs, vocab_src, vocab_tgt, flags):
    """Build the teacher model when ``training_configs['use_odc']`` is set.

    Returns:
        ``(teacher_model, teacher_model_path)``. With ``use_odc`` disabled
        this is ``(None, '')``. Otherwise the model is built (and moved to
        the GPU when ``Constants.USE_GPU`` is set). If a non-empty
        ``teacher_model_path`` is configured, its weights are loaded
        non-strictly and that path is returned; if not, the returned path is
        ``<saveto>/<model_name>.teacher.pth`` and nothing is loaded.
    """
    if not training_configs['use_odc']:
        return None, ''

    INFO('Building teacher model...')

    teacher_model = build_model(
        n_src_vocab=vocab_src.max_n_words,
        n_tgt_vocab=vocab_tgt.max_n_words,
        padding_idx=vocab_src.pad,
        vocab_src=vocab_src,
        **model_configs
    )
    if Constants.USE_GPU:
        teacher_model.cuda()

    configured_path = training_configs.get('teacher_model_path', '')
    if configured_path == '':
        # Nothing to load; only derive the default path next to the main model.
        teacher_model_path = os.path.join(
            flags.saveto, flags.model_name + '.teacher.pth')
    else:
        teacher_model_path = training_configs['teacher_model_path']
        teacher_state = torch.load(
            teacher_model_path, map_location=Constants.CURRENT_DEVICE)
        teacher_model.load_state_dict(teacher_state, strict=False)
    INFO('Done.')

    return teacher_model, teacher_model_path
コード例 #15
0
def main():
    """Entry point for unsupervised NMT training.

    Steps, all driven by the module-level ``opt`` options:

    1. Configure the ``"unmt"`` logger (file at DEBUG, console at INFO).
    2. Collect the shared vocabulary.
    3. Obtain the initial ("zero") translation model from, in priority order,
       word-by-word dictionaries, a bootstrapped checkpoint, or supervised
       training on bilingual data.
    4. Build (or restore from ``opt.usv_load_from``) the model and
       discriminator and run the unsupervised training loop.

    Raises:
        AssertionError: if none of the three zero-model sources is configured.
    """
    logging.basicConfig(level=logging.DEBUG)

    logger = logging.getLogger("unmt")
    # Do not forward records to the root logger; only the two handlers
    # attached below emit them.
    logger.propagate = False
    fh = logging.FileHandler(opt.log_file)
    fh.setLevel(logging.DEBUG)
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    logger.addHandler(fh)
    logger.addHandler(ch)

    use_cuda = torch.cuda.is_available()
    logger.info("Use CUDA: " + str(use_cuda))

    # Only the third returned vocabulary is used from here on.
    _, _, vocabulary = collect_vocabularies(
            src_vocabulary_path=opt.src_vocabulary,
            tgt_vocabulary_path=opt.tgt_vocabulary,
            all_vocabulary_path=opt.all_vocabulary,
            src_file_names=(opt.train_src_mono, ),
            tgt_file_names=(opt.train_tgt_mono, ),
            src_max_words=opt.src_vocab_size,
            tgt_max_words=opt.tgt_vocab_size,
            reset=bool(opt.reset_vocabularies))

    # Pick the source of the initial translation model.
    if opt.src_to_tgt_dict is not None and opt.tgt_to_src_dict is not None:
        zero_model = WordByWordModel(opt.src_to_tgt_dict, opt.tgt_to_src_dict, vocabulary, opt.max_length)
    elif opt.bootstrapped_model is not None:
        model, discriminator, _, _ = load_model(opt.bootstrapped_model, use_cuda)
        # The bootstrapped model is only used for translation, so freeze it.
        for param in model.parameters():
            param.requires_grad = False
        zero_model = Translator(model, vocabulary, use_cuda)
    elif opt.train_src_bi is not None and opt.train_tgt_bi is not None:
        zero_model = init_zero_supervised(vocabulary, opt.save_model, use_cuda)
    else:
        # Raised explicitly instead of `assert False`: an assert is removed
        # under `python -O`, which would leave `zero_model` unbound and fail
        # later with an unrelated NameError.
        raise AssertionError("Zero model was not initialized")

    trainer = Trainer(vocabulary,
                      max_length=opt.max_length,
                      use_cuda=use_cuda,
                      discriminator_lr=opt.discriminator_lr,
                      main_lr=opt.learning_rate,
                      main_betas=(opt.adam_beta1, 0.999),)
    trainer.current_translation_model = zero_model

    # Build a fresh pair for the unsupervised phase; any `model` and
    # `discriminator` bound while creating the zero model are replaced here.
    model, discriminator = build_model(
        max_length=opt.max_length,
        output_size=vocabulary.size(),
        rnn_size=opt.rnn_size,
        encoder_n_layers=opt.layers,
        decoder_n_layers=opt.layers,
        dropout=opt.dropout,
        use_cuda=use_cuda,
        enable_embedding_training=bool(opt.usv_embedding_training),
        discriminator_hidden_size=opt.discriminator_hidden_size,
        bidirectional=bool(opt.bidirectional),
        use_attention=bool(opt.attention)
    )
    if opt.src_embeddings is not None:
        load_embeddings(model,
                        src_embeddings_filename=opt.src_embeddings,
                        tgt_embeddings_filename=opt.tgt_embeddings,
                        vocabulary=vocabulary)
    model = model.cuda() if use_cuda else model
    print_summary(model)
    print_summary(discriminator)
    discriminator = discriminator.cuda() if use_cuda else discriminator

    # A checkpoint, when given, replaces the freshly built pair and supplies
    # the optimizers for the trainer.
    if opt.usv_load_from:
        model, discriminator, main_optimizer, discriminator_optimizer = load_model(opt.usv_load_from, use_cuda)
        trainer.main_optimizer = main_optimizer
        trainer.discriminator_optimizer = discriminator_optimizer

    trainer.train(model, discriminator,
                  src_file_names=[opt.train_src_mono, ],
                  tgt_file_names=[opt.train_tgt_mono, ],
                  unsupervised_big_epochs=opt.unsupervised_epochs,
                  num_words_in_batch=opt.usv_num_words_in_batch,
                  print_every=opt.print_every,
                  save_every=opt.save_every,
                  save_file=opt.save_model,
                  n_unsupervised_batches=opt.n_unsupervised_batches,
                  enable_unsupervised_backtranslation=opt.enable_unsupervised_backtranslation,
                  teacher_forcing=bool(opt.teacher_forcing),
                  max_length=opt.max_length)
コード例 #16
0
ファイル: evaluation.py プロジェクト: hagait62/MWT
                    help="Normalize embeddings before training")
# inference parameters
parser.add_argument(
    "--multilingual_inference_method",
    nargs='+',
    default=['BI', 'NT', 'CNT', 'CAT'],
    help="which inference methods to use",
)

# parse parameters
params = parser.parse_args()

# check parameters
assert not params.cuda or torch.cuda.is_available()
assert all(os.path.isfile(emb) for emb in params.embs)
assert len(params.langs) == len(params.embs)
# Every requested method is one of the four known names or starts with 'CAT'.
assert all(
    method in ['BI', 'NT', 'CNT', 'CAT'] or method.startswith('CAT')
    for method in params.multilingual_inference_method
)

# build logger / model / trainer / evaluator
logger = initialize_exp(params)
embs, mapping = build_model(params)
trainer = Trainer(embs, mapping, params)
evaluator = Evaluator(trainer)

# Inference with MWT (Multilingual Word Translation)
logger.info('Starting inference...')

# embeddings evaluation
evaluator.word_translation()
logger.info('End of inference.\n\n')
コード例 #17
0
ファイル: unsupervised.py プロジェクト: tpetmanson/MUSE
# Parse the command-line options (`parser` is defined outside this excerpt).
params = parser.parse_args()

# check parameters
# NOTE: these are plain asserts, so they are skipped when Python runs with -O.
assert not params.cuda or torch.cuda.is_available()
assert 0 <= params.dis_dropout < 1
assert 0 <= params.dis_input_dropout < 1
assert 0 <= params.dis_smooth < 0.5
assert params.dis_lambda > 0 and params.dis_steps > 0
assert 0 < params.lr_shrink <= 1
assert os.path.isfile(params.src_emb)
assert os.path.isfile(params.tgt_emb)
assert params.export in ["", "txt", "pth"]

# build model / trainer / evaluator
logger = initialize_exp(params)
src_emb, tgt_emb, mapping, discriminator = build_model(params, True)
trainer = Trainer(src_emb, tgt_emb, mapping, discriminator, params)
evaluator = Evaluator(trainer)


"""
Learning loop for Adversarial Training
"""
# The adversarial phase runs only when requested through params.adversarial.
if params.adversarial:
    logger.info('----> ADVERSARIAL TRAINING <----\n\n')

    # training loop: one iteration per epoch, params.n_epochs in total
    for n_epoch in range(params.n_epochs):

        logger.info('Starting adversarial training epoch %i...' % n_epoch)
        # Wall-clock timestamp taken at the start of the epoch.
        tic = time.time()
コード例 #18
0
def ensemble_translate(FLAGS):
    """Translate a source file with an ensemble of NMT models.

    Reads the YAML config at ``FLAGS.config_path``, builds one model per
    entry of ``FLAGS.model_path``, decodes ``FLAGS.source_path`` with
    ``ensemble_beam_search`` and writes the ``keep_n`` best hypotheses of
    every sentence to ``<FLAGS.saveto>.0``, ``<FLAGS.saveto>.1``, ...
    Sentences are written in their original order; a sentence with fewer
    than ``keep_n`` hypotheses gets the literal line ``eos`` in the remaining
    files.

    Other flags read: ``use_gpu``, ``batch_size``, ``beam_size``,
    ``max_steps``, ``alpha`` and ``keep_n``.
    """
    GlobalNames.USE_GPU = FLAGS.use_gpu

    config_path = os.path.abspath(FLAGS.config_path)

    with open(config_path.strip()) as f:
        # safe_load parses plain YAML without building arbitrary Python
        # objects, and unlike a bare yaml.load(f) it also works on PyYAML
        # releases that require an explicit Loader.
        configs = yaml.safe_load(f)

    data_configs = configs['data_configs']
    model_configs = configs['model_configs']

    timer = Timer()
    # ================================================================================== #
    # Load Data

    INFO('Loading data...')
    timer.tic()

    # Vocabulary entry 0 is the source side, entry 1 the target side.
    vocab_src = Vocabulary(**data_configs["vocabularies"][0])
    vocab_tgt = Vocabulary(**data_configs["vocabularies"][1])

    valid_dataset = TextLineDataset(data_path=FLAGS.source_path,
                                    vocabulary=vocab_src)

    # numbering=True makes every batch carry the original line numbers, which
    # are needed to undo the bucketing order once decoding has finished.
    valid_iterator = DataIterator(dataset=valid_dataset,
                                  batch_size=FLAGS.batch_size,
                                  use_bucket=True,
                                  buffer_size=100000,
                                  numbering=True)

    INFO('Done. Elapsed time {0}'.format(timer.toc()))

    # ================================================================================== #
    # Build Model & Sampler & Validation
    INFO('Building model...')
    timer.tic()

    nmt_models = []

    for model_file in FLAGS.model_path:

        nmt_model = build_model(n_src_vocab=vocab_src.max_n_words,
                                n_tgt_vocab=vocab_tgt.max_n_words,
                                **model_configs)
        nmt_model.eval()
        INFO('Done. Elapsed time {0}'.format(timer.toc()))

        INFO('Reloading model parameters...')
        timer.tic()

        # Parameters are read onto the CPU and moved to the GPU afterwards.
        params = load_model_parameters(model_file, map_location="cpu")

        nmt_model.load_state_dict(params)

        if GlobalNames.USE_GPU:
            nmt_model.cuda()

        nmt_models.append(nmt_model)

    INFO('Done. Elapsed time {0}'.format(timer.toc()))

    INFO('Begin...')
    result_numbers = []
    result = []
    n_words = 0

    timer.tic()

    infer_progress_bar = tqdm(total=len(valid_iterator),
                              desc=' - (Infer)  ',
                              unit="sents")

    valid_iter = valid_iterator.build_generator()
    for batch in valid_iter:

        numbers, seqs_x = batch

        batch_size_t = len(seqs_x)

        x = prepare_data(seqs_x=seqs_x, cuda=GlobalNames.USE_GPU)

        with torch.no_grad():
            word_ids = ensemble_beam_search(nmt_models=nmt_models,
                                            beam_size=FLAGS.beam_size,
                                            max_steps=FLAGS.max_steps,
                                            src_seqs=x,
                                            alpha=FLAGS.alpha)

        word_ids = word_ids.cpu().numpy().tolist()

        # Remember which source line every result belongs to. Without this
        # the re-ordering step below sees an empty index list and drops every
        # translation, leaving the output files empty.
        result_numbers.extend(numbers)

        # Append result
        for sent_t in word_ids:
            # Strip padding from every hypothesis of the sentence.
            sent_t = [[wid for wid in line if wid != PAD] for line in sent_t]
            result.append(sent_t)

            # Only the first hypothesis counts towards the speed statistic.
            n_words += len(sent_t[0])

        infer_progress_bar.update(batch_size_t)

    infer_progress_bar.close()

    INFO('Done. Speed: {0:.2f} words/sec'.format(
        n_words / (timer.toc(return_seconds=True))))

    # Convert ids back to text, cutting every hypothesis at the first EOS.
    translation = []
    for sent in result:
        samples = []
        for trans in sent:
            sample = []
            for w in trans:
                if w == vocab_tgt.EOS:
                    break
                sample.append(vocab_tgt.id2token(w))
            samples.append(vocab_tgt.tokenizer.detokenize(sample))
        translation.append(samples)

    # resume the ordering
    origin_order = np.argsort(result_numbers).tolist()
    translation = [translation[ii] for ii in origin_order]

    # keep_n <= 0 means "keep the whole beam"; never keep more than the beam.
    keep_n = FLAGS.beam_size if FLAGS.keep_n <= 0 else min(
        FLAGS.beam_size, FLAGS.keep_n)
    outputs = ['%s.%d' % (FLAGS.saveto, i) for i in range(keep_n)]

    with batch_open(outputs, 'w') as handles:
        for trans in translation:
            for i in range(keep_n):
                if i < len(trans):
                    handles[i].write('%s\n' % trans[i])
                else:
                    # Placeholder keeps the output files line-aligned.
                    handles[i].write('%s\n' % 'eos')
コード例 #19
0
def train(flags):
    """
    Train a model on a single text stream and track its validation loss.

    For every training batch the model is fed ``x[:-1]`` and scored against
    ``x[1:]`` with cross-entropy (padding ignored); one Adam step is taken
    per batch.  Every ``loss_valid_freq`` updates the token-averaged loss on
    the validation set is computed.  A new best loss saves the model; after
    ``early_stop_patience`` validations without improvement (and at least
    one finished epoch) the process exits.

    flags:
        saveto: str, directory the best models are written to
        reload: store_true (not read by this function)
        config_path: str
        predefined_config: forwarded to ``prepare_configs``
        pretrain_path: str, default=""
        pretrain_exclude_prefix: forwarded to ``load_pretrained_model``
        model_name: str, file-name prefix of the saved models
        log_path: str, directory of the log file and the SummaryWriter
        use_gpu: bool
        debug: forwarded to ``should_trigger_by_steps``
    """
    # Local import: only the validation loop needs the functional API.
    import torch.nn.functional as F

    # ================================================================================== #
    # Initialization for training on different devices
    # - CPU/GPU
    # - Single/Distributed
    Constants.USE_GPU = flags.use_gpu

    # Single-process setup: the distributed values are fixed.
    world_size = 1
    rank = 0
    local_rank = 0

    if Constants.USE_GPU:
        torch.cuda.set_device(local_rank)
        Constants.CURRENT_DEVICE = "cuda:{0}".format(local_rank)
    else:
        Constants.CURRENT_DEVICE = "cpu"

    # If not root_rank, close logging
    # else write log of training to file.
    if rank == 0:
        write_log_to_file(
            os.path.join(flags.log_path,
                         "%s.log" % time.strftime("%Y%m%d-%H%M%S")))
    else:
        close_logging()

    # ================================================================================== #
    # Parsing configuration files
    # - Load default settings
    # - Load pre-defined settings
    # - Load user-defined settings

    configs = prepare_configs(flags.config_path, flags.predefined_config)

    data_configs = configs['data_configs']
    model_configs = configs['model_configs']
    optimizer_configs = configs['optimizer_configs']
    training_configs = configs['training_configs']

    INFO(pretty_configs(configs))

    Constants.SEED = training_configs['seed']

    set_seed(Constants.SEED)

    timer = Timer()

    # ================================================================================== #
    # Load Data

    INFO('Loading data...')
    timer.tic()

    # Build the (single) vocabulary; it is used for inputs and targets alike.
    vocab_src = Vocabulary.build_from_file(**data_configs['vocabularies'][0])

    Constants.EOS = vocab_src.eos
    Constants.PAD = vocab_src.pad
    Constants.BOS = vocab_src.bos

    train_bitext_dataset = TextLineDataset(
        data_path=data_configs['train_data'][0],
        vocabulary=vocab_src,
        max_len=data_configs['max_len'][0],
        is_train_dataset=True)

    valid_bitext_dataset = TextLineDataset(
        data_path=data_configs['valid_data'][0],
        vocabulary=vocab_src,
        is_train_dataset=False)

    training_iterator = DataIterator(
        dataset=train_bitext_dataset,
        batch_size=training_configs["batch_size"],
        use_bucket=training_configs['use_bucket'],
        buffer_size=training_configs['buffer_size'],
        batching_func=training_configs['batching_key'],
        world_size=world_size,
        rank=rank)
    valid_iterator = DataIterator(
        dataset=valid_bitext_dataset,
        batch_size=training_configs['valid_batch_size'],
        use_bucket=True,
        buffer_size=100000,
        numbering=True,
        shuffle=False,
        world_size=world_size,
        rank=rank)

    INFO('Done. Elapsed time {0}'.format(timer.toc()))

    # ================================ Begin ======================================== #
    # Build Model & Optimizer
    # We would do steps below on after another
    #     1. build models & criterion
    #     2. move models & criterion to gpu if needed
    #     3. load pre-trained model if needed
    #     4. build optimizer
    #     5. build learning rate scheduler if needed
    #     6. load checkpoints if needed

    # 0. Initial

    model_collections = Collections()

    checkpoint_saver = Saver(
        save_prefix="{0}.ckpt".format(
            os.path.join(flags.saveto, flags.model_name)),
        num_max_keeping=training_configs['num_kept_checkpoints'])
    best_model_prefix = os.path.join(
        flags.saveto, flags.model_name + Constants.MY_BEST_MODEL_SUFFIX)
    best_model_saver = Saver(
        save_prefix=best_model_prefix,
        num_max_keeping=training_configs['num_kept_best_model'])

    # 1. Build Model & Criterion
    INFO('Building model...')
    timer.tic()
    nmt_model = build_model(vocab_size=vocab_src.max_n_words,
                            padding_idx=vocab_src.pad,
                            vocab_src=vocab_src,
                            **model_configs)
    INFO(nmt_model)
    # Loss function
    critic = torch.nn.CrossEntropyLoss(ignore_index=Constants.PAD)
    INFO(critic)

    # 2. Move to GPU
    if Constants.USE_GPU:
        nmt_model = nmt_model.cuda()
        critic = critic.cuda()

    # 3. Load pretrained model if needed
    load_pretrained_model(nmt_model,
                          flags.pretrain_path,
                          exclude_prefix=flags.pretrain_exclude_prefix,
                          device=Constants.CURRENT_DEVICE)

    INFO('Done. Elapsed time {0}'.format(timer.toc()))

    # 4. Build optimizer
    INFO('Building Optimizer...')
    optimizer = torch.optim.Adam(nmt_model.parameters(),
                                 lr=optimizer_configs['learning_rate'])

    INFO('Done. Elapsed time {0}'.format(timer.toc()))

    # ================================================================================== #
    # Prepare training

    eidx = model_collections.get_collection("eidx", [0])[-1]
    uidx = model_collections.get_collection("uidx", [1])[-1]
    bad_count = model_collections.get_collection("bad_count", [0])[-1]
    oom_count = model_collections.get_collection("oom_count", [0])[-1]
    is_early_stop = model_collections.get_collection("is_early_stop", [
        False,
    ])[-1]

    train_loss_meter = AverageMeter()
    sent_per_sec_meter = TimeMeter()
    tok_per_sec_meter = TimeMeter()

    grad_denom = 0
    train_loss = 0.0
    cum_n_words = 0
    valid_loss = best_valid_loss = float('inf')

    if rank == 0:
        summary_writer = SummaryWriter(log_dir=flags.log_path)
    else:
        summary_writer = None

    sent_per_sec_meter.start()
    tok_per_sec_meter.start()

    INFO('Begin training...')

    while True:

        if summary_writer is not None:
            summary_writer.add_scalar("Epoch", (eidx + 1), uidx)

        # Build iterator and progress bar
        training_iter = training_iterator.build_generator()

        if rank == 0:
            training_progress_bar = tqdm(desc=' - (Epc {}, Upd {}) '.format(
                eidx, uidx),
                                         total=len(training_iterator),
                                         unit="sents")
        else:
            training_progress_bar = None

        for batch in training_iter:
            seqs_x = batch

            batch_size = len(seqs_x)
            cum_n_words = 0.0
            train_loss = 0.0

            try:
                # Prepare data
                grad_denom += batch_size
                x = prepare_data(seqs_x, seqs_y=None, cuda=Constants.USE_GPU)
                nmt_model.train()
                critic.train()
                # Clear the gradients of the model parameters. The criterion
                # owns no parameters, so zeroing it (as was done before) left
                # the gradients accumulating from batch to batch.
                optimizer.zero_grad()
                with torch.enable_grad():
                    # NOTE(review): inputs/targets are shifted along the FIRST
                    # axis of x, while the validation code below treats the
                    # last axis as the one to reduce per row -- confirm the
                    # layout returned by prepare_data.
                    logits = nmt_model(x[:-1])
                    logits = logits.view(-1, vocab_src.max_n_words)
                    trg = x[1:]
                    trg = trg.view(-1)
                    loss = critic(logits, trg)
                    loss.backward()
                    optimizer.step()
                    valid_token = (trg != Constants.PAD).long().sum().item()
                    cum_n_words += valid_token
                    train_loss += loss.item() * valid_token

            except RuntimeError as e:
                if 'out of memory' in str(e):
                    print('| WARNING: ran out of memory, skipping batch')
                    oom_count += 1
                else:
                    raise e

            # One optimizer step is taken per batch, so after every batch:
            # - report progress
            # - update meters
            # - reset the per-batch accumulators and advance uidx

            if training_progress_bar is not None:
                training_progress_bar.update(grad_denom)
                training_progress_bar.set_description(
                    ' - (Epc {}, Upd {}) '.format(eidx, uidx))

                # A batch skipped because of OOM contributes no tokens; avoid
                # dividing by zero in that case.
                if cum_n_words > 0:
                    batch_loss = train_loss / cum_n_words
                else:
                    batch_loss = float('nan')
                postfix_str = 'TrainLoss: {:.2f}, ValidLoss(best): {:.2f} ({:.2f}), '.format(
                    batch_loss, valid_loss, best_valid_loss)
                training_progress_bar.set_postfix_str(postfix_str)

            # 4. update meters
            train_loss_meter.update(train_loss, cum_n_words)
            sent_per_sec_meter.update(grad_denom)
            tok_per_sec_meter.update(cum_n_words)

            # 5. reset accumulated variables, update uidx
            grad_denom = 0
            uidx += 1
            cum_n_words = 0.0
            train_loss = 0.0

            # ================================================================================== #
            # Display some information
            if should_trigger_by_steps(
                    uidx, eidx, every_n_step=training_configs['disp_freq']):

                if summary_writer is not None:
                    summary_writer.add_scalar(
                        "Speed(sents/sec)",
                        scalar_value=sent_per_sec_meter.ave,
                        global_step=uidx)
                    summary_writer.add_scalar(
                        "Speed(words/sec)",
                        scalar_value=tok_per_sec_meter.ave,
                        global_step=uidx)
                    summary_writer.add_scalar(
                        "train_loss",
                        scalar_value=train_loss_meter.ave,
                        global_step=uidx)
                    summary_writer.add_scalar("oom_count",
                                              scalar_value=oom_count,
                                              global_step=uidx)

                # Reset Meters
                sent_per_sec_meter.reset()
                tok_per_sec_meter.reset()
                train_loss_meter.reset()

            # ================================================================================== #
            # Loss Validation & Learning rate annealing
            if should_trigger_by_steps(
                    global_step=uidx,
                    n_epoch=eidx,
                    every_n_step=training_configs['loss_valid_freq'],
                    min_step=training_configs['bleu_valid_warmup'],
                    debug=flags.debug):

                # Token-averaged cross-entropy over the whole validation set.
                # (The training loop switches back to train mode per batch.)
                nmt_model.eval()
                critic.eval()
                valid_iter = valid_iterator.build_generator()
                valid_loss = 0.0
                total_tokens = 0
                for batch in valid_iter:
                    # The iterator is built with numbering=True, so each
                    # batch also carries the sentence numbers (unused here).
                    _, seqs_x = batch
                    x = prepare_data(seqs_x,
                                     seqs_y=None,
                                     cuda=Constants.USE_GPU)
                    with torch.no_grad():
                        logits = nmt_model(x[:-1])
                        logits = logits.view(-1, vocab_src.max_n_words)
                        trg = x[1:]
                        trg = trg.view(-1)
                        # Summed (not averaged) loss, so batches of different
                        # sizes are weighted by their token counts.
                        loss = F.cross_entropy(logits,
                                               trg,
                                               reduction='sum',
                                               ignore_index=Constants.PAD)
                    valid_loss += loss.item()
                    total_tokens += (
                        trg != Constants.PAD).long().sum().item()

                if total_tokens > 0:
                    valid_loss = valid_loss / total_tokens
                else:
                    # Empty validation set: never counts as an improvement.
                    valid_loss = float('inf')
                model_collections.add_to_collection("history_losses",
                                                    valid_loss)

                min_history_loss = np.array(
                    model_collections.get_collection("history_losses")).min()
                best_valid_loss = min_history_loss
                if summary_writer is not None:
                    summary_writer.add_scalar("loss",
                                              valid_loss,
                                              global_step=uidx)
                    summary_writer.add_scalar("best_loss",
                                              min_history_loss,
                                              global_step=uidx)
                # If the model reaches a new best validation loss
                if valid_loss <= best_valid_loss:
                    bad_count = 0

                    if is_early_stop is False:
                        if rank == 0:
                            # 1. save the best model
                            torch.save(nmt_model.state_dict(),
                                       best_model_prefix + ".final")

                            # 2. record all several best models
                            best_model_saver.save(
                                global_step=uidx,
                                model=nmt_model,
                                optimizer=optimizer,
                                collections=model_collections)
                else:
                    # No improvement: spend one unit of patience.
                    bad_count += 1

                    # At least one epoch should be traversed
                    if bad_count >= training_configs[
                            'early_stop_patience'] and eidx > 0:
                        is_early_stop = True
                        WARN("Early Stop!")
                        exit(0)

                if summary_writer is not None:
                    summary_writer.add_scalar("bad_count", bad_count, uidx)

                INFO("{0} Loss: {1:.2f}  patience: {2}".format(
                    uidx, valid_loss, bad_count))

            # ================================================================================== #
            # # Saving checkpoints
            # if should_trigger_by_steps(uidx, eidx, every_n_step=training_configs['save_freq'], debug=flags.debug):
            #     model_collections.add_to_collection("uidx", uidx)
            #     model_collections.add_to_collection("eidx", eidx)
            #     model_collections.add_to_collection("bad_count", bad_count)
            #
            #     if not is_early_stop:
            #         if rank == 0:
            #             checkpoint_saver.save(global_step=uidx,
            #                                   model=nmt_model,
            #                                   optim=optimizer,
            #                                   collections=model_collections)

        if training_progress_bar is not None:
            training_progress_bar.close()

        eidx += 1
        if eidx > training_configs["max_epochs"]:
            break
コード例 #20
0
def main():
    """
    Run the experiment in both directions: source->target, then target->source.

    Steps, as performed below:
      1. Load the arguments, create the logger and seed the RNGs.
      2. Build the embeddings, mappings and autoencoders and wrap them in a
         ``Trainer``; load the seed dictionaries for both directions.
      3. Load the autoencoder weights, or train (and optionally save) them.
      4. For each direction, repeat ``params.iteration`` times: train the
         mapping, induce a new dictionary from the row-normalised encoded
         embeddings, and extend the seed dictionary with its first
         ``params.induced_dico_c * (i + 1)`` pairs.
      5. Evaluate word-translation accuracy and optionally save the weights.
    """
    params = load_args()

    logger = create_logger(
        os.path.join(params.exp_path, "lnmap-experiment.log"))
    logger.info("{}".format(
        jsbeautifier.beautify(json.dumps(params.__dict__), opts)))
    set_seed(params)

    src_emb, tgt_emb, mapping_G, mapping_F, encoder_A, decoder_A, encoder_B, decoder_B = build_model(
        params)
    trainer = Trainer(src_emb, tgt_emb, mapping_G, mapping_F, encoder_A,
                      decoder_A, encoder_B, decoder_B, params)
    evaluator = Evaluator(trainer)

    trainer.load_training_dico(logger)
    trainer.load_training_dico(logger, src2tgt=False)
    logger.info("Seed dictionary size: {}".format(trainer.dico_AB.shape[0]))
    # Keep pristine copies: every iteration extends the seed dictionary
    # afresh instead of growing the previous iteration's result.
    trainer.dico_AB_original = trainer.dico_AB.clone()
    trainer.dico_BA_original = trainer.dico_BA.clone()

    if params.load_autoenc_weights:
        load_autoenc_weights(params, trainer, logger)
    else:
        trainer.train_autoencoder_A(logger)
        trainer.train_autoencoder_B(logger)
        if params.save_autoenc_weights:
            save_autoenc_weights(params, trainer, logger)

    # Source to Target Training
    logger.info("\n \n Training for {} to {}".format(params.src_lang,
                                                     params.tgt_lang))
    for i in range(params.iteration):
        trainer.train_A2B()

        # Encoded (and, for the source side, mapped) embeddings of the first
        # dico_max_rank rows, scaled to unit L2 norm per row.
        emb1 = (trainer.mapping_G(
            trainer.encoder_A(
                trainer.src_emb.weight.data)).data)[0:params.dico_max_rank]
        emb2 = (trainer.encoder_B(
            trainer.tgt_emb.weight.data).data)[0:params.dico_max_rank]
        emb1 = emb1 / emb1.norm(2, 1, keepdim=True).expand_as(emb1)
        emb2 = emb2 / emb2.norm(2, 1, keepdim=True).expand_as(emb2)

        all_pairs, _ = generate_new_dictionary_bidirectional(emb1, emb2)

        add_size = params.induced_dico_c * (i + 1)
        # Move the induced pairs to wherever the seed dictionary lives rather
        # than forcing CUDA, so the concatenation also works on CPU.
        induced_pairs = all_pairs[:add_size].to(
            trainer.dico_AB_original.device)
        trainer.dico_AB = torch.cat(
            (trainer.dico_AB_original, induced_pairs), 0)
        if i == 0:
            logger.info(
                "After first iteration train dictionary size: {}".format(
                    trainer.dico_AB.shape[0]))

    logger.info("Final iteration train dictionary size: {}".format(
        trainer.dico_AB.shape[0]))
    trainer.set_eval()
    get_word_translation_accuracy(
        params,
        trainer.mapping_G(trainer.encoder_A(
            trainer.src_emb.weight.data).data).data,
        trainer.encoder_B(trainer.tgt_emb.weight.data).data,
        src2tgt=True)

    if params.save_model_weights:
        save_model_weights(params, trainer, src2tgt=True)

    # Target to Source Training
    logger.info("\n \n Training for {} to {}".format(params.tgt_lang,
                                                     params.src_lang))
    for i in range(params.iteration):
        trainer.train_B2A()

        emb1 = ((trainer.encoder_A(
            trainer.src_emb.weight.data)).data)[0:params.dico_max_rank]
        emb2 = (trainer.mapping_F(
            trainer.encoder_B(
                trainer.tgt_emb.weight.data)).data)[0:params.dico_max_rank]
        emb1 = emb1 / emb1.norm(2, 1, keepdim=True).expand_as(emb1)
        emb2 = emb2 / emb2.norm(2, 1, keepdim=True).expand_as(emb2)

        # Arguments are swapped with respect to the first direction: the
        # mapped target embeddings come first here.
        all_pairs, _ = generate_new_dictionary_bidirectional(emb2, emb1)

        add_size = params.induced_dico_c * (i + 1)
        induced_pairs = all_pairs[:add_size].to(
            trainer.dico_BA_original.device)
        trainer.dico_BA = torch.cat(
            (trainer.dico_BA_original, induced_pairs), 0)
        if i == 0:
            logger.info(
                "After first iteration train dictionary size: {}".format(
                    trainer.dico_BA.shape[0]))

    logger.info("Final iteration train dictionary size: {}".format(
        trainer.dico_BA.shape[0]))

    trainer.set_eval()
    get_word_translation_accuracy(
        params,
        trainer.mapping_F(trainer.encoder_B(
            trainer.tgt_emb.weight.data).data).data,
        trainer.encoder_A(trainer.src_emb.weight.data).data,
        src2tgt=False)

    if params.save_model_weights:
        save_model_weights(params, trainer, src2tgt=False)
コード例 #21
0
# Validate the parameters before anything expensive is built.
if params.cuda:
    assert torch.cuda.is_available()
assert 0 <= params.dis_dropout < 1
assert 0 <= params.dis_input_dropout < 1
assert 0 <= params.dis_smooth < 0.5
assert params.dis_lambda > 0
assert params.dis_steps > 0
assert 0 < params.lr_shrink <= 1
assert os.path.isfile(params.src_emb) and os.path.isfile(params.tgt_emb)
# dico_eval is either one of the two keywords or a path to an existing file.
assert params.dico_eval in ('default', 'vecmap') or os.path.isfile(
    params.dico_eval)
assert params.export in ("", "txt", "pth")

# Set up the experiment, then the model parts, the trainer and the evaluator.
logger = initialize_exp(params)
(src_emb, tgt_emb, mapping_G, mapping_F, discriminator_A, discriminator_B,
 encoder_A, decoder_A, encoder_B, decoder_B) = build_model(params, True)
trainer = Trainer(src_emb, tgt_emb, mapping_G, mapping_F, discriminator_A,
                  discriminator_B, encoder_A, decoder_A, encoder_B, decoder_B,
                  params)
evaluator = Evaluator(trainer)

# Learning loop for Adversarial Training
if params.adversarial:

    # Both autoencoders are trained first, before the adversarial phase.
    trainer.train_autoencoder_A()
    trainer.train_autoencoder_B()

    logger.info('----> ADVERSARIAL TRAINING <----\n\n')
コード例 #22
0
# Read the command-line parameters.
params = parser.parse_args()

# Validate them before anything expensive is built.
if params.cuda:
    assert torch.cuda.is_available()
assert 0 <= params.dis_dropout < 1
assert 0 <= params.dis_input_dropout < 1
assert 0 <= params.dis_smooth < 0.5
assert params.dis_lambda > 0
assert params.dis_steps > 0
assert 0 < params.lr_shrink <= 1
assert os.path.isfile(params.src_emb) and os.path.isfile(params.tgt_emb)

# Set up the experiment, then the model parts, the trainer and the evaluator.
logger = initialize_exp(params)
(src_emb, tgt_emb, mapping, discriminator) = build_model(params, True)
trainer = Trainer(src_emb, tgt_emb, mapping, discriminator, params)
evaluator = Evaluator(trainer)

# Learning loop for Adversarial Training
if params.adversarial:
    logger.info('----> ADVERSARIAL TRAINING <----\n\n')

    # One pass of this loop per adversarial epoch.
    for n_epoch in range(params.n_epochs):

        logger.info('Starting adversarial training epoch %i...' % n_epoch)
        # Per-epoch bookkeeping: start time, processed-word counter, and the
        # list of discriminator costs.
        tic = time.time()
        n_words_proc = 0
        stats = dict(DIS_COSTS=[])
コード例 #23
0
def train(FLAGS):
    """
    FLAGS:
        saveto: str
        reload: store_true
        config_path: str
        pretrain_path: str, default=""
        model_name: str
        log_path: str
    """

    # write log of training to file.
    write_log_to_file(
        os.path.join(FLAGS.log_path,
                     "%s.log" % time.strftime("%Y%m%d-%H%M%S")))

    GlobalNames.USE_GPU = FLAGS.use_gpu

    if GlobalNames.USE_GPU:
        CURRENT_DEVICE = "cpu"
    else:
        CURRENT_DEVICE = "cuda:0"

    config_path = os.path.abspath(FLAGS.config_path)
    with open(config_path.strip()) as f:
        configs = yaml.load(f)

    INFO(pretty_configs(configs))

    # Add default configs
    configs = default_configs(configs)
    data_configs = configs['data_configs']
    model_configs = configs['model_configs']
    optimizer_configs = configs['optimizer_configs']
    training_configs = configs['training_configs']

    GlobalNames.SEED = training_configs['seed']

    set_seed(GlobalNames.SEED)

    best_model_prefix = os.path.join(
        FLAGS.saveto, FLAGS.model_name + GlobalNames.MY_BEST_MODEL_SUFFIX)

    timer = Timer()

    # ================================================================================== #
    # Load Data

    INFO('Loading data...')
    timer.tic()

    # Generate target dictionary
    vocab_tgt = Vocabulary(**data_configs["vocabularies"][0])

    train_batch_size = training_configs["batch_size"] * max(
        1, training_configs["update_cycle"])
    train_buffer_size = training_configs["buffer_size"] * max(
        1, training_configs["update_cycle"])

    train_bitext_dataset = ZipDataset(TextLineDataset(
        data_path=data_configs['train_data'][0],
        vocabulary=vocab_tgt,
        max_len=data_configs['max_len'][0],
    ),
                                      shuffle=training_configs['shuffle'])

    valid_bitext_dataset = ZipDataset(
        TextLineDataset(
            data_path=data_configs['valid_data'][0],
            vocabulary=vocab_tgt,
        ))

    training_iterator = DataIterator(
        dataset=train_bitext_dataset,
        batch_size=train_batch_size,
        use_bucket=training_configs['use_bucket'],
        buffer_size=train_buffer_size,
        batching_func=training_configs['batching_key'])

    valid_iterator = DataIterator(
        dataset=valid_bitext_dataset,
        batch_size=training_configs['valid_batch_size'],
        use_bucket=True,
        buffer_size=100000,
        numbering=True)

    INFO('Done. Elapsed time {0}'.format(timer.toc()))

    lrate = optimizer_configs['learning_rate']
    is_early_stop = False

    # ================================ Begin ======================================== #
    # Build Model & Optimizer
    # We would do steps below on after another
    #     1. build models & criterion
    #     2. move models & criterion to gpu if needed
    #     3. load pre-trained model if needed
    #     4. build optimizer
    #     5. build learning rate scheduler if needed
    #     6. load checkpoints if needed

    # 0. Initial
    model_collections = Collections()
    checkpoint_saver = Saver(
        save_prefix="{0}.ckpt".format(
            os.path.join(FLAGS.saveto, FLAGS.model_name)),
        num_max_keeping=training_configs['num_kept_checkpoints'])
    best_model_saver = Saver(
        save_prefix=best_model_prefix,
        num_max_keeping=training_configs['num_kept_best_model'])

    # 1. Build Model & Criterion
    INFO('Building model...')
    timer.tic()
    lm_model = build_model(n_tgt_vocab=vocab_tgt.max_n_words, **model_configs)
    INFO(lm_model)

    params_total = sum([p.numel() for n, p in lm_model.named_parameters()])
    params_with_embedding = sum([
        p.numel() for n, p in lm_model.named_parameters()
        if n.find('embedding') == -1
    ])
    INFO('Total parameters: {}'.format(params_total))
    INFO('Total parameters (excluding word embeddings): {}'.format(
        params_with_embedding))

    critic = NMTCriterion(label_smoothing=model_configs['label_smoothing'])

    INFO(critic)
    INFO('Done. Elapsed time {0}'.format(timer.toc()))

    # 2. Move to GPU
    if GlobalNames.USE_GPU:
        lm_model = lm_model.cuda()
        critic = critic.cuda()

    # 3. Load pretrained model if needed
    lm_model.init_parameters(FLAGS.pretrain_path, device=CURRENT_DEVICE)

    # 4. Build optimizer
    INFO('Building Optimizer...')
    optim = Optimizer(name=optimizer_configs['optimizer'],
                      model=lm_model,
                      lr=lrate,
                      grad_clip=optimizer_configs['grad_clip'],
                      optim_args=optimizer_configs['optimizer_params'])

    # 5. Build scheduler for optimizer if needed
    if optimizer_configs['schedule_method'] is not None:

        if optimizer_configs['schedule_method'] == "loss":

            scheduler = ReduceOnPlateauScheduler(
                optimizer=optim, **optimizer_configs["scheduler_configs"])

        elif optimizer_configs['schedule_method'] == "noam":
            scheduler = NoamScheduler(optimizer=optim,
                                      **optimizer_configs['scheduler_configs'])
        else:
            WARN(
                "Unknown scheduler name {0}. Do not use lr_scheduling.".format(
                    optimizer_configs['schedule_method']))
            scheduler = None
    else:
        scheduler = None

    # 6. build moving average

    if training_configs['moving_average_method'] is not None:
        ma = MovingAverage(
            moving_average_method=training_configs['moving_average_method'],
            named_params=lm_model.named_parameters(),
            alpha=training_configs['moving_average_alpha'])
    else:
        ma = None

    INFO('Done. Elapsed time {0}'.format(timer.toc()))

    # Reload from latest checkpoint
    if FLAGS.reload:
        checkpoint_saver.load_latest(model=lm_model,
                                     optim=optim,
                                     lr_scheduler=scheduler,
                                     collections=model_collections,
                                     ma=ma)

    # ================================================================================== #
    # Prepare training

    eidx = model_collections.get_collection("eidx", [0])[-1]
    uidx = model_collections.get_collection("uidx", [0])[-1]
    bad_count = model_collections.get_collection("bad_count", [0])[-1]
    oom_count = model_collections.get_collection("oom_count", [0])[-1]

    summary_writer = SummaryWriter(log_dir=FLAGS.log_path)

    cum_samples = 0
    cum_words = 0
    valid_loss = best_valid_loss = float('inf')  # Max Float
    saving_files = []

    # Timer for computing speed
    timer_for_speed = Timer()
    timer_for_speed.tic()

    INFO('Begin training...')

    while True:
        summary_writer.add_scalar("Epoch", (eidx + 1), uidx)

        # Build iterator and progress bar
        training_iter = training_iterator.build_generator()
        training_progress_bar = tqdm(desc=' - (Epc {}, Upd {}) '.format(
            eidx, uidx),
                                     total=len(training_iterator),
                                     unit="sents")
        for batch in training_iter:

            uidx += 1

            if optimizer_configs[
                    "schedule_method"] is not None and optimizer_configs[
                        "schedule_method"] != "loss":
                scheduler.step(global_step=uidx)

            seqs_y = batch

            n_samples_t = len(seqs_y)
            n_words_t = sum(len(s) for s in seqs_y)

            cum_samples += n_samples_t
            cum_words += n_words_t

            train_loss = 0.
            optim.zero_grad()
            try:
                # Prepare data
                for (seqs_y_t, ) in split_shard(
                        seqs_y, split_size=training_configs['update_cycle']):
                    y = prepare_data(seqs_y_t, cuda=GlobalNames.USE_GPU)

                    loss = compute_forward(
                        model=lm_model,
                        critic=critic,
                        # seqs_x=x,
                        seqs_y=y,
                        eval=False,
                        normalization=n_samples_t,
                        norm_by_words=training_configs["norm_by_words"])
                    train_loss += loss / y.size(
                        1) if not training_configs["norm_by_words"] else loss
                optim.step()

            except RuntimeError as e:
                if 'out of memory' in str(e):
                    print('| WARNING: ran out of memory, skipping batch')
                    oom_count += 1
                    optim.zero_grad()
                else:
                    raise e

            if ma is not None and eidx >= training_configs[
                    'moving_average_start_epoch']:
                ma.step()

            training_progress_bar.update(n_samples_t)
            training_progress_bar.set_description(
                ' - (Epc {}, Upd {}) '.format(eidx, uidx))
            training_progress_bar.set_postfix_str(
                'TrainLoss: {:.2f}, ValidLoss(best): {:.2f} ({:.2f})'.format(
                    train_loss, valid_loss, best_valid_loss))
            summary_writer.add_scalar("train_loss",
                                      scalar_value=train_loss,
                                      global_step=uidx)

            # ================================================================================== #
            # Display some information
            if should_trigger_by_steps(
                    uidx, eidx, every_n_step=training_configs['disp_freq']):
                # words per second and sents per second
                words_per_sec = cum_words / (timer.toc(return_seconds=True))
                sents_per_sec = cum_samples / (timer.toc(return_seconds=True))
                lrate = list(optim.get_lrate())[0]

                summary_writer.add_scalar("Speed(words/sec)",
                                          scalar_value=words_per_sec,
                                          global_step=uidx)
                summary_writer.add_scalar("Speed(sents/sen)",
                                          scalar_value=sents_per_sec,
                                          global_step=uidx)
                summary_writer.add_scalar("lrate",
                                          scalar_value=lrate,
                                          global_step=uidx)
                summary_writer.add_scalar("oom_count",
                                          scalar_value=oom_count,
                                          global_step=uidx)

                # Reset timer
                timer.tic()
                cum_words = 0
                cum_samples = 0

            # ================================================================================== #
            # Saving checkpoints
            if should_trigger_by_steps(
                    uidx,
                    eidx,
                    every_n_step=training_configs['save_freq'],
                    debug=FLAGS.debug):
                model_collections.add_to_collection("uidx", uidx)
                model_collections.add_to_collection("eidx", eidx)
                model_collections.add_to_collection("bad_count", bad_count)

                if not is_early_stop:
                    checkpoint_saver.save(global_step=uidx,
                                          model=lm_model,
                                          optim=optim,
                                          lr_scheduler=scheduler,
                                          collections=model_collections,
                                          ma=ma)

            # ================================================================================== #
            # Loss Validation & Learning rate annealing
            if should_trigger_by_steps(
                    global_step=uidx,
                    n_epoch=eidx,
                    every_n_step=training_configs['loss_valid_freq'],
                    debug=FLAGS.debug):

                if ma is not None:
                    origin_state_dict = deepcopy(lm_model.state_dict())
                    lm_model.load_state_dict(ma.export_ma_params(),
                                             strict=False)

                valid_loss = loss_validation(
                    model=lm_model,
                    critic=critic,
                    valid_iterator=valid_iterator,
                    norm_by_words=training_configs["norm_by_words"])

                model_collections.add_to_collection("history_losses",
                                                    valid_loss)

                min_history_loss = np.array(
                    model_collections.get_collection("history_losses")).min()

                summary_writer.add_scalar("loss", valid_loss, global_step=uidx)
                summary_writer.add_scalar("best_loss",
                                          min_history_loss,
                                          global_step=uidx)

                if ma is not None:
                    lm_model.load_state_dict(origin_state_dict)
                    del origin_state_dict

                if optimizer_configs["schedule_method"] == "loss":
                    scheduler.step(metric=best_valid_loss)

                # If model get new best valid loss
                if valid_loss < best_valid_loss:
                    bad_count = 0

                    if is_early_stop is False:
                        # 1. save the best model
                        torch.save(lm_model.state_dict(),
                                   best_model_prefix + ".final")

                        # 2. record all several best models
                        best_model_saver.save(global_step=uidx, model=lm_model)
                else:
                    bad_count += 1

                    # At least one epoch should be traversed
                    if bad_count >= training_configs[
                            'early_stop_patience'] and eidx > 0:
                        is_early_stop = True
                        WARN("Early Stop!")

                best_valid_loss = min_history_loss

                summary_writer.add_scalar("bad_count", bad_count, uidx)

                INFO("{0} Loss: {1:.2f} lrate: {2:6f} patience: {3}".format(
                    uidx, valid_loss, lrate, bad_count))

        training_progress_bar.close()

        eidx += 1
        if eidx > training_configs["max_epochs"]:
            break
コード例 #24
0
def main():
    """Fit a supervised cross-lingual embedding mapping and evaluate it.

    Parses and validates the command-line options, builds the embeddings and
    the trainer, loads the training dictionary, fits the mapping with the
    requested ``--fitting_method`` and logs the evaluation results as JSON.
    """
    metric_supervised = 'precision_at_1-csls_knn_10'
    metric_unsupervised = 'mean_cosine-csls_knn_10-S2T-10000'

    # ---- command-line interface ----
    arg_parser = argparse.ArgumentParser(description='Supervised training')
    add = arg_parser.add_argument

    # general options
    add("--seed", type=int, default=-1, help="Initialization seed")
    add("--verbose", type=int, default=2, help="Verbose level (2:debug, 1:info, 0:warning)")
    add("--exp_path", type=str, default="", help="Where to store experiment logs and models")
    add("--exp_name", type=str, default="debug", help="Experiment name")
    add("--exp_id", type=str, default="", help="Experiment ID")
    add("--cuda", type=bool_flag, default=True, help="Run on GPU")
    add("--export", type=str, default="txt", help="Export embeddings after training (txt / pth)")

    # languages, embedding size and vocabulary
    add("--src_lang", type=str, default='en', help="Source language")
    add("--tgt_lang", type=str, default='es', help="Target language")
    add("--aux_lang", type=str, default='', help="Auxiliary language")
    add("--emb_dim", type=int, default=300, help="Embedding dimension")
    add("--max_vocab", type=int, default=200000, help="Maximum vocabulary size (-1 to disable)")

    # refinement
    add("--n_refinement", type=int, default=5, help="Number of refinement iterations (0 to disable the refinement procedure)")

    # dictionary creation (used by the refinement)
    add("--dico_train", type=str, default="default", help="Path to training dictionary (default: use identical character strings)")
    add("--dico_eval", type=str, default="default", help="Path to evaluation dictionary")
    add("--dico_method", type=str, default='csls_knn_10', help="Method used for dictionary generation (nn/invsm_beta_30/csls_knn_10)")
    add("--dico_build", type=str, default='S2T&T2S', help="S2T,T2S,S2T|T2S,S2T&T2S")
    add("--dico_threshold", type=float, default=0, help="Threshold confidence for dictionary generation")
    add("--dico_max_rank", type=int, default=10000, help="Maximum dictionary words rank (0 to disable)")
    add("--dico_min_size", type=int, default=0, help="Minimum generated dictionary size (0 to disable)")
    add("--dico_max_size", type=int, default=0, help="Maximum generated dictionary size (0 to disable)")

    # pre-trained embeddings to reload
    add("--src_emb", type=str, default='', help="Reload source embeddings")
    add("--tgt_emb", type=str, default='', help="Reload target embeddings")
    add("--aux_emb", type=str, default='', help="Reload auxiliary embeddings")
    add("--normalize_embeddings", type=str, default="", help="Normalize embeddings before training")
    add("--fitting_method", type=str, default="non_iterative", help="Method of fitting, one of [non_iterative, em, gauss_seidel, gradient_based]")

    opts = arg_parser.parse_args()

    # ---- sanity checks on the parsed options ----
    assert not opts.cuda or torch.cuda.is_available()
    assert opts.dico_train in ("identical_char", "default") or os.path.isfile(opts.dico_train)
    assert opts.dico_build in ("S2T", "T2S", "S2T|T2S", "S2T&T2S")
    assert opts.dico_max_size == 0 or opts.dico_max_size < opts.dico_max_rank
    assert opts.dico_max_size == 0 or opts.dico_max_size > opts.dico_min_size
    print(opts.src_emb, opts.tgt_emb, opts.aux_emb)
    assert os.path.isfile(opts.src_emb)
    assert os.path.isfile(opts.tgt_emb)
    assert opts.dico_eval == 'default' or os.path.isfile(opts.dico_eval)
    assert opts.export in ("", "txt", "pth")

    # ---- logger, model and trainer ----
    logger = initialize_exp(opts)
    source_emb, target_emb, auxiliary_emb, mapper, _ = build_model(opts, False)
    trainer = Trainer(source_emb, target_emb, auxiliary_emb, mapper, None, opts)

    # Either a dictionary path, the built-in one ("default"), or one derived
    # from identical character strings ("identical_char").
    trainer.load_training_dico(opts.dico_train)

    # The unsupervised metric is only used with the identical-char dictionary.
    if opts.dico_train == 'identical_char':
        validation_metric = metric_unsupervised
    else:
        validation_metric = metric_supervised
    logger.info("Validation metric: %s" % validation_metric)

    # apply the PCCA solution
    trainer.fit(fitting_method=opts.fitting_method)

    # The evaluator must be created only after the trainer has been fitted.
    evaluator = Evaluator(trainer)

    # ---- evaluation ----
    results = OrderedDict()
    evaluator.all_eval(results)

    logger.info("__log__:%s" % json.dumps(results))
コード例 #25
0
ファイル: main.py プロジェクト: pep8speaks/jiant
def main(cl_arguments):
    ''' Run the multitask pipeline: pretrain, train on target tasks, evaluate.

    Each stage is gated by its config flag (``do_pretrain``,
    ``do_target_task_training`` and ``do_full_eval``).

    Args:
        cl_arguments: raw command-line arguments, forwarded to
            ``handle_arguments``.
    '''
    cl_args = handle_arguments(cl_arguments)
    args = config.params_from_file(cl_args.config_file, cl_args.overrides)
    check_arg_name(args)  # checks for deprecated arg names
    args, _seed = initial_setup(args, cl_args)

    # ---- tasks ----
    log.info("Loading tasks...")
    tic = time.time()
    pretrain_tasks, target_tasks, vocab, word_embs = build_tasks(args)
    all_tasks = sorted(set(pretrain_tasks + target_tasks),
                       key=lambda tsk: tsk.name)
    log.info('\tFinished loading tasks in %.3fs', time.time() - tic)
    task_names = [tsk.name for tsk in all_tasks]
    log.info('\t Tasks: {}'.format(task_names))

    # ---- model ----
    log.info('Building model...')
    tic = time.time()
    model = build_model(args, vocab, word_embs, all_tasks)
    log.info('\tFinished building model in %.3fs', time.time() - tic)

    # ---- optional Tensorboard ----
    if cl_args.tensorboard:
        _run_background_tensorboard(
            os.path.join(args.run_dir, "tensorboard"),
            cl_args.tensorboard_port)

    check_configurations(args, pretrain_tasks, target_tasks)

    # ---- pretraining ----
    if args.do_pretrain:
        log.info("Training...")
        # With a single pretraining task its own metric drives early
        # stopping; otherwise the macro average is used.
        if len(pretrain_tasks) == 1:
            sole_task = pretrain_tasks[0]
            stop_metric = sole_task.val_metric
            should_decrease = sole_task.val_metric_decreases
        else:
            stop_metric = 'macro_avg'
            should_decrease = False
        trainer, _, opt_params, schd_params = build_trainer(
            args, [], model, args.run_dir, should_decrease, phase="pretrain")
        trainable = [(name, param)
                     for name, param in model.named_parameters()
                     if param.requires_grad]
        trainer.train(pretrain_tasks, stop_metric, args.batch_size,
                      args.weighting_method, args.scaling_method, trainable,
                      opt_params, schd_params, args.shared_optimizer,
                      args.load_model, phase="pretrain")

    # ---- checkpoint-loading strictness ----
    strict = not args.do_target_task_training
    if strict:
        log.info("In strict mode because do_target_task_training is off. "
                 "Will crash if any tasks are missing from the checkpoint.")

    # ---- target-task training ----
    if args.do_target_task_training:
        skip_names = setup_target_task_training(
            args, target_tasks, model, strict)
        if args.transfer_paradigm == "frozen":
            # May be empty if elmo = 0; scalar_mix_0 should always be the
            # pretrain scalars.
            mix_scalars = [(name, param)
                           for name, param in model.named_parameters()
                           if "scalar_mix" in name
                           and "scalar_mix_0" not in name]
            # Fails when sep_embs_for_skip is 0 and mix_scalars is non-empty.
            assert_for_log(
                not mix_scalars or args.sep_embs_for_skip,
                "Error: ELMo scalars loaded and will be updated in do_target_task_training but "
                "they should not be updated! Check sep_embs_for_skip flag or make an issue."
            )

        for tgt_task in target_tasks:
            # mnli-diagnostic is skipped here. It cannot be treated like a
            # probing task: "is_probing_task" is a run-wide flag, and it is
            # False for this task because the task is part of GLUE.
            if tgt_task.name == 'mnli-diagnostic':
                continue

            if args.transfer_paradigm == "finetune":
                # Train the task-specific models and the sentence encoder.
                trainable = [(name, param)
                             for name, param in model.named_parameters()
                             if param.requires_grad]
            else:  # args.transfer_paradigm == "frozen"
                # Train only the task-specific module (plus the scalars).
                task_module = getattr(model, "%s_mdl" % tgt_task.name)
                trainable = [(name, param)
                             for name, param in task_module.named_parameters()
                             if param.requires_grad]
                trainable.extend(mix_scalars)

            trainer, _, opt_params, schd_params = build_trainer(
                args, [tgt_task.name, 'target_train'], model, args.run_dir,
                tgt_task.val_metric_decreases, phase="target_train")
            trainer.train(tasks=[tgt_task],
                          stop_metric=tgt_task.val_metric,
                          batch_size=args.batch_size,
                          weighting_method=args.weighting_method,
                          scaling_method=args.scaling_method,
                          train_params=trainable,
                          optimizer_params=opt_params,
                          scheduler_params=schd_params,
                          shared_optimizer=args.shared_optimizer,
                          load_model=False,
                          phase="target_train")

            # The task is trained now, so it goes back to the normal
            # checkpoint-loading logic.
            if tgt_task.name in skip_names:
                skip_names.remove(tgt_task.name)

            # The best checkpoint accumulates the best parameters per task.
            best_target_path = os.path.join(
                args.run_dir, "model_state_target_train_best.th")

            if args.transfer_paradigm == "finetune":
                # Keep this fine-tuned model under a task-specific name ...
                per_task_path = os.path.join(
                    args.run_dir, "model_state_%s_best.th" % tgt_task.name)
                os.rename(best_target_path, per_task_path)

                # ... and restore the best model from before target-task
                # training.
                load_model_state(model,
                                 get_best_checkpoint_path(args.run_dir),
                                 args.cuda,
                                 skip_task_models=[],
                                 strict=strict)
            else:  # args.transfer_paradigm == "frozen"
                # Load the current overall best model.
                load_model_state(model,
                                 best_target_path,
                                 args.cuda,
                                 strict=strict,
                                 skip_task_models=skip_names)

    # ---- full evaluation ----
    if args.do_full_eval:
        log.info("Evaluating...")
        splits_to_write = evaluate.parse_write_preds_arg(args.write_preds)
        if args.transfer_paradigm == "finetune":
            for tgt_task in target_tasks:
                if tgt_task.name == 'mnli-diagnostic':
                    # evaluated together with mnli below
                    continue
                # The sentence encoder was trained per task, so each task has
                # its own best set of weights; fall back to the overall best
                # checkpoint when no task-specific file exists.
                per_task_path = os.path.join(
                    args.run_dir, "model_state_%s_best.th" % tgt_task.name)
                ckpt_path = (per_task_path
                             if os.path.exists(per_task_path)
                             else get_best_checkpoint_path(args.run_dir))
                load_model_state(model,
                                 ckpt_path,
                                 args.cuda,
                                 skip_task_models=[],
                                 strict=strict)

                eval_group = [tgt_task]
                if tgt_task.name == 'mnli':
                    eval_group.extend(
                        other for other in target_tasks
                        if other.name == 'mnli-diagnostic')
                evaluate_and_write(args, model, eval_group, splits_to_write)

        elif args.transfer_paradigm == "frozen":
            # No special checkpointing: the model already holds every
            # trained task-specific module.
            evaluate_and_write(args, model, target_tasks, splits_to_write)

    log.info("Done!")
コード例 #26
0
ファイル: evaluate.py プロジェクト: codealphago/MUSE
# Evaluation script body: register the embedding options, validate the parsed
# parameters, build the model/trainer/evaluator and run the evaluations.
# NOTE(review): `parser` is created above this excerpt; it presumably already
# defines --src_lang / --tgt_lang, which are read below — confirm against the
# full script.

# reload pre-trained embeddings
parser.add_argument("--src_emb", type=str, default="", help="Reload source embeddings")
parser.add_argument("--tgt_emb", type=str, default="", help="Reload target embeddings")
parser.add_argument("--max_vocab", type=int, default=200000, help="Maximum vocabulary size")
parser.add_argument("--emb_dim", type=int, default=300, help="Embedding dimension")
parser.add_argument("--normalize_embeddings", type=str, default="", help="Normalize embeddings before training")


# parse parameters
params = parser.parse_args()

# check parameters
# A source language and its embedding file are mandatory; the target embedding
# file is only required when a target language is given.
assert params.src_lang, "source language undefined"
assert os.path.isfile(params.src_emb)
assert not params.tgt_lang or os.path.isfile(params.tgt_emb)

# build logger / model / trainer / evaluator
logger = initialize_exp(params)
src_emb, tgt_emb, mapping, _ = build_model(params, False)
trainer = Trainer(src_emb, tgt_emb, mapping, None, params)
evaluator = Evaluator(trainer)

# run evaluations
# Each evaluator call receives `to_log`, presumably to record its scores in it
# (TODO confirm); nothing in this excerpt prints or saves `to_log` afterwards.
to_log = OrderedDict({'n_iter': 0})
evaluator.monolingual_wordsim(to_log)
# Cross-lingual evaluations only run when a target language is set.
if params.tgt_lang:
    evaluator.crosslingual_wordsim(to_log)
    evaluator.word_translation(to_log)
    evaluator.sent_translation(to_log)
    # evaluator.dist_mean_cosine(to_log)
コード例 #27
0
ファイル: main.py プロジェクト: yyht/jiant
def main(cl_arguments):
    ''' Train or load a model. Evaluate on some tasks.

    Stages, each gated by the corresponding config flag:
      1. set up run directories, logging, notifications and random seeds;
      2. build the tasks and the model;
      3. ``do_train``: train on the training tasks;
      4. load ``load_eval_checkpoint`` or the best checkpoint found in the
         run directory;
      5. ``train_for_eval``: train the task-specific module of each eval task;
      6. ``do_eval``: evaluate on the eval tasks, write predictions/results.

    Args:
        cl_arguments: raw command-line arguments, forwarded to
            ``handle_arguments``.
    '''
    # Declared up front because the notifier is both rebound and read below.
    global EMAIL_NOTIFIER

    cl_args = handle_arguments(cl_arguments)
    args = config.params_from_file(cl_args.config_file, cl_args.overrides)

    # Logistics #
    maybe_make_dir(args.project_dir)  # e.g. /nfs/jsalt/exp/$HOSTNAME
    maybe_make_dir(args.exp_dir)      # e.g. <project_dir>/jiant-demo
    maybe_make_dir(args.run_dir)      # e.g. <project_dir>/jiant-demo/sst
    log.getLogger().addHandler(log.FileHandler(args.local_log_path))

    if cl_args.remote_log:
        gcp.configure_remote_logging(args.remote_log_name)

    if cl_args.notify:
        from src import emails
        log.info("Registering email notifier for %s", cl_args.notify)
        EMAIL_NOTIFIER = emails.get_notifier(cl_args.notify, args)

    if EMAIL_NOTIFIER:
        EMAIL_NOTIFIER(body="Starting run.", prefix="")

    _try_logging_git_info()

    log.info("Parsed args: \n%s", args)

    config_file = os.path.join(args.run_dir, "params.conf")
    config.write_params(args, config_file)
    log.info("Saved config to %s", config_file)

    # A negative random_seed requests a freshly drawn seed.
    seed = random.randint(1, 10000) if args.random_seed < 0 else args.random_seed
    random.seed(seed)
    torch.manual_seed(seed)
    log.info("Using random seed %d", seed)
    if args.cuda >= 0:
        try:
            if not torch.cuda.is_available():
                raise EnvironmentError("CUDA is not available, or not detected"
                                       " by PyTorch.")
            log.info("Using GPU %d", args.cuda)
            torch.cuda.set_device(args.cuda)
            torch.cuda.manual_seed_all(seed)
        except Exception:
            # Deliberate best-effort: any GPU setup failure falls back to CPU.
            log.warning(
                "GPU access failed. You might be using a CPU-only installation of PyTorch. Falling back to CPU.")
            args.cuda = -1

    # Prepare data #
    log.info("Loading tasks...")
    start_time = time.time()
    train_tasks, eval_tasks, vocab, word_embs = build_tasks(args)
    if any(t.val_metric_decreases for t in train_tasks) and any(
            not t.val_metric_decreases for t in train_tasks):
        # log.warning replaces the deprecated log.warn alias.
        log.warning("\tMixing training tasks with increasing and decreasing val metrics!")
    tasks = sorted(set(train_tasks + eval_tasks), key=lambda x: x.name)
    log.info('\tFinished loading tasks in %.3fs', time.time() - start_time)
    log.info('\t Tasks: {}'.format([task.name for task in tasks]))

    # Build or load model #
    log.info('Building model...')
    start_time = time.time()
    model = build_model(args, vocab, word_embs, tasks)
    log.info('\tFinished building model in %.3fs', time.time() - start_time)

    # Check that necessary parameters are set for each step. Exit with error if not.
    steps_log = []

    if args.load_eval_checkpoint != 'none':
        assert_for_log(os.path.exists(args.load_eval_checkpoint),
                       "Error: Attempting to load model from non-existent path: [%s]" %
                       args.load_eval_checkpoint)
        assert_for_log(
            not args.do_train,
            "Error: Attempting to train a model and then replace that model with one from a checkpoint.")
        steps_log.append("Loading model from path: %s" % args.load_eval_checkpoint)

    if args.do_train:
        assert_for_log(args.train_tasks != "none",
                       "Error: Must specify at least on training task: [%s]" % args.train_tasks)
        assert_for_log(
            args.val_interval %
            args.bpp_base == 0, "Error: val_interval [%d] must be divisible by bpp_base [%d]" %
            (args.val_interval, args.bpp_base))
        steps_log.append("Training model on tasks: %s" % args.train_tasks)

    if args.train_for_eval:
        steps_log.append("Re-training model for individual eval tasks")
        assert_for_log(
            args.eval_val_interval %
            args.bpp_base == 0, "Error: eval_val_interval [%d] must be divisible by bpp_base [%d]" %
            (args.eval_val_interval, args.bpp_base))
        assert_for_log(len(set(train_tasks).intersection(eval_tasks)) == 0
                       or args.allow_reuse_of_pretraining_parameters
                       or args.do_train == 0,
                       "If you're pretraining on a task you plan to reuse as a target task, set\n"
                       "allow_reuse_of_pretraining_parameters = 1(risky), or train in two steps:\n"
                       "  train with do_train = 1, train_for_eval = 0, stop, and restart with\n"
                       "  do_train = 0 and train_for_eval = 1.")

    if args.do_eval:
        assert_for_log(args.eval_tasks != "none",
                       "Error: Must specify at least one eval task: [%s]" % args.eval_tasks)
        steps_log.append("Evaluating model on tasks: %s" % args.eval_tasks)

    # Start Tensorboard if requested
    if cl_args.tensorboard:
        tb_logdir = os.path.join(args.run_dir, "tensorboard")
        _run_background_tensorboard(tb_logdir, cl_args.tensorboard_port)

    log.info("Will run the following steps:\n%s", '\n'.join(steps_log))
    if args.do_train:
        # Train on train tasks #
        log.info("Training...")
        params = build_trainer_params(args, task_names=[])
        # A single training task drives early stopping with its own metric;
        # several tasks use the macro average.
        stop_metric = train_tasks[0].val_metric if len(train_tasks) == 1 else 'macro_avg'
        should_decrease = train_tasks[0].val_metric_decreases if len(train_tasks) == 1 else False
        trainer, _, opt_params, schd_params = build_trainer(params, model,
                                                            args.run_dir,
                                                            should_decrease)
        to_train = [(n, p) for n, p in model.named_parameters() if p.requires_grad]
        best_epochs = trainer.train(train_tasks, stop_metric,
                                    args.batch_size, args.bpp_base,
                                    args.weighting_method, args.scaling_method,
                                    to_train, opt_params, schd_params,
                                    args.shared_optimizer, args.load_model, phase="main")

    # Select model checkpoint from main training run to load
    if not args.train_for_eval:
        log.info("In strict mode because train_for_eval is off. "
                 "Will crash if any tasks are missing from the checkpoint.")
        strict = True
    else:
        strict = False

    if args.train_for_eval and not args.allow_reuse_of_pretraining_parameters:
        # If we're training models for evaluation, which is always done from scratch with a fresh
        # optimizer, we shouldn't load parameters for those models.
        # Usually, there won't be trained parameters to skip, but this can happen if a run is killed
        # during the train_for_eval phase.
        task_names_to_avoid_loading = [task.name for task in eval_tasks]
    else:
        task_names_to_avoid_loading = []

    if args.load_eval_checkpoint != "none":
        log.info("Loading existing model from %s...", args.load_eval_checkpoint)
        load_model_state(model, args.load_eval_checkpoint,
                         args.cuda, task_names_to_avoid_loading, strict=strict)
    else:
        # Look for eval checkpoints (available only if we're restoring from a run that already
        # finished), then look for training checkpoints.
        eval_best = glob.glob(os.path.join(args.run_dir,
                                           "model_state_eval_best.th"))
        if len(eval_best) > 0:
            load_model_state(
                model,
                eval_best[0],
                args.cuda,
                task_names_to_avoid_loading,
                strict=strict)
        else:
            macro_best = glob.glob(os.path.join(args.run_dir,
                                                "model_state_main_epoch_*.best_macro.th"))
            if len(macro_best) > 0:
                assert_for_log(len(macro_best) == 1,
                               "Too many best checkpoints. Something is wrong.")
                load_model_state(
                    model,
                    macro_best[0],
                    args.cuda,
                    task_names_to_avoid_loading,
                    strict=strict)
            else:
                assert_for_log(
                    args.allow_untrained_encoder_parameters,
                    "No best checkpoint found to evaluate.")
                log.warning("Evaluating untrained encoder parameters!")

    # Train just the task-specific components for eval tasks.
    if args.train_for_eval:
        # might be empty if no elmo. scalar_mix_0 should always be pretrain scalars
        elmo_scalars = [(n, p) for n, p in model.named_parameters() if
                        "scalar_mix" in n and "scalar_mix_0" not in n]
        # fails when sep_embs_for_skip is 0 and elmo_scalars has nonzero length
        assert_for_log(not elmo_scalars or args.sep_embs_for_skip,
                       "Error: ELMo scalars loaded and will be updated in train_for_eval but "
                       "they should not be updated! Check sep_embs_for_skip flag or make an issue.")
        for task in eval_tasks:
            # Skip mnli-diagnostic
            # This has to be handled differently than probing tasks because probing tasks require the "is_probing_task"
            # to be set to True. For mnli-diagnostic this flag will be False because it is part of GLUE and
            # "is_probing_task is global flag specific to a run, not to a task.
            if task.name == 'mnli-diagnostic':
                continue
            pred_module = getattr(model, "%s_mdl" % task.name)
            to_train = elmo_scalars + [(n, p)
                                       for n, p in pred_module.named_parameters() if p.requires_grad]
            # Look for <task_name>_<param_name>, then eval_<param_name>
            params = build_trainer_params(args, task_names=[task.name, 'eval'])
            trainer, _, opt_params, schd_params = build_trainer(params, model,
                                                                args.run_dir,
                                                                task.val_metric_decreases)
            best_epoch = trainer.train([task], task.val_metric,
                                       args.batch_size, 1,
                                       args.weighting_method, args.scaling_method,
                                       to_train, opt_params, schd_params,
                                       args.shared_optimizer, load_model=False, phase="eval")

            # Now that we've trained a model, revert to the normal checkpoint logic for this task.
            # The list is empty when allow_reuse_of_pretraining_parameters is
            # set, so an unguarded remove() would raise ValueError.
            if task.name in task_names_to_avoid_loading:
                task_names_to_avoid_loading.remove(task.name)

            # The best checkpoint will accumulate the best parameters for each task.
            # This logic looks strange. We think it works.
            best_epoch = best_epoch[task.name]
            layer_path = os.path.join(args.run_dir, "model_state_eval_best.th")
            load_model_state(
                model,
                layer_path,
                args.cuda,
                skip_task_models=task_names_to_avoid_loading,
                strict=strict)

    if args.do_eval:
        # Evaluate #
        log.info("Evaluating...")
        val_results, val_preds = evaluate.evaluate(model, eval_tasks,
                                                   args.batch_size,
                                                   args.cuda, "val")

        splits_to_write = evaluate.parse_write_preds_arg(args.write_preds)
        if 'val' in splits_to_write:
            evaluate.write_preds(eval_tasks, val_preds, args.run_dir, 'val',
                                 strict_glue_format=args.write_strict_glue_format)
        if 'test' in splits_to_write:
            _, te_preds = evaluate.evaluate(model, eval_tasks,
                                            args.batch_size, args.cuda, "test")
            # Predictions exist only for eval_tasks, so write exactly those
            # (same as the 'val' branch) rather than every loaded task.
            evaluate.write_preds(eval_tasks, te_preds, args.run_dir, 'test',
                                 strict_glue_format=args.write_strict_glue_format)
        run_name = args.get("run_name", os.path.basename(args.run_dir))

        results_tsv = os.path.join(args.exp_dir, "results.tsv")
        log.info("Writing results for split 'val' to %s", results_tsv)
        evaluate.write_results(val_results, results_tsv, run_name=run_name)

    log.info("Done!")
コード例 #28
0
def main(cl_arguments):
    """Train or load a model, then evaluate it on the target tasks.

    The run is driven by the parsed config and proceeds through up to three
    phases, each gated by a flag on ``args``:

    1. ``do_pretrain``: train on ``pretrain_tasks`` (phase ``"main"``).
    2. ``do_target_task_training``: train once per target task (phase
       ``"eval"``), either fine-tuning every trainable parameter
       (``transfer_paradigm == "finetune"``) or only the task-specific module
       (``transfer_paradigm == "frozen"``).
    3. ``do_full_eval``: evaluate on the target tasks and write results.

    Args:
        cl_arguments: raw command-line arguments, forwarded to
            ``handle_arguments``.

    Side effects:
        Creates the project/exp/run directories, attaches a file log handler,
        writes ``params.conf`` and model checkpoints under ``args.run_dir``,
        seeds the ``random`` and ``torch`` RNGs, and may rebind the
        module-level ``EMAIL_NOTIFIER``.
    """
    # Declared up front: the name is both assigned and read further down.
    global EMAIL_NOTIFIER

    cl_args = handle_arguments(cl_arguments)
    args = config.params_from_file(cl_args.config_file, cl_args.overrides)

    # Raise error if obsolete arg names are present
    check_arg_name(args)

    # Logistics #
    maybe_make_dir(args.project_dir)  # e.g. /nfs/jsalt/exp/$HOSTNAME
    maybe_make_dir(args.exp_dir)      # e.g. <project_dir>/jiant-demo
    maybe_make_dir(args.run_dir)      # e.g. <project_dir>/jiant-demo/sst
    log.getLogger().addHandler(log.FileHandler(args.local_log_path))

    if cl_args.remote_log:
        from src.utils import gcp
        gcp.configure_remote_logging(args.remote_log_name)

    if cl_args.notify:
        from src.utils import emails
        log.info("Registering email notifier for %s", cl_args.notify)
        EMAIL_NOTIFIER = emails.get_notifier(cl_args.notify, args)

    if EMAIL_NOTIFIER:
        EMAIL_NOTIFIER(body="Starting run.", prefix="")

    _try_logging_git_info()

    log.info("Parsed args: \n%s", args)

    config_file = os.path.join(args.run_dir, "params.conf")
    config.write_params(args, config_file)
    log.info("Saved config to %s", config_file)

    # A negative random_seed means "draw a fresh seed for this run".
    seed = random.randint(1, 10000) if args.random_seed < 0 else args.random_seed
    random.seed(seed)
    torch.manual_seed(seed)
    log.info("Using random seed %d", seed)
    if args.cuda >= 0:
        # Deliberate best-effort: any failure while selecting the GPU falls
        # back to CPU (args.cuda = -1) instead of aborting the run.
        try:
            if not torch.cuda.is_available():
                raise EnvironmentError("CUDA is not available, or not detected"
                                       " by PyTorch.")
            log.info("Using GPU %d", args.cuda)
            torch.cuda.set_device(args.cuda)
            torch.cuda.manual_seed_all(seed)
        except Exception:
            log.warning(
                "GPU access failed. You might be using a CPU-only installation of PyTorch. Falling back to CPU.")
            args.cuda = -1

    # Prepare data #
    log.info("Loading tasks...")
    start_time = time.time()
    pretrain_tasks, target_tasks, vocab, word_embs = build_tasks(args)
    # Warn when some pretraining metrics should go down and others up.
    if any(t.val_metric_decreases for t in pretrain_tasks) and any(
            not t.val_metric_decreases for t in pretrain_tasks):
        log.warning("\tMixing training tasks with increasing and decreasing val metrics!")
    tasks = sorted(set(pretrain_tasks + target_tasks), key=lambda x: x.name)
    log.info('\tFinished loading tasks in %.3fs', time.time() - start_time)
    log.info('\t Tasks: %s', [task.name for task in tasks])

    # Build model #
    log.info('Building model...')
    start_time = time.time()
    model = build_model(args, vocab, word_embs, tasks)
    log.info('\tFinished building model in %.3fs', time.time() - start_time)

    # Check that necessary parameters are set for each step. Exit with error if not.
    steps_log = []

    if args.load_eval_checkpoint != 'none':
        assert_for_log(os.path.exists(args.load_eval_checkpoint),
                       "Error: Attempting to load model from non-existent path: [%s]" %
                       args.load_eval_checkpoint)
        assert_for_log(
            not args.do_pretrain,
            "Error: Attempting to train a model and then replace that model with one from a checkpoint.")
        steps_log.append("Loading model from path: %s" % args.load_eval_checkpoint)

    assert_for_log(args.transfer_paradigm in ["finetune", "frozen"],
                   "Transfer paradigm %s not supported!" % args.transfer_paradigm)

    if args.do_pretrain:
        assert_for_log(args.pretrain_tasks != "none",
                       "Error: Must specify at least one training task: [%s]" % args.pretrain_tasks)
        assert_for_log(
            args.val_interval %
            args.bpp_base == 0, "Error: val_interval [%d] must be divisible by bpp_base [%d]" %
            (args.val_interval, args.bpp_base))
        steps_log.append("Training model on tasks: %s" % args.pretrain_tasks)

    if args.do_target_task_training:
        steps_log.append("Re-training model for individual eval tasks")
        assert_for_log(
            args.eval_val_interval %
            args.bpp_base == 0, "Error: eval_val_interval [%d] must be divisible by bpp_base [%d]" %
            (args.eval_val_interval, args.bpp_base))
        # Pretraining and target-training on the same task in one run is only
        # allowed when explicitly requested.
        assert_for_log(len(set(pretrain_tasks).intersection(target_tasks)) == 0
                       or args.allow_reuse_of_pretraining_parameters
                       or args.do_pretrain == 0,
                       "If you're pretraining on a task you plan to reuse as a target task, set\n"
                       "allow_reuse_of_pretraining_parameters = 1(risky), or train in two steps:\n"
                       "  train with do_pretrain = 1, do_target_task_training = 0, stop, and restart with\n"
                       "  do_pretrain = 0 and do_target_task_training = 1.")

    if args.do_full_eval:
        assert_for_log(args.target_tasks != "none",
                       "Error: Must specify at least one eval task: [%s]" % args.target_tasks)
        steps_log.append("Evaluating model on tasks: %s" % args.target_tasks)

    # Start Tensorboard if requested
    if cl_args.tensorboard:
        tb_logdir = os.path.join(args.run_dir, "tensorboard")
        _run_background_tensorboard(tb_logdir, cl_args.tensorboard_port)

    log.info("Will run the following steps:\n%s", '\n'.join(steps_log))
    if args.do_pretrain:
        # Train on train tasks #
        log.info("Training...")
        # With a single pretraining task, stop on that task's own metric;
        # otherwise stop on the macro average, treated as increasing.
        stop_metric = pretrain_tasks[0].val_metric if len(pretrain_tasks) == 1 else 'macro_avg'
        should_decrease = pretrain_tasks[0].val_metric_decreases if len(pretrain_tasks) == 1 else False
        trainer, _, opt_params, schd_params = build_trainer(args, [], model,
                                                            args.run_dir,
                                                            should_decrease)
        to_train = [(n, p) for n, p in model.named_parameters() if p.requires_grad]
        _ = trainer.train(pretrain_tasks, stop_metric,
                          args.batch_size, args.bpp_base,
                          args.weighting_method, args.scaling_method,
                          to_train, opt_params, schd_params,
                          args.shared_optimizer, args.load_model, phase="main")

    # Select model checkpoint from main training run to load
    if not args.do_target_task_training:
        log.info("In strict mode because do_target_task_training is off. "
                 "Will crash if any tasks are missing from the checkpoint.")
        strict = True
    else:
        strict = False

    if args.do_target_task_training and not args.allow_reuse_of_pretraining_parameters:
        # If we're training models for evaluation, which is always done from scratch with a fresh
        # optimizer, we shouldn't load parameters for those models.
        # Usually, there won't be trained parameters to skip, but this can happen if a run is killed
        # during the do_target_task_training phase.
        task_names_to_avoid_loading = [task.name for task in target_tasks]
    else:
        task_names_to_avoid_loading = []

    if args.load_eval_checkpoint != "none":
        # This is to load a particular eval checkpoint.
        log.info("Loading existing model from %s...", args.load_eval_checkpoint)
        load_model_state(model, args.load_eval_checkpoint,
                         args.cuda, task_names_to_avoid_loading, strict=strict)
    else:
        # Look for eval checkpoints (available only if we're restoring from a run that already
        # finished), then look for training checkpoints.

        if args.transfer_paradigm == "finetune":
            # Save model so we have a checkpoint to go back to after each task-specific finetune.
            model_state = model.state_dict()
            model_path = os.path.join(args.run_dir, "model_state_untrained_prefinetune.th")
            torch.save(model_state, model_path)

        best_path = get_best_checkpoint_path(args.run_dir)
        if best_path:
            load_model_state(model, best_path, args.cuda, task_names_to_avoid_loading,
                             strict=strict)
        else:
            assert_for_log(args.allow_untrained_encoder_parameters,
                           "No best checkpoint found to evaluate.")
            log.warning("Evaluating untrained encoder parameters!")

    # Train just the task-specific components for eval tasks.
    if args.do_target_task_training:
        if args.transfer_paradigm == "frozen":
            # might be empty if elmo = 0. scalar_mix_0 should always be pretrain scalars
            elmo_scalars = [(n, p) for n, p in model.named_parameters() if
                            "scalar_mix" in n and "scalar_mix_0" not in n]
            # Fails when sep_embs_for_skip is 0 and elmo_scalars has nonzero length.
            assert_for_log(not elmo_scalars or args.sep_embs_for_skip,
                           "Error: ELMo scalars loaded and will be updated in do_target_task_training but "
                           "they should not be updated! Check sep_embs_for_skip flag or make an issue.")

        for task in target_tasks:
            # Skip mnli-diagnostic
            # This has to be handled differently than probing tasks because probing tasks require the "is_probing_task"
            # to be set to True. For mnli-diagnostic this flag will be False because it is part of GLUE and
            # "is_probing_task is global flag specific to a run, not to a task.
            if task.name == 'mnli-diagnostic':
                continue

            if args.transfer_paradigm == "finetune":
                # Train both the task specific models as well as sentence encoder.
                to_train = [(n, p) for n, p in model.named_parameters() if p.requires_grad]
            else:  # args.transfer_paradigm == "frozen":
                # Only train task-specific module.
                pred_module = getattr(model, "%s_mdl" % task.name)
                to_train = [(n, p) for n, p in pred_module.named_parameters() if p.requires_grad]
                to_train += elmo_scalars

            # Look for <task_name>_<param_name>, then eval_<param_name>
            trainer, _, opt_params, schd_params = build_trainer(args, [task.name, 'eval'], model,
                                                                args.run_dir,
                                                                task.val_metric_decreases)
            _ = trainer.train(tasks=[task], stop_metric=task.val_metric, batch_size=args.batch_size,
                              n_batches_per_pass=1, weighting_method=args.weighting_method,
                              scaling_method=args.scaling_method, train_params=to_train,
                              optimizer_params=opt_params, scheduler_params=schd_params,
                              shared_optimizer=args.shared_optimizer, load_model=False, phase="eval")

            # Now that we've trained a model, revert to the normal checkpoint logic for this task.
            if task.name in task_names_to_avoid_loading:
                task_names_to_avoid_loading.remove(task.name)

            # The best checkpoint will accumulate the best parameters for each task.
            # This logic looks strange. We think it works.
            layer_path = os.path.join(args.run_dir, "model_state_eval_best.th")
            if args.transfer_paradigm == "finetune":
                # If we finetune,
                # Save this fine-tune model with a task specific name.
                finetune_path = os.path.join(args.run_dir, "model_state_%s_best.th" % task.name)
                os.rename(layer_path, finetune_path)

                # Reload the original best model from before target-task training.
                pre_finetune_path = get_best_checkpoint_path(args.run_dir)
                load_model_state(model, pre_finetune_path, args.cuda, skip_task_models=[], strict=strict)
            else:  # args.transfer_paradigm == "frozen":
                # Reload the shared eval-best checkpoint, still skipping the
                # task models that have not been trained yet in this loop.
                load_model_state(model, layer_path, args.cuda, strict=strict,
                                 skip_task_models=task_names_to_avoid_loading)

    if args.do_full_eval:
        # Evaluate #
        log.info("Evaluating...")
        splits_to_write = evaluate.parse_write_preds_arg(args.write_preds)
        if args.transfer_paradigm == "finetune":
            # Each target task is evaluated with its own checkpoint.
            for task in target_tasks:
                if task.name == 'mnli-diagnostic':  # we'll load mnli-diagnostic during mnli
                    continue

                # Prefer the task-specific fine-tuned checkpoint when it exists.
                finetune_path = os.path.join(args.run_dir, "model_state_%s_best.th" % task.name)
                if os.path.exists(finetune_path):
                    ckpt_path = finetune_path
                else:
                    ckpt_path = get_best_checkpoint_path(args.run_dir)
                load_model_state(model, ckpt_path, args.cuda, skip_task_models=[], strict=strict)

                tasks = [task]
                if task.name == 'mnli':
                    tasks += [t for t in target_tasks if t.name == 'mnli-diagnostic']
                evaluate_and_write(args, model, tasks, splits_to_write)

        elif args.transfer_paradigm == "frozen":
            # One shared model serves every target task.
            evaluate_and_write(args, model, target_tasks, splits_to_write)

    log.info("Done!")
コード例 #29
0
ファイル: unsupervised.py プロジェクト: ShuheiKuriki/MUSE
# Sanity-check the parsed parameters before any expensive setup.
# NOTE: these are plain `assert` statements, so they are skipped entirely when
# Python runs with -O.
# CUDA may only be requested when torch can actually see a CUDA device.
assert not params.cuda or torch.cuda.is_available()
# Range checks on the discriminator / learning-rate hyper-parameters.
assert 0 <= params.dis_dropout < 1
assert 0 <= params.dis_input_dropout < 1
assert 0 <= params.dis_smooth < 0.5
assert params.dis_lambda > 0 and params.dis_steps > 0
assert 0 < params.lr_shrink <= 1
# Both embedding files must exist on disk.
assert os.path.isfile(params.src_emb)
assert os.path.isfile(params.tgt_emb)
# The evaluation dictionary is either the literal 'default' or an existing file.
assert params.dico_eval == 'default' or os.path.isfile(params.dico_eval)
# The export format is empty or one of the two listed names.
assert params.export in ["", "txt", "pth"]

# build model / trainer / evaluator
# initialize_exp returns the logger used for the rest of the script.
logger = initialize_exp(params)
# NOTE(review): the second positional argument (True) presumably asks
# build_model to also construct the discriminator -- confirm against
# build_model's signature.
src_emb, tgt_emb, generator, discriminator = build_model(params, True)
trainer = Trainer(src_emb, tgt_emb, generator, discriminator, params)
evaluator = Evaluator(trainer)

# Learning loop for Adversarial Training
if params.adversarial:
    logger.info('----> ADVERSARIAL TRAINING <----\n\n')

    # training loop
    for n_epoch in range(params.n_epochs):

        logger.info('Starting adversarial training epoch %i...', n_epoch)
        # Per-epoch bookkeeping, reset at the start of every epoch: the start
        # timestamp, a word counter and two empty cost lists.
        tic = time.time()
        n_words_proc = 0
        stats = {'DIS_COSTS': [], 'MAP_COSTS': []}
コード例 #30
0
ファイル: score.py プロジェクト: wangqi1996/njunmt
def train(config_path, model_path, model_type, src_filename, trg_filename):
    """Score a parallel corpus with a pre-trained NMT model.

    Despite its name, this function performs no parameter updates: it loads a
    model from ``model_path``, runs it once over the sentence pairs read from
    ``src_filename`` / ``trg_filename`` and records a loss per sentence.

    Args:
        config_path: path handed to ``prepare_configs``; the resulting dict
            must contain ``data_configs``, ``model_configs`` and
            ``training_configs``.
        model_path: checkpoint path handed to ``load_pretrained_model``.
        model_type: only printed; not otherwise used here.
        src_filename: source-side text file to score.
        trg_filename: target-side text file to score.

    Returns:
        dict mapping each sequence number yielded by the data iterator to a
        float: the sentence's token-level NLL summed over non-pad target
        positions, divided by the number of non-pad target tokens.

    Raises:
        RuntimeError: any runtime error other than CUDA "out of memory";
            batches that run out of memory are skipped with a warning and
            therefore have no entry in the result.
    """

    # ================================================================================== #
    # Initialization for training on different devices
    # - CPU/GPU
    # - Single/Distributed
    Constants.USE_GPU = True
    print(config_path)
    print(model_path)
    print(model_type)

    world_size = 1
    rank = 0
    local_rank = 0

    if Constants.USE_GPU:
        torch.cuda.set_device(local_rank)
        Constants.CURRENT_DEVICE = "cuda:{0}".format(local_rank)
    else:
        Constants.CURRENT_DEVICE = "cpu"

    # ================================================================================== #
    # Parsing configuration files
    # - Load default settings
    # - Load pre-defined settings
    # - Load user-defined settings

    configs = prepare_configs(config_path)

    data_configs = configs['data_configs']
    model_configs = configs['model_configs']
    training_configs = configs['training_configs']

    INFO(pretty_configs(configs))

    Constants.SEED = training_configs['seed']
    set_seed(Constants.SEED)
    timer = Timer()

    # ================================================================================== #
    # Load Data

    INFO('Loading data...')
    timer.tic()

    # Generate target dictionary
    vocab_src = Vocabulary.build_from_file(**data_configs['vocabularies'][0])
    vocab_tgt = Vocabulary.build_from_file(**data_configs['vocabularies'][1])

    Constants.EOS = vocab_src.eos
    Constants.PAD = vocab_src.pad
    Constants.BOS = vocab_src.bos

    valid_bitext_dataset = ZipDataset(
        TextLineDataset(
            data_path=src_filename,
            vocabulary=vocab_src,
            max_len=100,
            is_train_dataset=False,
        ),
        TextLineDataset(
            data_path=trg_filename,
            vocabulary=vocab_tgt,
            is_train_dataset=False,
            max_len=100,
        ))

    # numbering=True makes every batch carry its sequence numbers, which are
    # used below as the keys of the returned score dict.
    valid_iterator = DataIterator(dataset=valid_bitext_dataset,
                                  batch_size=20,
                                  use_bucket=training_configs['use_bucket'],
                                  buffer_size=training_configs['buffer_size'],
                                  numbering=True,
                                  world_size=world_size,
                                  rank=rank)

    INFO('Done. Elapsed time {0}'.format(timer.toc()))

    # ================================ Begin ======================================== #
    # Build the model, move it to the GPU and load the pre-trained weights.
    # No optimizer or scheduler is built because nothing is trained here.

    # 1. Build Model
    INFO('Building model...')
    timer.tic()
    nmt_model = build_model(n_src_vocab=vocab_src.max_n_words,
                            n_tgt_vocab=vocab_tgt.max_n_words,
                            padding_idx=vocab_src.pad,
                            vocab_src=vocab_src,
                            **model_configs)
    INFO(nmt_model)

    # 2. Move to GPU
    if Constants.USE_GPU:
        nmt_model = nmt_model.cuda()

    # 3. Load pretrained model
    load_pretrained_model(nmt_model,
                          model_path,
                          device=Constants.CURRENT_DEVICE)
    # NOTE(review): the model is left in whatever train/eval mode the helpers
    # above return it in. If dropout is active the scores are stochastic --
    # confirm whether nmt_model.eval() should be called here.

    INFO('Done. Elapsed time {0}'.format(timer.toc()))

    # ================================================================================== #
    # Scoring loop

    # These three values never change in this function; they exist only so
    # the progress-bar postfix keeps the same text as before.
    train_loss = 0.0
    valid_loss = best_valid_loss = float('inf')
    postfix_str = 'TrainLoss: {:.2f}, ValidLoss(best): {:.2f} ({:.2f}), '.format(
        train_loss, valid_loss, best_valid_loss)

    INFO('Begin training...')
    eidx = 0
    uidx = 0
    score_result = dict()

    # Build iterator and progress bar
    training_iter = valid_iterator.build_generator()

    training_progress_bar = tqdm(desc=' - (Epc {}, Upd {}) '.format(
        eidx, uidx),
                                 total=len(valid_iterator),
                                 unit="sents")

    for batch in training_iter:
        seqs_numbers, seqs_x, seqs_y = batch

        batch_size = len(seqs_x)

        try:
            # Prepare data
            x, y = prepare_data(seqs_x, seqs_y, cuda=Constants.USE_GPU)

            # Teacher forcing: feed y[:-1], predict y[1:].
            y_inp = y[:, :-1].contiguous()
            y_label = y[:, 1:].contiguous()  # [batch_size, seq_len]

            # Nothing is back-propagated, so skip building the autograd graph.
            with torch.no_grad():
                log_probs = nmt_model(
                    x, y_inp, log_probs=True)  # [batch_size, seq_len, vocab_size]

                _, seq_len = y_label.shape
                log_probs = log_probs.view(-1, vocab_tgt.max_n_words)
                flat_label = y_label.view(-1)
                # reduction='none' keeps one loss per token (the modern
                # spelling of the deprecated reduce=False); pad positions
                # contribute 0 because of ignore_index.
                loss = F.nll_loss(log_probs,
                                  flat_label,
                                  reduction='none',
                                  ignore_index=vocab_tgt.pad)
                loss = loss.view(batch_size, seq_len).sum(-1)

                # Normalise each sentence by its number of non-pad tokens.
                valid_token = (y_label != vocab_tgt.pad).sum(-1)
                loss = loss.double().div(valid_token.double())

            for seq_num, sent_loss in zip(seqs_numbers, loss):
                # Every sequence number must be scored at most once.
                assert seq_num not in score_result
                score_result[seq_num] = sent_loss.item()

            uidx += 1

        except RuntimeError as e:
            if 'out of memory' in str(e):
                print('| WARNING: ran out of memory, skipping batch')
            else:
                raise

        # The bar advances even for skipped batches so it still reaches total.
        training_progress_bar.update(batch_size)
        training_progress_bar.set_description(
            ' - (Epc {}, Upd {}) '.format(eidx, uidx))
        training_progress_bar.set_postfix_str(postfix_str)

    training_progress_bar.close()
    return score_result
コード例 #31
0
# Parse the command-line options into the `params` namespace used below.
params = parser.parse_args()

# Sanity-check the parsed parameters before any expensive setup.
# NOTE: these are plain `assert` statements, so they are skipped entirely when
# Python runs with -O.
# CUDA may only be requested when torch can actually see a CUDA device.
assert not params.cuda or torch.cuda.is_available()
# The training dictionary is one of two keywords or a path to an existing file.
assert params.dico_train in ["identical_char", "default"] or os.path.isfile(
    params.dico_train)
# The dictionary-building mode must be one of the four listed strings.
assert params.dico_build in ["S2T", "T2S", "S2T|T2S", "S2T&T2S"]
# A non-zero dico_max_size must lie strictly between dico_min_size and
# dico_max_rank; 0 bypasses both checks.
assert params.dico_max_size == 0 or params.dico_max_size < params.dico_max_rank
assert params.dico_max_size == 0 or params.dico_max_size > params.dico_min_size
# Both embedding files must exist on disk.
assert os.path.isfile(params.src_emb)
assert os.path.isfile(params.tgt_emb)
# The export format is empty or one of the two listed names.
assert params.export in ["", "txt", "pth"]

# build logger / model / trainer / evaluator
logger = initialize_exp(params)
# The fourth value returned by build_model is discarded here, and None is
# passed to Trainer in the corresponding position.
src_emb, tgt_emb, mapping, _ = build_model(params, False)
trainer = Trainer(src_emb, tgt_emb, mapping, None, params)
evaluator = Evaluator(trainer)

# load a training dictionary. if a dictionary path is not provided, use a default
# one ("default") or create one based on identical character strings ("identical_char")
trainer.load_training_dico(params.dico_train)
"""
Learning loop for Procrustes Iterative Learning
"""
# range(n_refinement + 1) yields n_refinement + 1 iterations, numbered from 0.
for n_iter in range(params.n_refinement + 1):

    logger.info('Starting iteration %i...' % n_iter)

    # build a dictionary from aligned embeddings (unless
    # it is the first iteration and we use the init one)
コード例 #32
0
# Parse the command-line options into the `params` namespace used below.
params = parser.parse_args()

# Sanity-check the parsed parameters before any expensive setup.
# NOTE: these are plain `assert` statements, so they are skipped entirely when
# Python runs with -O.
# CUDA may only be requested when torch can actually see a CUDA device.
assert not params.cuda or torch.cuda.is_available()
# The training dictionary is one of two keywords or a path to an existing file.
assert params.dico_train in ["identical_char", "default"] or os.path.isfile(params.dico_train)
# The dictionary-building mode must be one of the four listed strings.
assert params.dico_build in ["S2T", "T2S", "S2T|T2S", "S2T&T2S"]
# A non-zero dico_max_size must lie strictly between dico_min_size and
# dico_max_rank; 0 bypasses both checks.
assert params.dico_max_size == 0 or params.dico_max_size < params.dico_max_rank
assert params.dico_max_size == 0 or params.dico_max_size > params.dico_min_size
# Both embedding files must exist on disk.
assert os.path.isfile(params.src_emb)
assert os.path.isfile(params.tgt_emb)
# The evaluation dictionary is either the literal 'default' or an existing file.
assert params.dico_eval == 'default' or os.path.isfile(params.dico_eval)
# The export format is empty or one of the two listed names.
assert params.export in ["", "txt", "pth"]

# build logger / model / trainer / evaluator
logger = initialize_exp(params)
# In this variant build_model takes only params and returns three values.
src_emb, tgt_emb, mapping = build_model(params)
trainer = Trainer(src_emb, tgt_emb, mapping, params)
evaluator = Evaluator(trainer)

# load a training dictionary. if a dictionary path is not provided, use a default
# one ("default") or create one based on identical character strings ("identical_char")
trainer.load_training_dico(params.dico_train)

# define the validation metric
# The unsupervised metric is chosen when the training dictionary is built from
# identical character strings; otherwise the supervised metric is used.
VALIDATION_METRIC = VALIDATION_METRIC_UNSUP if params.dico_train == 'identical_char' else VALIDATION_METRIC_SUP
logger.info("Validation metric: %s" % VALIDATION_METRIC)


"""
Learning loop for crosslingual training
"""