Esempio n. 1
0
def main(params):
    """Entry point: distributed setup, model/trainer construction, generation.

    Builds either a single-stack model or an encoder-decoder pair depending on
    ``params.encoder_only``, then runs generation when ``params.eval_only``.
    """
    # multi-GPU / multi-node setup
    init_distributed_mode(params)

    # experiment bookkeeping (logger kept for parity with sibling entry points)
    logger = initialize_exp(params)

    # SLURM time-limit / pre-emption handling
    init_signal_handler()

    # data
    data = load_data(params)

    # model + matching trainer/evaluator, chosen in a single branch
    if params.encoder_only:
        model = build_model(params, data['dico'])
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        encoder, decoder = build_model(params, data['dico'])
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecGenerator(trainer, data, params)

    # generation-only mode: produce outputs, then stop
    if params.eval_only:
        evaluator.generate(trainer)
        exit()
def main(params):
    """LSTM MT entry point: builds an RNN encoder / attention decoder pair and
    trains it with the ``try_lstm`` step over ``params.mt_steps``.

    Expects ``params`` to provide at least ``n_words``, ``max_epoch``,
    ``mt_steps``, ``lambda_mt``, ``is_master`` and the distributed-training
    fields consumed by ``init_distributed_mode``.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)
    # RNN encoder + attention decoder on GPU (CUDA availability is assumed)
    hidden_size = 1024
    encoder = EncoderRNN.EncoderRNN(params.n_words, hidden_size).cuda()
    decoder = Attention_decoder.Attention_decoder(hidden_size,
                                                  params.n_words,
                                                  dropout_p=0.1).cuda()
    trainer = LSTM_Trainer(encoder, decoder, data, params)
    evaluator = LSTM_Evaluator(trainer, data, params)
    # set sampling probabilities for training
    set_sampling_probs(data, params)
    # language model training
    # (`count` is unused; the epoch id below comes from the trainer itself)
    for count in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        trainer.n_sentences = 0

        # keep stepping until the trainer reports a full epoch of sentences
        while trainer.n_sentences < trainer.epoch_size:

            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.try_lstm(lang1, lang2, params.lambda_mt)

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)

    # save the output of softmax
    # NOTE(review): `clm_temp`, `ml_temp` and `bt_temp` are not defined
    # anywhere in this function, so these three calls raise NameError when
    # reached — confirm where the intended temperature values come from.
    trainer.save_softmax_output(clm_temp, 'clm_temp')
    trainer.save_softmax_output(ml_temp, 'ml_temp')
    trainer.save_softmax_output(bt_temp, 'bt_temp')
Esempio n. 3
0
def main(params, params_pretrain, trainer_class):
    """Entry point with optional pretraining reuse.

    When ``params.pretrain`` is set, a pre-trainer/evaluator pair is built
    from ``params_pretrain`` (sharing the distributed settings of ``params``)
    and handed to the model and the final ``trainer_class`` instance.
    """
    # distributed / experiment / SLURM setup
    init_distributed_mode(params)
    logger = initialize_exp(params)
    init_signal_handler()

    # optionally build a pre-trainer; it shares distributed flags with params
    pre_trainer, evaluator = None, None
    if params.pretrain:
        for shared in ('n_gpu_per_node', 'multi_gpu', 'is_master'):
            setattr(params_pretrain, shared, getattr(params, shared))
        pre_trainer, evaluator, _ = get_trainer_evaluator(
            params_pretrain, logger)

    # model (may embed the pre-trained weights)
    model = build_model(params, logger, pre_trainer=pre_trainer)

    # data
    train_dataset, val_dataset = load_dataset(params, logger, model)

    # no optimizers are needed in evaluation-only mode
    optimizers = [] if params.eval_only else model.get_optimizers(params)

    # trainer
    trainer = trainer_class(params_pretrain, params, model, optimizers,
                            train_dataset, val_dataset, logger, pre_trainer,
                            evaluator)

    # the embedder must be the very object held by the pre-trainer
    if params.pretrain:
        assert trainer.model.embedder.model is getattr(
            pre_trainer, params.reload_key)

    # run train/evaluation
    logger.info("")
    if params.eval_only:
        trainer.eval(get_loss, end_of_epoch)
    else:
        trainer.train(get_loss, end_of_epoch)
Esempio n. 4
0
def main(params):
    """Debug entry point: iterates the test parallel dataset and prints the
    prediction-mask / target construction used for the MT loss.

    No model is built or trained; this only sanity-checks batching, masking
    and the per-sentence target counts.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)
    # parallel data is keyed by the lexicographically sorted language pair
    _lang1, _lang2 = (
        params.langs[0],
        params.langs[1]) if params.langs[0] < params.langs[1] else (
            params.langs[1], params.langs[0])
    dataset = data['para'][(_lang1, _lang2)]['test']
    print(params.n_words)
    print("ref_paths" + str(params.ref_paths))
    # x1/x2: token batches, len1/len2: per-sentence lengths
    # (assumes x2 is laid out (seq_len, batch) as in XLM — TODO confirm)
    for i, ((x1, len1, id1, lenid1), (x2, len2, id2, lenid2)) in enumerate(
            dataset.get_iterator(shuffle=False,
                                 group_by_size=True,
                                 n_sentences=-1,
                                 tokens_per_batch=2000)):
        print('x2' + str(x2.size()))
        print("len2[None] - 1" + str(len2[None] - 1) + " " + str(len2[None]))
        print(str(len2[0]))
        print('len2' + str(len2))
        # positions 0..max_len-1, on the same device as the lengths
        alen = torch.arange(len2.max(), dtype=torch.long, device=len2.device)
        # do not predict anything given the last target word
        pred_mask = alen[:, None] < len2[None] - 1
        print("pred_mask" + str(pred_mask))
        print(str(pred_mask.size()))
        # targets are the tokens shifted by one, selected where the mask is set
        y = x2[1:].masked_select(pred_mask[:-1])
        print("yyyy" + str(y))
        print(str(y.size()))
        # each sentence contributes exactly (length - 1) prediction targets
        assert len(y) == (len2 - 1).sum().item()
Esempio n. 5
0
def main(params):
    """Meta-learning XLM entry point.

    Builds a single model shared across the (meta-)tasks described by
    ``params.meta_params`` (a dict keyed by task/language-group), then either
    runs evaluation only, or trains with the standard XLM objectives (CLM,
    MLM, PC, AE, MT, BT).  In the meta-learning branch, the language pairs of
    every task are gathered into per-objective lists first, and each trainer
    step consumes the whole list; a step returning a falsy value signals that
    its data is exhausted, and the epoch ends once every objective is done.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    # (meta_params is swapped out while logging because it is too long to log,
    # then restored)
    meta_params = copy.deepcopy(params).meta_params
    params.meta_params = "..."  # too long to log
    logger = initialize_exp(params)
    params.meta_params = meta_params

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # todo : good params.n_words (We take the one from the first task have this parameter for the moment.)
    """
    But we think that if all the task data are based on the same vocabulary, all these parameters will be the same, 
    and therefore no problem if we choose one at random.
    """
    p = params.meta_params[data['key']]

    # build model
    if params.encoder_only:
        model = build_model(params=p, dico=data['dico'])
    else:
        encoder, decoder = build_model(params=p, dico=data['dico'])

    # todo : good pad_index and eos_index and ... (I'll take the one from the first task for the moment.)
    """
    But we think that if all the task data are based on the same vocabulary, all these parameters will be the same, 
    and therefore no problem if we choose one at random.
    """
    params.pad_index = p.pad_index
    params.eos_index = p.eos_index

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation-only mode: log per-task scores, then global ones, and stop
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        if not params.meta_learning:
            for k, v in scores.items():
                logger.info("%s -> %.6f" % (k, v))
        else:
            for lgs in params.meta_params.keys():
                logger.info("============ task : %s " % lgs)
                for k, v in scores[lgs].items():
                    if k != "epoch":
                        logger.info("%s -> %.6f" % (k, v))
            logger.info("============ all")
            for k, v in scores.items():
                if not (k in (list(params.meta_params.keys()) + ['epoch'])):
                    logger.info("%s -> %.6f" % (k, v))

        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        if not params.meta_learning:
            # classic (single-task) XLM training loop
            trainer.n_sentences = 0
            while trainer.n_sentences < trainer.epoch_size:
                # CLM steps
                for lang1, lang2 in shuf_order(params.clm_steps, params):
                    trainer.clm_step(lang1, lang2, params.lambda_clm)

                # MLM steps (also includes TLM if lang2 is not None)
                for lang1, lang2 in shuf_order(params.mlm_steps, params):
                    trainer.mlm_step(lang1, lang2, params.lambda_mlm)

                # parallel classification steps
                for lang1, lang2 in shuf_order(params.pc_steps, params):
                    trainer.pc_step(lang1, lang2, params.lambda_pc)

                # denoising auto-encoder steps
                for lang in shuf_order(params.ae_steps):
                    trainer.mt_step(lang, lang, params.lambda_ae)

                # machine translation steps
                for lang1, lang2 in shuf_order(params.mt_steps, params):
                    trainer.mt_step(lang1, lang2, params.lambda_mt)

                # back-translation steps
                for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                    trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

                trainer.iter()
        else:
            # meta-learning branch: one sentence counter per task
            trainer.n_sentences = {}
            """
            Here we build language lists for each of our meta-taks. Indeed, for two language lists l1 and l2, 
            the objective will be done with l1[i] and l2[i] respectively, this for each index i of the two lists. 
            """
            lang1_dic, lang2_dic, lang3_dic = {}, {}, {}
            """
            In the case of meta-learning, we have a (meta-)data dictionary for each (meta-)task, 
            so the keys are the languages conserved by the task. 
            """
            data_keys_dic = {}

            # equivalent to "for task in list of task" in the original algorithm,  except here we prepare all the tasks beforehand.
            for lgs in params.meta_params.keys():
                trainer.n_sentences[lgs] = 0

                # CLM (lists are lazily created on the first task)
                try:
                    lang1_dic['clm_step']
                except KeyError:
                    lang1_dic['clm_step'], lang2_dic[
                        'clm_step'], data_keys_dic['clm_step'] = [], [], []
                for lang1, lang2 in shuf_order(
                        params.meta_params[lgs].clm_steps, params):
                    lang1_dic['clm_step'].append(lang1)
                    lang2_dic['clm_step'].append(lang2)
                    data_keys_dic['clm_step'].append(lgs)

                # MLM
                try:
                    lang1_dic['mlm_step']
                except KeyError:
                    lang1_dic['mlm_step'], lang2_dic[
                        'mlm_step'], data_keys_dic['mlm_step'] = [], [], []
                for lang1, lang2 in shuf_order(
                        params.meta_params[lgs].mlm_steps, params):
                    lang1_dic['mlm_step'].append(lang1)
                    lang2_dic['mlm_step'].append(lang2)
                    data_keys_dic['mlm_step'].append(lgs)

                # parallel classification
                try:
                    lang1_dic['pc_step']
                except KeyError:
                    lang1_dic['pc_step'], lang2_dic['pc_step'], data_keys_dic[
                        'pc_step'] = [], [], []
                for lang1, lang2 in shuf_order(
                        params.meta_params[lgs].pc_steps, params):
                    lang1_dic['pc_step'].append(lang1)
                    lang2_dic['pc_step'].append(lang2)
                    data_keys_dic['pc_step'].append(lgs)

                # denoising auto-encoder (single language list)
                try:
                    lang1_dic['ae_step']
                except KeyError:
                    lang1_dic['ae_step'], data_keys_dic['ae_step'] = [], []
                for lang1 in shuf_order(params.meta_params[lgs].ae_steps):
                    lang1_dic['ae_step'].append(lang1)
                    data_keys_dic['ae_step'].append(lgs)

                # machine translation
                try:
                    lang1_dic['mt_step']
                except KeyError:
                    lang1_dic['mt_step'], lang2_dic['mt_step'], data_keys_dic[
                        'mt_step'] = [], [], []
                for lang1, lang2 in shuf_order(
                        params.meta_params[lgs].mt_steps, params):
                    lang1_dic['mt_step'].append(lang1)
                    lang2_dic['mt_step'].append(lang2)
                    data_keys_dic['mt_step'].append(lgs)

                # back-translation (language triplets)
                try:
                    lang1_dic['bt_step']
                except KeyError:
                    lang1_dic['bt_step'], lang2_dic['bt_step'], lang3_dic[
                        'bt_step'], data_keys_dic['bt_step'] = [], [], [], []
                for lang1, lang2, lang3 in shuf_order(
                        params.meta_params[lgs].bt_steps):
                    lang1_dic['bt_step'].append(lang1)
                    lang2_dic['bt_step'].append(lang2)
                    lang3_dic['bt_step'].append(lang3)
                    data_keys_dic['bt_step'].append(lgs)

            flag = True

            # equivalent to "while not done do" in the original algorithm
            # (each step's return value tells whether it still had data)
            while flag:

                # CLM steps
                #print("clm_step", flag)
                a = trainer.clm_step(lang1_dic['clm_step'],
                                     lang2_dic['clm_step'], params.lambda_clm,
                                     data_keys_dic['clm_step'])

                #print("mlm_step", flag)
                # MLM steps (also includes TLM if lang2 is not None)
                b = trainer.mlm_step(lang1_dic['mlm_step'],
                                     lang2_dic['mlm_step'], params.lambda_mlm,
                                     data_keys_dic['mlm_step'])

                # parallel classification steps
                c = trainer.pc_step(lang1_dic['pc_step'], lang2_dic['pc_step'],
                                    params.lambda_pc, data_keys_dic['pc_step'])

                if isinstance(trainer, EncDecTrainer):

                    # denoising auto-encoder steps
                    d = trainer.mt_step(lang1_dic['ae_step'],
                                        lang1_dic['ae_step'], params.lambda_ae,
                                        data_keys_dic['ae_step'])

                    # machine translation steps
                    e = trainer.mt_step(lang1_dic['mt_step'],
                                        lang2_dic['mt_step'], params.lambda_mt,
                                        data_keys_dic['mt_step'])

                    # back-translation steps
                    f = trainer.bt_step(lang1_dic['bt_step'],
                                        lang2_dic['bt_step'],
                                        lang3_dic['bt_step'], params.lambda_bt,
                                        data_keys_dic['bt_step'])

                    # the epoch ends only once every objective is exhausted
                    if (not a) and (not b) and (not c) and (not d) and (
                            not e) and (not f):
                        flag = False  # End of epoch
                    else:
                        flag = True
                else:
                    # encoder-only: only CLM/MLM/PC participate
                    if (not a) and (not b) and (not c):
                        flag = False  # End of epoch
                    else:
                        flag = True

                trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log (per task, then global, in meta-learning mode)
        if not params.meta_learning:
            for k, v in scores.items():
                logger.info("%s -> %.6f" % (k, v))
        else:
            for lgs in params.meta_params.keys():
                logger.info("============ task : %s " % lgs)
                for k, v in scores[lgs].items():
                    if k != "epoch":
                        logger.info("%s -> %.6f" % (k, v))
            logger.info("============ all")
            for k, v in scores.items():
                if not (k in (list(params.meta_params.keys()) + ['epoch'])):
                    logger.info("%s -> %.6f" % (k, v))

        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)

        # free memory between epochs and log how much was collected
        logger.info("============ garbage collector collecting %d ..." %
                    gc.collect())
Esempio n. 6
0
def main(params):
    """Wikisum summarization entry point.

    Builds the WS model (global encoder + local encoder + decoder), then
    either evaluates or trains it.  Only the ``params.WS`` configuration is
    runnable here: ``WikisumTrainer`` needs the three WS components, and the
    original code fell through to its construction with undefined names
    (NameError) on the other branches — those configurations now fail fast
    with a clear error instead.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    # NOTE(review): vocabulary path is hard-coded; consider moving it to params
    voc_path = "/home/zchen/XLM/data/processed/XLM_en_zh/50k/vocab"
    data = load_wikisum_data(voc_path, params.data_path, params)

    # build model — only the WS configuration is supported (see docstring)
    if params.encoder_only or not params.WS:
        raise ValueError(
            "this entry point requires params.WS=True and "
            "params.encoder_only=False")
    global_encoder, local_encoder, decoder = build_model(params, data['dico'])

    # build trainer, reload potential checkpoints / build evaluator
    trainer = WikisumTrainer(global_encoder, local_encoder, decoder, data,
                             params)
    evaluator = WikisumEvaluator(trainer, data, params, data_set="valid")

    # evaluation-only mode
    if params.eval_only:
        scores = evaluator.evaluate(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # training loop
    for _ in range(params.max_epoch):
        # this trainer's epoch counter is advanced manually here
        trainer.epoch += 1

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        trainer.n_sentences = 0

        # one pass over the dataloader; every WS step pair runs on each batch
        for batch in trainer.dataloader:
            for lang1, lang2 in shuf_order(params.ws_steps, params):
                trainer.step(lang1, lang2, batch)

            trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate perplexity
        scores = evaluator.evaluate(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
Esempio n. 7
0
def main(params):
    """MLM-only training entry point: setup, optional eval, then epoch loop."""

    # distributed / experiment / SLURM setup
    init_distributed_mode(params)
    logger = initialize_exp(params)
    init_signal_handler()

    # data and model
    data = load_data(params)
    model = build_model(params, data['dico'])

    # trainer (reloads checkpoints if any) and evaluator
    trainer = Trainer(model, data, params)
    evaluator = Evaluator(trainer, data, params)

    def report(scores):
        # one log line per metric
        for metric, value in scores.items():
            logger.info("%s -> %.6f" % (metric, value))

    # evaluation-only mode
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        report(scores)
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    set_sampling_probs(data, params)

    # training loop
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        trainer.n_sentences = 0
        while trainer.n_sentences < trainer.epoch_size:
            trainer.mlm_step(params.lambda_mlm)  # one MLM step
            trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # validation perplexity + machine-readable JSON record
        scores = evaluator.run_all_evals(trainer)
        report(scores)
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # checkpointing / end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
Esempio n. 8
0
def main(params):
    """MASS-style training entry point with sentence-matching evaluation.

    Builds a single model or encoder-decoder pair, optionally converts to
    fp16 / wraps in apex DistributedDataParallel, then either runs the
    matching evaluation and dumps prediction files, or trains with
    ``mass_step`` over ``params.mass_steps``.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
    else:
        encoder, decoder = build_model(params, data['dico'])

    # float16 (requires cuDNN)
    if params.fp16:
        assert torch.backends.cudnn.enabled
        if params.encoder_only:
            model = network_to_half(model)
        else:
            encoder = network_to_half(encoder)
            decoder = network_to_half(decoder)

    # distributed (apex DDP with delayed all-reduce)
    if params.multi_gpu:
        logger.info("Using nn.parallel.DistributedDataParallel ...")
        if params.encoder_only:
            model = apex.parallel.DistributedDataParallel(model, delay_allreduce=True)
        else:
            encoder = apex.parallel.DistributedDataParallel(encoder, delay_allreduce=True)
            decoder = apex.parallel.DistributedDataParallel(decoder, delay_allreduce=True)

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation-only mode: run matching evals, dump prediction files, stop
    if params.eval_only:
        logger.info('Evaluating and saving new result file')
        scores = evaluator.run_all_evals_match(trainer)
        # 'likelihood' entries are per-sentence arrays, 'scores' entries are
        # matrices — logged differently from plain scalar metrics
        for k, v in scores.items():
            if 'likelihood' in k:
                logger.info("%s -> %.6f" % (k, np.mean(v)))
            elif 'scores' in k:
                logger.info("%s -> %s" % (k, v.shape))
            else:
                logger.info("%s -> %.6f" % (k, v))

        # dump forward scores and per-match-file likelihoods as text
        np.savetxt(os.path.join(params.dump_path, 'best-fwd-prediction.txt'),scores['%s_%s_fwd_scores' % ('test', params.mass_steps[0])],fmt='%f')
        for match in params.match_files.split(','):
            np.savetxt(os.path.join(params.dump_path, 'best-match-prediction{}.txt'.format(match.split('.')[-1])),
                   scores['%s_%s_sentence_likelihood' % (match, params.mass_steps[0])], fmt='%f')
        # assemble a (label, target, prediction) frame from the data directory
        # NOTE(review): assumes a 'match' entry exists in scores and that
        # 'labels'/'suffix' files align row-for-row with it — confirm upstream
        labels = np.loadtxt(os.path.join(params.data_path, 'labels'))
        targets = np.loadtxt(os.path.join(params.data_path, 'suffix'))
        preds = scores['%s_%s_sentence_likelihood' % ('match', params.mass_steps[0])]
        results = pd.DataFrame({'label': labels, 'target': targets, 'pred': preds})
        results.to_pickle(os.path.join(params.dump_path, 'best-matching-prediction.pkl'))
        # JSON logging disabled: scores contain numpy arrays (not serializable)
        #logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)
    # language model training
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:

            # mass prediction steps
            for lang in shuf_order(params.mass_steps):
                trainer.mass_step(lang, params.lambda_mass)
            trainer.iter()
        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_epoch_evals_match(trainer)
        # print / JSON log (same array-aware formatting as above)
        for k, v in scores.items():
            if 'likelihood' in k:
                logger.info("%s -> %.6f" % (k, np.mean(v)))
            elif 'scores' in k:
                logger.info("%s -> %s" % (k, v.shape))
            else:
                logger.info("%s -> %.6f" % (k, v))
        #if params.is_master:
            #logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
Esempio n. 9
0
def main(params):
    """Multi-objective XLM/MASS training entry point.

    Builds a single model or encoder-decoder pair (optionally fp16), then
    either evaluates, or trains with every configured objective per
    iteration: CLM, MLM/TLM, parallel classification, denoising AE, MASS,
    MT, back-translation and back-parallel steps.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
    else:
        encoder, decoder = build_model(params, data['dico'])

    # float16 (requires cuDNN)
    if params.fp16:
        assert torch.backends.cudnn.enabled
        if params.encoder_only:
            model = network_to_half(model)
        else:
            encoder = network_to_half(encoder)
            decoder = network_to_half(decoder)

    # distributed — deliberately disabled in this variant
    # if params.multi_gpu:
    #     logger.info("Using nn.parallel.DistributedDataParallel ...")
    #     if params.encoder_only:
    #         model = apex.parallel.DistributedDataParallel(model, delay_allreduce=True)
    #     else:
    #         encoder = apex.parallel.DistributedDataParallel(encoder, delay_allreduce=True)
    #         decoder = apex.parallel.DistributedDataParallel(decoder, delay_allreduce=True)

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation-only mode
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:

            # CLM steps
            for lang1, lang2 in shuf_order(params.clm_steps, params):
                trainer.clm_step(lang1, lang2, params.lambda_clm)

            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            # parallel classification steps
            for lang1, lang2 in shuf_order(params.pc_steps, params):
                trainer.pc_step(lang1, lang2, params.lambda_pc)

            # denoising auto-encoder steps (MT step with lang1 == lang2)
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # mass prediction steps
            for lang in shuf_order(params.mass_steps):
                trainer.mass_step(lang, params.lambda_mass)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            # back-translation steps
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

            # back-parallel steps
            for lang1, lang2 in shuf_order(params.bmt_steps, params):
                trainer.bmt_step(lang1, lang2, params.lambda_bmt)

            trainer.iter()

        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
Esempio n. 10
0
def clts_elmo_main(params):
    """Cross-lingual text summarization (ELMo variant) entry point.

    Builds the ELMo-based encoder plus summarization encoder/decoder, then
    evaluates or trains with MT steps over ``params.mt_steps``.
    """
    # distributed / experiment / SLURM setup
    init_distributed_mode(params)
    logger = initialize_exp(params)
    init_signal_handler()

    # data
    data = load_data(params)

    # ELMo cross-lingual encoder + summarization encoder/decoder
    elmo, ts_encoder, ts_decoder = build_clts_elmo_model(params, data['dico'])
    trainer = XLMCLTSEncDecTrainer(elmo, ts_encoder, ts_decoder, data, params)
    evaluator = XLMCLTSEncDecEvaluator(trainer, data, params)

    # evaluation-only mode
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for metric, value in scores.items():
            logger.info("%s -> %.6f" % (metric, value))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    set_sampling_probs(data, params)

    # training loop
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        trainer.n_sentences = 0
        while trainer.n_sentences < trainer.epoch_size:
            # one MT step per (source, target) pair, in shuffled order
            for src, tgt in shuf_order(params.mt_steps, params):
                trainer.mt_step(src, tgt, params.lambda_mt)
            trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # validation perplexity + machine-readable JSON record
        scores = evaluator.run_all_evals(trainer)
        for metric, value in scores.items():
            logger.info("%s -> %.6f" % (metric, value))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # checkpointing / end of epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
Esempio n. 11
0
def main(params):
    """Train (or evaluate) an encoder-decoder model on symbolic tasks.

    Sets up distributed training, builds the environment/modules/trainer/
    evaluator stack, then either runs evaluation only (``params.eval_only``)
    or trains for ``params.max_epoch`` epochs, picking tasks in random order
    each pass and either exporting data or running encoder-decoder steps.

    Args:
        params: experiment namespace (argparse-style). Read here: cpu,
            multi_gpu, eval_only, max_epoch, tasks, export_data, is_master.
    """

    # initialize the multi-GPU / multi-node training
    # initialize experiment / SLURM signal handler for time limit / pre-emption
    init_distributed_mode(params)
    logger = initialize_exp(params)
    init_signal_handler()

    # CPU / CUDA: CPU mode excludes multi-GPU; otherwise CUDA is required
    if params.cpu:
        assert not params.multi_gpu
    else:
        assert torch.cuda.is_available()
    # global flag read elsewhere in the project to pick the device
    src.utils.CUDA = not params.cpu

    # build environment / modules / trainer / evaluator
    env = build_env(params)
    modules = build_modules(env, params)
    trainer = Trainer(modules, env, params)
    evaluator = Evaluator(trainer)

    # evaluation only: log scores and stop before any training
    if params.eval_only:
        scores = evaluator.run_all_evals()
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # training
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)

        # per-epoch equation counter; presumably advanced by the step
        # methods -- TODO confirm against the Trainer implementation
        trainer.n_equations = 0

        while trainer.n_equations < trainer.epoch_size:

            # training steps: visit every task once per pass, in random order
            for task_id in np.random.permutation(len(params.tasks)):
                task = params.tasks[task_id]
                if params.export_data:
                    trainer.export_data(task)
                else:
                    trainer.enc_dec_step(task)
                trainer.iter()

        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals()

        # print / JSON log (JSON line only on the master process)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch: checkpointing and epoch bookkeeping
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
def main(params):
    """Linear-probe training: fit a linear classifier on frozen features.

    Builds the ``params.architecture`` backbone, strips its classification
    head, optionally restores backbone weights from ``params.from_ckpt``,
    freezes it in eval mode, and trains a single ``nn.Linear`` head on the
    backbone embeddings. Supports eval-only mode and distributed training
    of the linear head.

    Fixes over the original: ``load_state_dict`` returns
    ``(missing_keys, unexpected_keys)`` -- the variable name and printed
    message said "unexcepted"; and ``fc.weight`` deletion is now guarded
    like ``fc.bias`` so a head-less checkpoint no longer raises KeyError.

    Args:
        params: experiment namespace (argparse-style) with data-loader,
            model, checkpoint and distributed-training options.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment / load data
    logger = initialize_exp(params)

    # seed CPU and all GPU RNGs for reproducibility
    torch.manual_seed(params.seed)
    torch.cuda.manual_seed_all(params.seed)

    # initialize SLURM signal handler for time limit / pre-emption
    if params.is_slurm_job:
        init_signal_handler()

    # data loaders / samplers
    populate_dataset(params)
    train_data_loader, train_sampler, _ = get_data_loader(
        img_size=params.img_size,
        crop_size=params.crop_size,
        shuffle=True,
        batch_size=params.batch_size,
        num_classes=params.num_classes,
        nb_workers=params.nb_workers,
        distributed_sampler=params.multi_gpu,
        dataset=params.dataset,
        data_path=params.train_path,
        transform=params.train_transform,
        split='valid' if params.debug_train else 'train',
        seed=params.seed)

    valid_data_loader, _, _ = get_data_loader(img_size=params.img_size,
                                              crop_size=params.crop_size,
                                              shuffle=False,
                                              batch_size=params.batch_size,
                                              num_classes=params.num_classes,
                                              nb_workers=params.nb_workers,
                                              distributed_sampler=False,
                                              dataset=params.dataset,
                                              transform='center',
                                              split='valid',
                                              seed=params.seed)

    # build model / cuda: replace the backbone's fc head with identity and
    # freeze it in eval mode; only the linear head below is trained
    logger.info("Building %s model ..." % params.architecture)
    ftmodel = build_model(params)
    ftmodel.fc = nn.Sequential()
    ftmodel.eval().cuda()

    linearmodel = nn.Linear(EMBEDDING_SIZE[params.architecture],
                            params.num_classes).cuda()

    if params.from_ckpt != "":
        ckpt = torch.load(params.from_ckpt)
        # strip the DistributedDataParallel "module." prefix if present
        state_dict = {
            k.replace("module.", ""): v
            for k, v in ckpt['model'].items()
        }

        # drop the checkpoint's classification head (replaced by identity
        # above); guard both keys so head-less checkpoints load cleanly
        if "fc.weight" in state_dict:
            del state_dict["fc.weight"]
        if "fc.bias" in state_dict:
            del state_dict["fc.bias"]
        missing_keys, unexpected_keys = ftmodel.load_state_dict(state_dict,
                                                                strict=False)
        print("Missing keys: ", missing_keys)
        print("Unexpected keys: ", unexpected_keys)

    # distributed  # TODO: check this https://github.com/NVIDIA/apex/blob/master/examples/imagenet/main.py#L142
    if params.multi_gpu:
        logger.info("Using nn.parallel.DistributedDataParallel ...")
        linearmodel = nn.parallel.DistributedDataParallel(
            linearmodel,
            device_ids=[params.local_rank],
            output_device=params.local_rank,
            broadcast_buffers=True)

    # build trainer / reload potential checkpoints / build evaluator
    trainer = Trainer(model=linearmodel, params=params, ftmodel=ftmodel)
    trainer.reload_checkpoint()
    evaluator = Evaluator(trainer, params)

    # evaluation only: log scores and stop before any training
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer,
                                         evals=['classif'],
                                         data_loader=valid_data_loader)

        for k, v in scores.items():
            logger.info('%s -> %.6f' % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # training
    for epoch in range(trainer.epoch, params.epochs):

        # update epoch / sampler / learning rate
        trainer.epoch = epoch
        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)
        if params.multi_gpu:
            # reshuffle the distributed shards differently every epoch
            train_sampler.set_epoch(epoch)

        # update learning rate
        trainer.update_learning_rate()

        # train
        for i, (images, targets) in enumerate(train_data_loader):
            trainer.classif_step(images, targets)
            trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate classification accuracy
        scores = evaluator.run_all_evals(trainer,
                                         evals=['classif'],
                                         data_loader=valid_data_loader)

        for name, val in trainer.get_scores().items():
            scores[name] = val

        # print / JSON log (JSON line only on the master process)
        for k, v in scores.items():
            logger.info('%s -> %.6f' % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch: checkpointing and epoch bookkeeping
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
Esempio n. 13
0
File: main.py Progetto: GG-yuki/bugs
def main(args):
    """Hierarchical deep clustering with rotation prediction.

    This code implements the paper: https://arxiv.org/abs/1905.01278
    The method alternates between a hierarchical clustering of the features
    and learning the parameters of a convnet by predicting both the angle
    of the rotation applied to the input data and the cluster assignments
    in a single hierarchical loss.
    """

    # initialize communication groups
    training_groups, clustering_groups = init_distributed_mode(args)

    # check parameters
    check_parameters(args)

    # initialize the experiment
    logger, training_stats = initialize_exp(args, 'epoch', 'iter', 'prec',
                                            'loss', 'prec_super_class',
                                            'loss_super_class',
                                            'prec_sub_class', 'loss_sub_class')

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    dataset = YFCC100M_dataset(r'./dataset', size=args.size_dataset)

    # prepare the different data transformations
    tr_cluster, tr_train = get_data_transformations(args.rotation * 90)

    # build model skeleton
    fix_random_seeds()
    model = model_factory(args.sobel)
    logger.info('model created')

    # load pretrained weights
    load_pretrained(model, args)

    # convert batch-norm layers to nvidia wrapper to enable batch stats reduction
    model = apex.parallel.convert_syncbn_model(model)

    # distributed training wrapper
    model = to_cuda(model, args.gpu_to_work_on, apex=False)
    logger.info('model to cuda')

    # set optimizer
    optimizer = sgd_optimizer(model, args.lr, args.wd)

    # load cluster assignments
    cluster_assignments = load_cluster_assignments(args, dataset)

    # build prediction layer on the super_class
    pred_layer, optimizer_pred_layer = build_prediction_layer(
        model.module.body.dim_output_space,
        args,
    )

    # a second prediction layer over the sub-classes, shared within this
    # process's local training group
    nmb_sub_classes = args.k // args.nmb_super_clusters
    sub_class_pred_layer, optimizer_sub_class_pred_layer = build_prediction_layer(
        model.module.body.dim_output_space,
        args,
        num_classes=nmb_sub_classes,
        group=training_groups[args.training_local_world_id],
    )

    # variables to fetch in checkpoint
    to_restore = {'epoch': 0, 'start_iter': 0}

    # re start from checkpoint
    restart_from_checkpoint(
        args,
        run_variables=to_restore,
        state_dict=model,
        optimizer=optimizer,
        pred_layer_state_dict=pred_layer,
        optimizer_pred_layer=optimizer_pred_layer,
    )
    pred_layer_name = str(args.training_local_world_id) + '-pred_layer.pth.tar'
    restart_from_checkpoint(
        args,
        ckp_path=os.path.join(args.dump_path, pred_layer_name),
        state_dict=sub_class_pred_layer,
        optimizer=optimizer_sub_class_pred_layer,
    )
    args.epoch = to_restore['epoch']
    args.start_iter = to_restore['start_iter']

    # NOTE(review): args.epoch is never advanced inside this loop body;
    # presumably end_of_epoch(args) increments it -- confirm in its
    # implementation, otherwise every iteration re-uses the same epoch value
    for _ in range(args.epoch, args.nepochs):

        logger.info("============ Starting epoch %i ... ============" %
                    args.epoch)
        fix_random_seeds(args.epoch)

        # step 1: Get the final activations for the whole dataset / Cluster them

        if cluster_assignments is None and not args.epoch % args.reassignment:

            logger.info("=> Start clustering step")
            dataset.transform = tr_cluster

            cluster_assignments = get_cluster_assignments(
                args, model, dataset, clustering_groups)

            # reset prediction layers (fresh heads for the new assignments)
            if args.nmb_super_clusters > 1:
                pred_layer, optimizer_pred_layer = build_prediction_layer(
                    model.module.body.dim_output_space,
                    args,
                )
            sub_class_pred_layer, optimizer_sub_class_pred_layer = build_prediction_layer(
                model.module.body.dim_output_space,
                args,
                num_classes=nmb_sub_classes,
                group=training_groups[args.training_local_world_id],
            )

        # step 2: Train the network with the cluster assignments as labels

        # prepare dataset
        dataset.transform = tr_train
        dataset.sub_classes = cluster_assignments

        # concatenate models and their corresponding optimizers
        models = [model, pred_layer, sub_class_pred_layer]
        optimizers = [
            optimizer, optimizer_pred_layer, optimizer_sub_class_pred_layer
        ]

        # train the network for one epoch
        scores = train_network(args, models, optimizers, dataset)

        ## save training statistics
        logger.info(scores)
        training_stats.update(scores)

        # reassign clusters at the next epoch
        if not args.epoch % args.reassignment:
            cluster_assignments = None
            dataset.subset_indexes = None
            end_of_epoch(args)

        # keep all workers in lockstep before the next epoch
        dist.barrier()
Esempio n. 14
0
def main(params):
    """XLM-style training entry point with curriculum / multi-domain options.

    Besides standard CLM/MLM/PC/AE/MT/BT training, this variant can (a) dump
    NMT- or NLM-based domain features for the de-en parallel training set and
    exit, and (b) pick among several trainer/evaluator combinations depending
    on ``params`` flags (encoder_only, dual_encoder, domains,
    curriculum_learning, local_adapt).

    Args:
        params: experiment namespace (argparse-style) controlling data,
            model construction, training steps, and domain-ratio updates.
    """
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # feature-dump mode: compute NMT domain features over the de-en training
    # set, save them with their batch indices, and exit without training
    if params.build_nmt_domain_feature:
        import torch
        from src.curriculum import build_nmt_domain_feature
        dataset = data['para'][('de', 'en')]['train']
        batches, indices = dataset.get_iterator(
            shuffle=False,
            group_by_size=params.group_by_size,
            n_sentences=-1,
        )

        features = build_nmt_domain_feature(data, params, batches, dataset)
        result = {'indices': indices, 'domain_feature': features}
        torch.save(result, params.build_output_path)
        return

    # feature-dump mode: same as above but with NLM-based domain features
    if params.build_nlm_domain_feature:
        import torch
        from src.curriculum import build_nlm_domain_feature
        dataset = data['para'][('de', 'en')]['train']
        batches, indices = dataset.get_iterator(
            shuffle=False,
            group_by_size=params.group_by_size,
            n_sentences=-1,
        )
        # dataset = data['mono_stream']['en']['train']
        # indices = dataset.get_iterator(
        #     shuffle=False
        # )

        features, domain_score, sents = build_nlm_domain_feature(
            data, params, batches, dataset)
        result = {
            'indices': indices,
            'domain_feature': features,
            'domain_score': domain_score,
            'sents': sents
        }
        # accept either a full .pth path or a directory for the output
        if params.build_output_path.endswith('pth'):
            build_output_path = params.build_output_path
        else:
            build_output_path = f'{params.build_output_path}/final.pth'
        torch.save(result, build_output_path)
        return

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
    elif params.dual_encoder:
        encoder1, encoder2 = build_model(params, data['dico'])
    else:
        encoder, decoder = build_model(params, data['dico'])

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    elif params.domains:
        # multi-domain training: trainer depends on curriculum/dual-encoder
        # flags, evaluator additionally on local_adapt
        if params.curriculum_learning:
            trainer = CurriculumTrainer(encoder, decoder, data, params)
        elif params.dual_encoder:
            trainer = DualEncoderTrainer(encoder1, encoder2, data, params)
        else:
            trainer = MultiDomainTrainer(encoder, decoder, data, params)
        if params.local_adapt:
            evaluator = MetaMultiDomainEvaluator(trainer, data, params)
        else:
            if params.curriculum_learning:
                evaluator = EncDecEvaluator(trainer, data, params)
            elif params.dual_encoder:
                evaluator = DualEncoderEvaluator(trainer, data, params)
            else:
                evaluator = MultiDomainEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation only: log scores and stop before any training
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:

            # CLM steps
            for lang1, lang2 in shuf_order(params.clm_steps, params):
                trainer.clm_step(lang1, lang2, params.lambda_clm)

            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            # parallel classification steps
            for lang1, lang2 in shuf_order(params.pc_steps, params):
                trainer.pc_step(lang1, lang2, params.lambda_pc)

            # denoising auto-encoder steps
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            # back-translation steps
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

            trainer.iter()

            # periodically re-balance domain sampling ratios (multi-domain,
            # non-uniform sampling only)
            if not params.dual_encoder and params.domains and params.domain_ratio_update_freq > 0 and trainer.n_total_iter % params.domain_ratio_update_freq == 0 and not params.sampling_uniform:
                evaluator.update_language_sampler_multidomain()
                evaluator.update_dataset_ratio(trainer)

            # periodically reset dataset ratios to their initial values
            if not params.dual_encoder and params.domain_reset_freq > 0 and trainer.n_total_iter % params.domain_reset_freq == 0:
                evaluator.reset_dataset_ratio(trainer)

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log (JSON line only on the master process)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch: checkpointing and epoch bookkeeping
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
Esempio n. 15
0
def main(params):
    """Train (or evaluate) an image classifier on ImageNet / CIFAR.

    Derives dataset-specific sizes (num_classes, img_size, crop_size) from
    ``params.dataset``, builds train/valid loaders and the model, wraps it
    in DistributedDataParallel when requested, then either runs evaluation
    only or trains for ``params.epochs`` epochs with per-epoch validation
    and checkpointing.

    Args:
        params: experiment namespace (argparse-style) with dataset, loader,
            model and distributed-training options.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment / load data
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    if params.is_slurm_job:
        init_signal_handler()

    # dataset-dependent image geometry and class count
    if params.dataset == "imagenet":
        params.num_classes = 1000
        params.img_size = 256
        params.crop_size = 224
    else:
        if params.dataset == "cifar10":
            params.num_classes = 10
        elif params.dataset == "cifar100":
            params.num_classes = 100
        else:
            assert False, "Dataset unbeknownst to me"

        params.img_size = 40
        params.crop_size = 32

    # data loaders / samplers
    train_data_loader, train_sampler = get_data_loader(
        img_size=params.img_size,
        crop_size=params.crop_size,
        shuffle=True,
        batch_size=params.batch_size,
        nb_workers=params.nb_workers,
        distributed_sampler=params.multi_gpu,
        dataset=params.dataset,
        transform=params.transform,
        split='valid' if params.debug_train else params.split_train,
    )

    valid_data_loader, _ = get_data_loader(
        img_size=params.img_size,
        crop_size=params.crop_size,
        shuffle=False,
        batch_size=params.batch_size,
        nb_workers=params.nb_workers,
        distributed_sampler=False,
        dataset=params.dataset,
        transform='center',
        split='valid',
    )

    # build model / cuda
    logger.info("Building %s model ..." % params.architecture)
    model = build_model(params)
    model.cuda()

    # distributed  # TODO: check this https://github.com/NVIDIA/apex/blob/master/examples/imagenet/main.py#L142
    if params.multi_gpu:
        logger.info("Using nn.parallel.DistributedDataParallel ...")
        model = nn.parallel.DistributedDataParallel(
            model,
            device_ids=[params.local_rank],
            output_device=params.local_rank,
            broadcast_buffers=True)

    # build trainer / reload potential checkpoints / build evaluator
    trainer = Trainer(model=model, params=params)
    trainer.reload_checkpoint()
    evaluator = Evaluator(trainer, params)

    # evaluation only: log scores and stop before any training
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer,
                                         evals=['classif', 'recognition'],
                                         data_loader=valid_data_loader)

        for k, v in scores.items():
            logger.info('%s -> %.6f' % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # training (resumes from trainer.epoch after a checkpoint reload)
    for epoch in range(trainer.epoch, params.epochs):

        # update epoch / sampler / learning rate
        trainer.epoch = epoch
        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)
        if params.multi_gpu:
            # reshuffle the distributed shards differently every epoch
            train_sampler.set_epoch(epoch)

        # update learning rate
        trainer.update_learning_rate()

        # train
        for i, (images, targets) in enumerate(train_data_loader):
            trainer.classif_step(images, targets)
            trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate classification accuracy
        scores = evaluator.run_all_evals(trainer,
                                         evals=['classif'],
                                         data_loader=valid_data_loader)

        for name, val in trainer.get_scores().items():
            scores[name] = val

        # print / JSON log (JSON line only on the master process)
        for k, v in scores.items():
            logger.info('%s -> %.6f' % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch: checkpointing and epoch bookkeeping
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
Esempio n. 16
0
def main(params):
    """XLM-style training entry point with an optional adversarial mode.

    When ``params.adv`` is set, language embeddings are disabled before the
    model is built. Builds either an encoder-only model or an
    encoder-decoder pair, then trains with CLM/MLM/PC/AE/MT/BT steps.

    Fix: the original ran evaluation only once after all epochs and never
    called ``trainer.save_best_model`` / ``trainer.save_periodic`` /
    ``trainer.end_epoch``, so no checkpoints were written and
    ``trainer.epoch`` never advanced. Evaluation and checkpointing now
    happen at the end of every epoch, consistent with the other main()
    variants in this file.
    """
    if params.adv:
        params.use_lang_emb = False
        print("Language embeddings are not used...\n \n \n \n")
    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # build model
    # reload-model options are in here
    if params.encoder_only:
        model = build_model(params, data['dico'])
    else:
        encoder, decoder = build_model(params, data['dico'])
    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation only: log scores and stop before any training
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for epoch in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:

            # CLM steps
            for lang1, lang2 in shuf_order(params.clm_steps, params):
                trainer.clm_step(lang1, lang2, params.lambda_clm)

            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            # parallel classification steps
            for lang1, lang2 in shuf_order(params.pc_steps, params):
                trainer.pc_step(lang1, lang2, params.lambda_pc)

            # denoising auto-encoder
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            # back-translation
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

            trainer.iter()

        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate perplexity (per epoch; previously only after all epochs)
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log (JSON line only on the master process)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch: checkpoint and advance trainer.epoch
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
Esempio n. 17
0
def main(params):
    """XLM-style training entry point with optional checkpoint reload.

    When ``params.model_path`` is set, the encoder/decoder pair and its
    dictionary are restored from the checkpoint; otherwise a fresh model is
    built. Then trains with CLM/MLM/PC/AE/MT/BT steps for
    ``params.max_epoch`` epochs, evaluating and checkpointing each epoch.

    Fix: the original constructed the checkpoint encoder/decoder pair twice
    back-to-back (verbatim duplicate), discarding the first pair and
    wasting GPU memory and startup time; they are now built once.

    Args:
        params: experiment namespace (argparse-style) with model_path,
            encoder_only, eval_only, max_epoch, the *_steps lists and their
            lambda_* weights, and is_master.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # load checkpoint
    if params.model_path != "":
        reloaded = torch.load(params.model_path)
        model_params = AttrDict(reloaded['params'])
        dico = Dictionary(reloaded['dico_id2word'], reloaded['dico_word2id'],
                          reloaded['dico_counts'])
        # build the encoder/decoder once and restore their weights
        encoder = TransformerModel(model_params,
                                   dico,
                                   is_encoder=True,
                                   with_output=True).cuda().eval()
        decoder = TransformerModel(model_params,
                                   dico,
                                   is_encoder=False,
                                   with_output=True).cuda().eval()
        encoder.load_state_dict(reloaded['encoder'])
        decoder.load_state_dict(reloaded['decoder'])
        logger.info("Supported languages: %s" %
                    ", ".join(model_params.lang2id.keys()))
    else:
        # build model
        if params.encoder_only:
            model = build_model(params, data['dico'])
        else:
            encoder, decoder = build_model(params, data['dico'])

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation only: log scores and stop before any training
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:

            # CLM steps
            for lang1, lang2 in shuf_order(params.clm_steps, params):
                trainer.clm_step(lang1, lang2, params.lambda_clm)

            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            # parallel classification steps
            for lang1, lang2 in shuf_order(params.pc_steps, params):
                trainer.pc_step(lang1, lang2, params.lambda_pc)

            # denoising auto-encoder steps
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            # back-translation steps
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

            trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log (JSON line only on the master process)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch: checkpointing and epoch bookkeeping
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
Esempio n. 18
0
def main(args):
    """Linear evaluation of frozen convnet features (ImageNet-style or
    PASCAL VOC 2007 datasets).

    Loads train/val datasets (with optional k-fold cross-validation),
    restores a pre-trained backbone, freezes it, and trains a logistic
    regression head (``RegLog``) on features from conv layer ``args.conv``.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(args, make_communication_groups=False)

    # initialize the experiment
    logger, training_stats = initialize_exp(args, 'epoch', 'iter', 'prec',
                                            'loss', 'prec_val', 'loss_val')

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # non-PASCAL datasets live under <data_path>/train and <data_path>/val
    if not 'pascal' in args.data_path:
        main_data_path = args.data_path
        args.data_path = os.path.join(main_data_path, 'train')
        train_dataset = load_data(args)
    else:
        train_dataset = VOC2007_dataset(args.data_path, split=args.split)

    # validate on 'val' while training on 'train', otherwise test
    args.test = 'val' if args.split == 'train' else 'test'
    if not 'pascal' in args.data_path:
        if args.cross_valid is None:
            args.data_path = os.path.join(main_data_path, 'val')
        val_dataset = load_data(args)
    else:
        val_dataset = VOC2007_dataset(args.data_path, split=args.test)

    # k-fold cross-validation samplers, or plain shuffled/sequential loaders
    if args.cross_valid is not None:
        kfold = KFold(per_target(train_dataset.imgs), args.cross_valid, args.kfold)
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, sampler=kfold.train,
            num_workers=args.workers, pin_memory=True)
        val_loader = torch.utils.data.DataLoader(
            val_dataset, batch_size=args.batch_size, sampler=kfold.val,
            num_workers=args.workers)

    else:
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=True,
            num_workers=args.workers, pin_memory=True)
        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=args.batch_size, shuffle=False,
            num_workers=args.workers)

    # prepare the different data transformations
    tr_val, tr_train = get_data_transformations()
    train_dataset.transform = tr_train
    val_dataset.transform = tr_val

    # build model skeleton
    fix_random_seeds(args.seed)
    model = model_factory(args.arch, args.sobel)

    load_pretrained(model, args)

    # keep only conv layers
    model.body.classifier = None
    model.conv = args.conv

    # number of target classes per dataset family
    if 'places' in args.data_path:
        nmb_classes = 205
    elif 'pascal' in args.data_path:
        nmb_classes = 20
    else:
        nmb_classes = 1000

    reglog = RegLog(args.arch, nmb_classes, args.conv)

    # distributed training wrapper
    model = to_cuda(model, [args.gpu_to_work_on], apex=False)
    reglog = to_cuda(reglog, [args.gpu_to_work_on], apex=False)
    logger.info('model to cuda')


    # set optimizer (only the regression head is optimized)
    optimizer = sgd_optimizer(reglog, args.lr, args.wd)

    ## variables to reload to fetch in checkpoint
    to_restore = {'epoch': 0, 'start_iter': 0}

    # re start from checkpoint
    restart_from_checkpoint(
        args,
        run_variables=to_restore,
        state_dict=reglog,
        optimizer=optimizer,
    )
    args.epoch = to_restore['epoch']
    args.start_iter = to_restore['start_iter']

    # frozen backbone, trainable head
    model.eval()
    reglog.train()

    # Linear training
    # NOTE(review): args.epoch is not advanced in this loop body; presumably
    # train_network updates it -- confirm in its implementation
    for _ in range(args.epoch, args.nepochs):

        logger.info("============ Starting epoch %i ... ============" % args.epoch)

        # train the network for one epoch
        scores = train_network(args, model, reglog, optimizer, train_loader)

        if not 'pascal' in args.data_path:
            scores_val = validate_network(val_loader, [model, reglog], args)
        else:
            scores_val = evaluate_pascal(val_dataset, [model, reglog])

        scores = scores + scores_val

        # save training statistics
        logger.info(scores)
        training_stats.update(scores)
Esempio n. 19
0
def main(params):
    """Meta-training entry point.

    Builds a trainer/evaluator for the main task plus one per auxiliary
    eval task, then alternates `one_epoch` training with `end_of_epoch`
    evaluation, copying the freshly trained weights into each eval-task
    trainer before its own epoch.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    # temporarily replace meta_params with a placeholder so the experiment
    # logger does not dump the full structure, then restore it afterwards
    meta_params = copy.deepcopy(params).meta_params
    params.meta_params = "..."  # too long to log
    logger = initialize_exp(params)
    params.meta_params = meta_params

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()
    trainer, evaluator, eval_trainers = get_trainer_evaluator(params, logger)

    # evaluation-only mode: evaluate the main task and every eval task,
    # then exit without training
    if params.eval_only:
        end_of_epoch(params=params,
                     logger=logger,
                     trainer=trainer,
                     evaluator=evaluator)
        if params.eval_tasks:
            logger.info("============ Evaluation task ============")
            for eval_task in params.eval_tasks:
                logger.info("============ %s ============" % eval_task)
                end_of_epoch(params=eval_trainers[eval_task]["params"],
                             logger=logger,
                             trainer=eval_trainers[eval_task]['trainer'],
                             evaluator=eval_trainers[eval_task]['evaluator'],
                             eval_task=eval_task)
        exit()

    # language model training
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        one_epoch(trainer, params)

        # train each eval task for one epoch on a deep copy of the freshly
        # trained main-task weights
        if params.eval_tasks:
            logger.info("============ Evaluation task ============")
            for eval_task in params.eval_tasks:
                logger.info("============ %s ============" % eval_task)
                if params.encoder_only:
                    eval_trainers[eval_task]['trainer'].model = copy.deepcopy(
                        trainer.model)
                else:
                    eval_trainers[eval_task][
                        'trainer'].encoder = copy.deepcopy(trainer.encoder)
                    eval_trainers[eval_task][
                        'trainer'].decoder = copy.deepcopy(trainer.decoder)
                one_epoch(eval_trainers[eval_task]['trainer'],
                          eval_trainers[eval_task]["params"],
                          eval_task=eval_task)

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        end_of_epoch(params=params,
                     logger=logger,
                     trainer=trainer,
                     evaluator=evaluator)

        if params.eval_tasks:
            logger.info("============ Evaluation task ============")
            for eval_task in params.eval_tasks:
                end_of_epoch(params=eval_trainers[eval_task]["params"],
                             logger=logger,
                             trainer=eval_trainers[eval_task]['trainer'],
                             evaluator=eval_trainers[eval_task]['evaluator'],
                             eval_task=eval_task)

        # free unreferenced objects (deep copies above) and log how many
        # the garbage collector reclaimed
        logger.info("============ garbage collector collecting %d ..." %
                    gc.collect())
Esempio n. 20
0
def main(params):
    """Training entry point.

    Builds an encoder-only model or an encoder/decoder pair, optionally
    freezes everything except adapters (plus embeddings, position
    embeddings, prediction layer and embedding layer norm), then runs
    CLM/MLM/PC/AE/MT/BT training steps for `params.max_epoch` epochs,
    evaluating and checkpointing after each one.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # build model
    # reload-model options are in here
    if params.encoder_only:
        model = build_model(params, data['dico'])

        if params.use_adapters:
            logger.info("Using adapters")
            # freeze everything that is not an adapter parameter...
            for name, param in model.named_parameters():
                if not name.startswith("adapters"):
                    param.requires_grad = False

            # ...then re-enable the embedding, position-embedding,
            # prediction-layer and embedding layer-norm parameters
            for param in model.embeddings.parameters():
                param.requires_grad = True
            for param in model.position_embeddings.parameters():
                param.requires_grad = True
            for param in model.pred_layer.parameters():
                param.requires_grad = True
            for param in model.layer_norm_emb.parameters():
                param.requires_grad = True
            # log the final trainable/frozen status of every parameter
            for name, param in model.named_parameters():
                logger.info(name + ' required grad = ' +
                            str(param.requires_grad))

    else:
        encoder, decoder = build_model(params, data['dico'])

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
        logger.info("Number of trainable parameters (encoder): %i" % sum(
            p.numel()
            for p in trainer.model.parameters() if p.requires_grad))

    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)
        logger.info(
            "Number of trainable parameters (encoder): %i" %
            sum(p.numel() for p in encoder.parameters() if p.requires_grad))
        logger.info(
            "Number of trainable parameters (decoder): %i" %
            sum(p.numel() for p in decoder.parameters() if p.requires_grad))

    # evaluation-only mode: run all evals, log the scores, and exit
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:

            # CLM steps
            for lang1, lang2 in shuf_order(params.clm_steps, params):
                trainer.clm_step(lang1, lang2, params.lambda_clm)

            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            # parallel classification steps
            for lang1, lang2 in shuf_order(params.pc_steps, params):
                trainer.pc_step(lang1, lang2, params.lambda_pc)

            # denoising auto-encoder steps (translate a language to itself)
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            # back-translation steps
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

            trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch: checkpoint and advance epoch counters
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
Esempio n. 21
0
def main(params):
    """Training entry point with optional deterministic seeding and
    TensorBoard logging.

    Runs (V)MLM, PC, AE, BT, MT and MMT steps each epoch, evaluates, and
    mirrors every evaluation score to a SummaryWriter keyed by the global
    iteration counter.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    if params.other_seed > -1:
        # deterministic: seed torch (CPU and CUDA), numpy and random
        torch.manual_seed(params.other_seed)
        torch.cuda.manual_seed(params.other_seed)
        np.random.seed(params.other_seed)
        random.seed(params.other_seed)

    if params.iter_seed == -1:
        # non-deterministic per-iteration sampling
        params.iter_seed = None

    # load data
    data = load_data(params)
    # TensorBoard writer for the per-epoch evaluation scores
    writer = SummaryWriter(params.dump_path + "/" + params.exp_name + "_log")

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
    else:
        encoder, decoder = build_model(params, data['dico'])

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation-only mode: run all evals, log the scores, and exit
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        sys.exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)
    # global iteration counter: passed to the (V)MLM steps and used as the
    # TensorBoard global step below
    _iter = 0

    # dump initial weights
    if params.save_initial:
        trainer.save_checkpoint('initial', include_optimizers=False)

    # language model training
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:
            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                if params.only_vlm:
                    # with visual features
                    trainer.vlm_step(lang1, lang2, params.lambda_mlm, _iter)
                else:
                    trainer.mlm_step(lang1, lang2, params.lambda_mlm, _iter)

            # parallel classification steps
            for lang1, lang2 in shuf_order(params.pc_steps, params):
                trainer.pc_step(lang1, lang2, params.lambda_pc)

            # denoising auto-encoder steps
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # back-translation steps
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            # multimodal machine translation steps
            for lang1, lang2 in shuf_order(params.mmt_steps, params):
                trainer.mmt_step(lang1, lang2, params.lambda_mt)

            trainer.iter()
            _iter += 1

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log (and mirror every score to TensorBoard)
        for k, v in scores.items():
            writer.add_scalar(k, v, _iter)
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch: checkpoint and advance epoch counters
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
Esempio n. 22
0
def main(params):
    """Training entry point for (meta-)MLM training.

    Standard setup, then per batch either a meta MLM update
    (`params.do_meta_update`) or a regular MLM step.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
    else:
        encoder, decoder = build_model(params, data['dico'])

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation-only mode: run all evals, log the scores, and exit
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # turn the "src-tgt" language string into a [src, tgt] list; a single
    # language is duplicated so the pair is well-formed
    # NOTE(review): `lgs` is not read again in this function — presumably
    # consumed via params.lgs downstream; confirm.
    params.lgs = lgs = params.lgs.split("-")
    if len(lgs) == 1:
        lgs.append(lgs[0])

    # language model training
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:

            # Replace the original MLM steps: either a meta update on the
            # first language, or a regular (T)MLM step
            for lang1, lang2 in shuf_order(params.mlm_steps, params):

                if params.do_meta_update:
                    trainer.meta_mlm_step(lang1)
                else:
                    trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch: checkpoint and advance epoch counters
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
Esempio n. 23
0
def main(params):
    """Cross-modal / multi-task training entry point.

    Depending on the `params.is_*` flags, runs MLM, text-generation (ntg),
    image-caption, BART-style generation, cross auto-encoder and
    cross-relation steps each epoch, evaluates, and on the master process
    dumps the epoch scores to `epoch_<n>.eval_log` under
    `params.dump_path`.
    """
    # hoisted to function scope (was re-imported inside the epoch loop)
    import os

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)
    # NOTE(review): debug print of the full data dict — consider logger.debug
    print(data)

    # build model
    if params.encoder_only:
        model = build_model(params)
    else:
        encoder, decoder = build_model(params)

    # build trainer, reload potential checkpoints / build evaluator
    trainer = XTrainer(model, data, params)
    evaluator = XEvaluator(trainer, data, params)

    # evaluation-only mode: run all evals, log the scores, and exit
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" % trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:

            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                if params.is_understanding:
                    trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            # text-generation (ntg) steps
            for lang1, lang2 in shuf_order(params.text_steps, params):
                if params.is_ntg:
                    trainer.ntg_step(lang1, None, params.lambda_mlm)

            # cross-modal caption steps
            for lang1, lang2 in shuf_order(params.cross_modal_steps, params):
                if params.is_mt:
                    trainer.mt_ic_step(lang1, lang2, params.lambda_ic)
                else:
                    trainer.ic_step(lang1, lang2, params.lambda_ic)

                # optional adversarial (FreeLB) variant of the caption step
                if params.is_freelb:
                    trainer.free_lb_ic_step(lang1, lang2, params.lambda_ic)

            # BART-style generation steps (MLM + MASS objectives)
            for lang1, lang2 in shuf_order(params.mlm_steps, params, n=3):
                if params.is_generation:
                    trainer.bart_mlm_step(lang1, lang2, params.lambda_imlm)
                    trainer.bart_mass_step(lang1, lang2, params.lambda_imlm)

            # cross-modal (image) auto-encoder steps
            for lang1, lang2 in shuf_order(params.cross_ae_steps, params):
                trainer.bart_img_step(lang1, lang2, params.lambda_ida)

            # cross-modal relation steps
            for lang1, lang2 in shuf_order(params.cross_rel_steps, params):
                if params.is_pretrain:
                    trainer.pretrain_rel_step(lang1, lang2)
                else:
                    if params.is_slide:
                        trainer.slide_step(lang1, lang2, params.lambda_t2i)
                    else:
                        # support multi languages
                        trainer.rel_step(lang1, lang2, params.lambda_t2i, params.lambda_i2t)

            trainer.iter()

        logger.info("============ End of epoch %i ============" % trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        evaluate_results = []
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))
            evaluate_results.append(json.dumps(scores))
            # dump this epoch's scores to a dedicated eval-log file
            with open(os.path.join(params.dump_path, "epoch_{0}.eval_log".format(trainer.epoch)), 'w') as writer:
                for line in evaluate_results:
                    writer.write(line + '\n')

        # end of epoch: checkpoint and advance epoch counters
        trainer.save_best_model(scores)
        if trainer.epoch % params.save_every_epoch == 0 and params.is_master:
            trainer.save_model('model_pretrain_%i' % trainer.epoch)
        trainer.save_periodic()
        trainer.end_epoch(scores)
Esempio n. 24
0
def seq2seq_main(params):
    '''
      Seq2seq training entry point that uses a different
      vocabulary/dictionary for the source (src) and target (tgt) side.
    '''

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = seq2seq_load_data(params)

    # build model
    # because the language pair is re-sorted in ascending order
    # ((zh-en) --> (en-zh)), en becomes the src side and zh the tgt side
    encoder, decoder = build_seq2seq_model(
        params, data['tgt_dico'], data['src_dico'])

    # build trainer, reload potential checkpoints / build evaluator
    trainer = EncDecTrainer(encoder, decoder, data, params)
    evaluator = MyEncDecEvaluator(trainer, data, params)

    # evaluation-only mode: run all evals, log the scores, and exit
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:

            # denoising auto-encoder steps
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            # back-translation steps
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

            trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch: checkpoint and advance epoch counters
        trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
Esempio n. 25
0
def main(args):
    """Supervised training / evaluation entry point.

    Loads train and val splits, builds a model with a freshly initialized
    classifier head, optionally merges Sobel filters into the first conv,
    wraps it for distributed training, restores from a checkpoint, and
    either evaluates once or trains for the remaining epochs.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(args, make_communication_groups=False)

    # initialize the experiment
    logger, training_stats = initialize_exp(args, 'epoch', 'iter', 'prec',
                                            'loss', 'prec_val', 'loss_val')

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # in debug mode train directly on the (smaller) validation split
    main_data_path = args.data_path
    if args.debug:
        args.data_path = os.path.join(main_data_path, 'val')
    else:
        args.data_path = os.path.join(main_data_path, 'train')
    train_dataset = load_data(args)

    args.data_path = os.path.join(main_data_path, 'val')
    val_dataset = load_data(args)

    # prepare the different data transformations
    tr_val, tr_train = get_data_transformations()
    train_dataset.transform = tr_train
    val_dataset.transform = tr_val
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        num_workers=args.workers,
        pin_memory=True,
    )

    # build model skeleton
    fix_random_seeds(args.seed)
    # 205 classes for Places205, otherwise assume ImageNet (1000 classes)
    nmb_classes = 205 if 'places' in args.data_path else 1000
    model = model_factory(args, relu=True, num_classes=nmb_classes)

    # load pretrained weights
    load_pretrained(model, args)

    # merge sobel layers with first convolution layer
    if args.sobel2RGB:
        sobel2RGB(model)

    # re-initialize the classifier head (small random weights, 0.1 bias)
    if hasattr(model.body, 'classifier'):
        for m in model.body.classifier.modules():
            if isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.fill_(0.1)

    # distributed training wrapper
    model = to_cuda(model, [args.gpu_to_work_on], apex=True)
    logger.info('model to cuda')

    # set optimizer
    optimizer = sgd_optimizer(model, args.lr, args.wd)

    ## variables to re-fetch from the checkpoint
    to_restore = {'epoch': 0, 'start_iter': 0}

    # restart from checkpoint (no-op when none exists)
    restart_from_checkpoint(
        args,
        run_variables=to_restore,
        state_dict=model,
        optimizer=optimizer,
    )
    args.epoch = to_restore['epoch']
    args.start_iter = to_restore['start_iter']

    # evaluation-only mode
    if args.evaluate:
        validate_network(val_loader, [model], args)
        return

    # Supervised training
    # NOTE(review): args.epoch is logged each iteration; it only advances
    # if train_network mutates args.epoch — confirm.
    for _ in range(args.epoch, args.nepochs):

        logger.info("============ Starting epoch %i ... ============" %
                    args.epoch)

        # re-seed so each epoch's randomness is reproducible
        fix_random_seeds(args.seed + args.epoch)

        # train the network for one epoch
        adjust_learning_rate(optimizer, args)
        scores = train_network(args, model, optimizer, train_dataset)

        scores_val = validate_network(val_loader, [model], args)

        # save training statistics
        logger.info(scores + scores_val)
        training_stats.update(scores + scores_val)
Esempio n. 26
0
def main(params):
    """Training entry point.

    Runs CLM, MLM/TLM, denoising auto-encoder, MT and sampled
    back-translation steps each epoch, then evaluates and checkpoints.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # build model
    if params.encoder_only:
        model = build_model(params, data['dico'])
    else:
        encoder, decoder = build_model(params, data['dico'])

    # build trainer, reload potential checkpoints / build evaluator
    if params.encoder_only:
        trainer = SingleTrainer(model, data, params)
        evaluator = SingleEvaluator(trainer, data, params)
    else:
        trainer = EncDecTrainer(encoder, decoder, data, params)
        evaluator = EncDecEvaluator(trainer, data, params)

    # evaluation-only mode: run all evals, log the scores, and exit
    if params.eval_only:
        scores = evaluator.run_all_evals(trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training
    for _ in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    trainer.epoch)

        trainer.n_sentences = 0

        while trainer.n_sentences < trainer.epoch_size:

            # CLM steps (causal language model)
            for lang1, lang2 in shuf_order(params.clm_steps, params):
                trainer.clm_step(lang1, lang2, params.lambda_clm)

            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                trainer.mlm_step(lang1, lang2, params.lambda_mlm)

            # denoising auto-encoder steps
            for lang in shuf_order(params.ae_steps):
                trainer.mt_step(lang, lang, params.lambda_ae)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                trainer.mt_step(lang1, lang2, params.lambda_mt)

            # back-translation steps (with a sampling temperature)
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                trainer.bt_step(lang1, lang2, lang3,
                                params.lambda_bt, params.bt_sample_temperature)

            trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch: only track the best model when a validation
        # metric is configured
        if params.validation_metrics != '':
            trainer.save_best_model(scores)
        trainer.save_periodic()
        trainer.end_epoch(scores)
Esempio n. 27
0
def main(params):
    """Big/small model training entry point (distillation-style setup).

    Builds a full-size ("big") model and, when `params.cut_layer` is set,
    a reduced ("small") model with some layers cut. The big trainer only
    assists the training steps of the small trainer, which is the one
    that gets evaluated and checkpointed.
    """

    # initialize the multi-GPU / multi-node training
    init_distributed_mode(params)

    # initialize the experiment
    logger = initialize_exp(params)

    # initialize SLURM signal handler for time limit / pre-emption
    init_signal_handler()

    # load data
    data = load_data(params)

    # build the big model
    if params.encoder_only:
        big_model = build_model(params, data['dico'], cut=False)
    else:
        # modification point 1
        big_encoder, big_decoder = build_model(params, data['dico'], cut=False)

    # if we cut some layers, must build a small model
    # NOTE(review): if cut_layer is falsy, small_model / small_encoder are
    # undefined when the small trainer is built below — presumably
    # cut_layer is always set on this code path; confirm.
    if params.cut_layer:
        if params.encoder_only:
            small_model = build_model(params, data['dico'], cut=True)
        else:
            # modification point 1
            small_encoder, small_decoder = build_model(params,
                                                       data['dico'],
                                                       cut=True)

    # build the big trainer, reload potential checkpoints;
    # the big trainer is only used to assist training, so it doesn't need
    # an evaluator
    if params.encoder_only:
        big_trainer = SingleTrainer(big_model, data, params)
    else:
        big_trainer = EncDecTrainer(big_encoder, big_decoder, data, params)

    # force all objective coefficients to a constant "1"
    # NOTE(review): the lambdas are assigned as strings — presumably
    # parsed into schedules downstream; confirm.
    params.lambda_mlm = "1"
    params.lambda_clm = "1"
    params.lambda_pc = "1"
    params.lambda_ae = "1"
    params.lambda_mt = "1"
    params.lambda_bt = "1"

    # build the small model, and use it for evaluator
    if params.encoder_only:
        small_trainer = small_SingleTrainer(small_model, data, params)
        evaluator = SingleEvaluator(small_trainer, data, params)
    else:
        small_trainer = small_EncDecTrainer(small_encoder, small_decoder, data,
                                            params)
        evaluator = EncDecEvaluator(small_trainer, data, params)

    # evaluation only for the small trainer
    if params.eval_only:
        scores = evaluator.run_all_evals(small_trainer)
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        logger.info("__log__:%s" % json.dumps(scores))
        exit()

    # set sampling probabilities for training
    set_sampling_probs(data, params)

    # language model training: the big trainer is passed into the small
    # trainer's CLM/MLM/AE/MT steps to assist them
    for count in range(params.max_epoch):

        logger.info("============ Starting epoch %i ... ============" %
                    small_trainer.epoch)

        small_trainer.n_sentences = 0

        while small_trainer.n_sentences < small_trainer.epoch_size:

            # CLM steps
            for lang1, lang2 in shuf_order(params.clm_steps, params):
                small_trainer.clm_step(lang1, lang2, params.lambda_clm,
                                       big_trainer)
            # MLM steps (also includes TLM if lang2 is not None)
            for lang1, lang2 in shuf_order(params.mlm_steps, params):
                small_trainer.mlm_step(lang1, lang2, params.lambda_mlm,
                                       big_trainer)

            # parallel classification steps
            for lang1, lang2 in shuf_order(params.pc_steps, params):
                small_trainer.pc_step(lang1, lang2, params.lambda_pc)

            # denoising auto-encoder steps
            for lang in shuf_order(params.ae_steps):
                small_trainer.mt_step(lang, lang, params.lambda_ae,
                                      big_trainer)

            # machine translation steps
            for lang1, lang2 in shuf_order(params.mt_steps, params):
                small_trainer.mt_step(lang1, lang2, params.lambda_mt,
                                      big_trainer)

            # back-translation steps
            for lang1, lang2, lang3 in shuf_order(params.bt_steps):
                small_trainer.bt_step(lang1, lang2, lang3, params.lambda_bt)

            small_trainer.iter()

        logger.info("============ End of epoch %i ============" %
                    small_trainer.epoch)

        # evaluate perplexity
        scores = evaluator.run_all_evals(small_trainer)

        # print / JSON log
        for k, v in scores.items():
            logger.info("%s -> %.6f" % (k, v))
        if params.is_master:
            logger.info("__log__:%s" % json.dumps(scores))

        # end of epoch: checkpoint and advance epoch counters
        small_trainer.save_best_model(scores)
        small_trainer.save_periodic()
        small_trainer.end_epoch(scores)