Example #1
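Each excerpt below shows only a main() function (plus a few helpers) and omits the module-level header it relies on. As a rough sketch, assuming standard aliases and the project-local modules that the snippets reference by name (args, utils, check_args, and so on), the missing imports would look something like this:

import copy
import json
import os
import random
import time
from datetime import datetime as dt
from functools import partial

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader

import logging as log      # used below as log.basicConfig / log.info
import transformers        # HuggingFace tokenizer/config loading (Examples 3, 4, 7)
from tqdm import trange    # progress bar for the meta-learning loop (Example 4)

# project-local modules referenced by the individual examples, for instance:
# import utils, global_variables, dataset_helper, nnet_models, nnet_models_new
# import nmt_dataset, train_utilities, myio, model, learner, meta_learner
# import cont_learning, analyze
# from args import args   (Example 5 imports it this way)
# check_args is also assumed to be importable from a project module
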
def main():
    start = time.time()
    parser = args.parse_args()

    # run some checks on arguments
    check_args(parser)

    # format logging
    log_name = os.path.join(
        parser.run_log,
        '{}_run_log_{}.log'.format(parser.experiment,
                                   dt.now().strftime("%Y%m%d_%H%M")))

    log.basicConfig(filename=log_name,
                    format='%(asctime)s | %(name)s -- %(message)s',
                    level=log.INFO)
    os.chmod(log_name, parser.access_mode)

    # use GPU (CUDA) if available, otherwise fall back to CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Starting experiment {} VN -> EN NMT on {}.".format(
        parser.experiment, device))
    log.info("Starting experiment {} VN -> EN NMT on {}.".format(
        parser.experiment, device))

    # set seed for replication
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)
    log.info("For reproducibility, the seed is set to {}.".format(parser.seed))

    # set file paths
    source_name = parser.source_name
    target_name = parser.target_name

    # get saved models dir
    base_saved_models_dir = parser.save_dir
    saved_models_dir = os.path.join(base_saved_models_dir,
                                    source_name + '2' + target_name)
    plots_dir = parser.plots_dir

    log.info("We will save the models in this directory: {}".format(
        saved_models_dir))
    log.info("We will save the plots in this directory: {}".format(plots_dir))

    # get data dir
    main_data_path = parser.data_dir
    path_to_train_data = {
        'source': main_data_path + 'train.tok.' + source_name,
        'target': main_data_path + 'train.tok.' + target_name
    }
    path_to_dev_data = {
        'source': main_data_path + 'dev.tok.' + source_name,
        'target': main_data_path + 'dev.tok.' + target_name
    }
    path_to_test_data = {
        'source': main_data_path + 'test.tok.' + source_name,
        'target': main_data_path + 'test.tok.' + target_name
    }

    # Configuration
    bs = parser.batch_size
    log.info("Batch size = {}.".format(bs))

    enc_emb = parser.enc_emb
    enc_hidden = parser.enc_hidden
    enc_layers = parser.enc_layers
    rnn_type = parser.rnn_type

    dec_emb = parser.dec_emb
    dec_hidden = parser.dec_hidden
    dec_layers = parser.dec_layers

    learning_rate = parser.learning_rate
    num_epochs = parser.epochs
    attn_flag = parser.wo_attn
    log.info("The attention flag is set to {}.".format(attn_flag))
    beam_size = parser.beam_size
    log.info("We evaluate using beam size of {}.".format(beam_size))

    train, val, test, en_lang, vi_lang = dataset_helper.train_val_load(
        "", main_data_path)

    # get vocab sizes
    log.info('English has vocab size of: {} words.'.format(en_lang.n_words))
    log.info('Vietnamese has vocab size of: {} words.'.format(vi_lang.n_words))

    # cap the max sentence length at the 95th percentile of training lengths
    MAX_LEN = int(train['en_len'].quantile(0.95))
    log.info(
        'We will have a max sentence length of {} (95th percentile).'.format(
            MAX_LEN))

    # set data loaders
    bs_dict = {'train': bs, 'validate': 1, 'test': 1}
    shuffle_dict = {'train': True, 'validate': False, 'test': False}

    train_used = train
    val_used = val

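    # training batches use the MAX_LEN-capped collate function; validation and
    # test use a separate collate function, batch size 1, and no shuffling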
    collate_fn_dict = {
        'train': partial(dataset_helper.vocab_collate_func, MAX_LEN=MAX_LEN),
        'validate': dataset_helper.vocab_collate_func_val,
        'test': dataset_helper.vocab_collate_func_val
    }

    transformed_dataset = {
        'train': dataset_helper.Vietnamese(train_used),
        'validate': dataset_helper.Vietnamese(val_used, val=True),
        'test': dataset_helper.Vietnamese(test, val=True)
    }

    dataloader = {
        x: DataLoader(transformed_dataset[x],
                      batch_size=bs_dict[x],
                      collate_fn=collate_fn_dict[x],
                      shuffle=shuffle_dict[x],
                      num_workers=0)
        for x in ['train', 'validate', 'test']
    }

    # instantiate encoder/decoder
    encoder_wo_att = nnet_models.EncoderRNN(input_size=vi_lang.n_words,
                                            embed_dim=enc_emb,
                                            hidden_size=enc_hidden,
                                            n_layers=enc_layers,
                                            rnn_type=rnn_type).to(device)
    decoder_wo_att = nnet_models.AttentionDecoderRNN(
        output_size=en_lang.n_words,
        embed_dim=dec_emb,
        hidden_size=dec_hidden,
        n_layers=dec_layers,
        attention=attn_flag).to(device)

    # instantiate optimizer
    if parser.optimizer == 'sgd':
        encoder_optimizer = optim.SGD(encoder_wo_att.parameters(),
                                      lr=learning_rate,
                                      nesterov=True,
                                      momentum=0.99)
        decoder_optimizer = optim.SGD(decoder_wo_att.parameters(),
                                      lr=learning_rate,
                                      nesterov=True,
                                      momentum=0.99)
    elif parser.optimizer == 'adam':
        # use a fixed 1e-4 learning rate for Adam
        encoder_optimizer = optim.Adam(encoder_wo_att.parameters(), lr=1e-4)
        decoder_optimizer = optim.Adam(decoder_wo_att.parameters(), lr=1e-4)
    else:
        raise ValueError('Invalid optimizer!')

    # instantiate scheduler
    enc_scheduler = ReduceLROnPlateau(encoder_optimizer,
                                      min_lr=1e-4,
                                      factor=0.5,
                                      patience=0)
    dec_scheduler = ReduceLROnPlateau(decoder_optimizer,
                                      min_lr=1e-4,
                                      factor=0.5,
                                      patience=0)
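    # NLLLoss with ignore_index ensures padded target positions do not
    # contribute to the loss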
    criterion = nn.NLLLoss(ignore_index=global_variables.PAD_IDX)

    log.info(
        "Seq2Seq Model with the following parameters: batch_size = {}, learning_rate = {}, rnn_type = {}, enc_emb = {}, enc_hidden = {}, enc_layers = {}, dec_emb = {}, dec_hidden = {}, dec_layers = {}, num_epochs = {}, source_name = {}, target_name = {}"
        .format(bs, learning_rate, rnn_type, enc_emb, enc_hidden, enc_layers,
                dec_emb, dec_hidden, dec_layers, num_epochs, source_name,
                target_name))

    # do we want to train again?
    train_again = False
    encoder_save = '{}_wo_att_{}bs_{}hs_{}_{}beam_enc_{}_layer'.format(
        rnn_type, bs, enc_hidden, parser.optimizer, beam_size, enc_layers)
    decoder_save = '{}_wo_att_{}bs_{}hs_{}_{}beam_dec_{}_layer'.format(
        rnn_type, bs, enc_hidden, parser.optimizer, beam_size, dec_layers)

    if os.path.exists(utils.get_full_filepath(
            saved_models_dir, encoder_save)) and os.path.exists(
                utils.get_full_filepath(saved_models_dir,
                                        decoder_save)) and (not train_again):
        log.info("Retrieving saved encoder from {}".format(
            utils.get_full_filepath(saved_models_dir, encoder_save)))
        log.info("Retrieving saved decoder from {}".format(
            utils.get_full_filepath(saved_models_dir, decoder_save)))
        encoder_wo_att.load_state_dict(
            torch.load(utils.get_full_filepath(saved_models_dir,
                                               encoder_save)))
        decoder_wo_att.load_state_dict(
            torch.load(utils.get_full_filepath(saved_models_dir,
                                               decoder_save)))
    else:
        log.info("Check if encoder path exists: {}".format(
            utils.get_full_filepath(saved_models_dir, encoder_save)))
        log.info("Check if decoder path exists: {}".format(
            utils.get_full_filepath(saved_models_dir, decoder_save)))
        log.info("Encoder and Decoder do not exist! Starting to train...")
        encoder_wo_att, decoder_wo_att, loss_hist, acc_hist = train_utilities.train_model(
            encoder_optimizer,
            decoder_optimizer,
            encoder_wo_att,
            decoder_wo_att,
            criterion,
            "no_attention",
            dataloader,
            en_lang,
            vi_lang,
            saved_models_dir,
            encoder_save,
            decoder_save,
            num_epochs=num_epochs,
            rm=0.95,
            enc_scheduler=enc_scheduler,
            dec_scheduler=dec_scheduler)
        log.info("Total time is: {} min : {} s".format(
            (time.time() - start) // 60, (time.time() - start) % 60))
        log.info(
            "We will save the encoder/decoder in this directory: {}".format(
                saved_models_dir))

    # BLEU with beam size
    bleu_no_unk, att_score_wo, pred_wo, src_wo = train_utilities.validation_beam_search(
        encoder_wo_att,
        decoder_wo_att,
        dataloader['validate'],
        en_lang,
        vi_lang,
        'no_attention',
        beam_size,
        verbose=False)

    log.info("Bleu-{} Score (No UNK): {}".format(beam_size, bleu_no_unk))
    print("Bleu-{} Score (No UNK): {}".format(beam_size, bleu_no_unk))

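    # same beam search again, this time with replace_unk=True so that <unk>
    # tokens in the hypotheses are handled before BLEU is computed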
    bleu_unk, att_score_wo, pred_wo, src_wo = train_utilities.validation_beam_search(
        encoder_wo_att,
        decoder_wo_att,
        dataloader['validate'],
        en_lang,
        vi_lang,
        'no_attention',
        beam_size,
        verbose=False,
        replace_unk=True)

    log.info("Bleu-{} Score (UNK): {}".format(beam_size, bleu_unk))
    print("Bleu-{} Score (UNK): {}".format(beam_size, bleu_unk))

    # generate 5 random predictions
    indexes = range(len(pred_wo))
    for i in np.random.choice(indexes, 5):
        print('Source: {} \nPrediction: {}\n---'.format(src_wo[i], pred_wo[i]))
        log.info('Source: {} \nPrediction: {}\n---'.format(
            src_wo[i], pred_wo[i]))

    log.info("Exported Binned Bleu Score Plot to {}!".format(plots_dir))
    _, _, fig = utils.get_binned_bl_score(
        encoder=encoder_wo_att,
        decoder=decoder_wo_att,
        val_dataset=transformed_dataset['validate'],
        attn_flag=attn_flag,
        beam_size=beam_size,
        location=plots_dir,
        collate=collate_fn_dict['validate'],
        lang_en=en_lang,
        lang_vi=vi_lang)
Example #2
def main():
    start = time.time()
    parser = args.parse_args()

    # run some checks on arguments
    check_args(parser)

    # format logging
    log_name = os.path.join(
        parser.run_log,
        '{}_run_log_{}.log'.format(parser.experiment,
                                   dt.now().strftime("%Y%m%d_%H%M")))

    log.basicConfig(filename=log_name,
                    format='%(asctime)s | %(name)s -- %(message)s',
                    level=log.INFO)
    os.chmod(log_name, parser.access_mode)

    # use GPU (CUDA) if available, otherwise fall back to CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    log.info("Starting experiment {} VN -> EN NMT on {}.".format(
        parser.experiment, device))

    # set seed for replication
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)
    log.info("For reproducibility, the seed is set to {}.".format(parser.seed))

    # set file paths
    source_name = parser.source_name
    target_name = parser.target_name

    # get saved models dir
    base_saved_models_dir = parser.save_dir
    saved_models_dir = os.path.join(base_saved_models_dir,
                                    source_name + '2' + target_name)
    plots_dir = parser.plots_dir

    log.info("We will save the models in this directory: {}".format(
        saved_models_dir))
    log.info("We will save the plots in this directory: {}".format(plots_dir))

    # get data dir
    main_data_path = parser.data_dir
    path_to_train_data = {
        'source': main_data_path + 'train.' + source_name,
        'target': main_data_path + 'train.' + target_name
    }
    path_to_dev_data = {
        'source': main_data_path + 'dev.' + source_name,
        'target': main_data_path + 'dev.' + target_name
    }
    # get language objects
    saved_language_model_dir = os.path.join(saved_models_dir, 'lang_obj')

    # get dictionary of datasets
    dataset_dict = {
        'train':
        nmt_dataset.LanguagePair(source_name=source_name,
                                 target_name=target_name,
                                 filepath=path_to_train_data,
                                 lang_obj_path=saved_language_model_dir,
                                 minimum_count=1),
        'dev':
        nmt_dataset.LanguagePair(source_name=source_name,
                                 target_name=target_name,
                                 filepath=path_to_dev_data,
                                 lang_obj_path=saved_language_model_dir,
                                 minimum_count=1)
    }

    # cap the max sentence length at the 99.99th percentile of source lengths
    MAX_LEN = int(dataset_dict['train'].main_df['source_len'].quantile(0.9999))
    log.info("MAX_LEN (99.99th percentile) = {}".format(MAX_LEN))
    batchSize = parser.batch_size
    log.info("Batch size = {}.".format(batchSize))

    dataloader_dict = {
        'train':
        DataLoader(dataset_dict['train'],
                   batch_size=batchSize,
                   collate_fn=partial(nmt_dataset.vocab_collate_func,
                                      MAX_LEN=MAX_LEN),
                   shuffle=True,
                   num_workers=0),
        'dev':
        DataLoader(dataset_dict['dev'],
                   batch_size=batchSize,
                   collate_fn=partial(nmt_dataset.vocab_collate_func,
                                      MAX_LEN=MAX_LEN),
                   shuffle=True,
                   num_workers=0)
    }

    # Configuration
    source_lang_obj = dataset_dict['train'].source_lang_obj
    target_lang_obj = dataset_dict['train'].target_lang_obj

    source_vocab = dataset_dict['train'].source_lang_obj.n_words
    target_vocab = dataset_dict['train'].target_lang_obj.n_words
    hidden_size = parser.hidden_size
    rnn_layers = parser.rnn_layers
    lr = parser.learning_rate
    longest_label = parser.longest_label
    gradient_clip = parser.gradient_clip
    num_epochs = parser.epochs

    log.info(
        "The source vocab ({}) has {} words and target vocab ({}) has {} words"
        .format(source_name, source_vocab, target_name, target_vocab))

    # encoder model
    encoder_rnn = nnet_models_new.EncoderRNN(input_size=source_vocab,
                                             hidden_size=hidden_size,
                                             numlayers=rnn_layers)
    # decoder model
    decoder_rnn = nnet_models_new.DecoderRNN(output_size=target_vocab,
                                             hidden_size=hidden_size,
                                             numlayers=rnn_layers)

    # seq2seq model
    nmt_rnn = nnet_models_new.seq2seq(
        encoder_rnn,
        decoder_rnn,
        lr=lr,
        hiddensize=hidden_size,
        numlayers=rnn_layers,
        target_lang=dataset_dict['train'].target_lang_obj,
        longest_label=longest_label,
        clip=gradient_clip,
        device=device)

    log.info(
        "Seq2Seq Model with the following parameters: batch_size = {}, learning_rate = {}, hidden_size = {}, rnn_layers = {}, lr = {}, longest_label = {}, gradient_clip = {}, num_epochs = {}, source_name = {}, target_name = {}"
        .format(batchSize, lr, hidden_size, rnn_layers, lr, longest_label,
                gradient_clip, num_epochs, source_name, target_name))

    # do we want to train again?
    train_again = False

    saved_file_name = 'no_attn_bs{}_lr{}_hs_{}_rnnlayer{}'.format(
        batchSize, lr, hidden_size, rnn_layers)

    # check if there is a saved model and if we want to train again
    if os.path.exists(utils.get_full_filepath(saved_models_dir,
                                              'rnn')) and (not train_again):
        log.info("Retrieving saved model from {}".format(
            utils.get_full_filepath(saved_models_dir, 'rnn')))
        nmt_rnn = torch.load(utils.get_full_filepath(saved_models_dir, 'rnn'),
                             map_location=global_variables.device)
    # train model again
    else:
        log.info("Check if this path exists: {}".format(
            utils.get_full_filepath(saved_models_dir, saved_file_name)))
        log.info("It does not exist! Starting to train...")
        utils.train_model(dataloader_dict,
                          nmt_rnn,
                          num_epochs=num_epochs,
                          saved_model_path=saved_models_dir,
                          enc_type=saved_file_name)
    log.info("Total time is: {} min : {} s".format((time.time() - start) // 60,
                                                   (time.time() - start) % 60))
    log.info("We will save the models in this directory: {}".format(
        saved_models_dir))

    # generate translations
    use_cuda = torch.cuda.is_available()
    utils.get_translation(nmt_rnn, 'I love to watch science movies on Mondays',
                          source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(nmt_rnn,
                          'I want to be the best friend that I can be',
                          source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(nmt_rnn, 'I love you', source_lang_obj, use_cuda,
                          source_name, target_name)
    utils.get_translation(
        nmt_rnn,
        'I love football, I like to watch it with my friends. It is always a great time.',
        source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(
        nmt_rnn,
        'I do not know what I would do without pizza, it is very tasty to eat. If I could have any food in the world it would probably be pizza.',
        source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(
        nmt_rnn,
        'Trump is the worst president in all of history. He can be a real racist and say very nasty things to people of color.',
        source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(nmt_rnn, 'Thank you very much.', source_lang_obj,
                          use_cuda, source_name, target_name)
    utils.get_translation(nmt_rnn, 'Think about your own choices.',
                          source_lang_obj, use_cuda, source_name, target_name)
    utils.get_translation(
        nmt_rnn,
        'I recently did a survey with over 2,000 Americans , and the average number of choices that the typical American reports making is about 70 in a typical day .',
        source_lang_obj, use_cuda, source_name, target_name)

    # export plot
    log.info("Exported Binned Bleu Score Plot to {}!".format(plots_dir))
    _, _, fig = utils.get_binned_bl_score(nmt_rnn,
                                          dataset_dict['dev'],
                                          plots_dir,
                                          batchSize=batchSize)
Example #3
def main():

    # parse arguments
    parser = args.parse_args()

    # set up logger
    log_path = os.path.join(parser.save_dir, "logs")
    if not os.path.exists(log_path):
        os.mkdir(log_path)

    log_fname = os.path.join(
        log_path, "{}_log_{}.log".format(parser.exp_name,
                                         dt.now().strftime("%Y%m%d_%H%M")))

    log.basicConfig(filename=log_fname,
                    format='%(asctime)s: %(name)s || %(message)s',
                    level=log.INFO)

    # =============================================================================
    #     start
    # =============================================================================
    log.info("=" * 40 + " Start Program " + "=" * 40)

    # =============================================================================
    #     misc stuff
    # =============================================================================

    # use GPU (CUDA) if available, otherwise fall back to CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    log.info("Device is {}".format(device))

    # set random seeds
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)

    # set data directory
    log.info("Data Directory is {}.".format(parser.data_dir))

    # =============================================================================
    #     import data
    # =============================================================================
    task_names = parser.data_name.split(',')
    content_headers = parser.content.split(',')
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        parser.model, do_lower_case=parser.do_lower_case)
    data_handler = myio.IO(
        data_dir=parser.data_dir,
        model_name=parser.model,
        task_names=task_names,
        tokenizer=tokenizer,
        max_length=parser.input_length,
        content=content_headers,
        review_key=parser.review_key,
        label_name=parser.label_name,
        val_split=parser.val_split,
        test_split=parser.test_split,
        batch_size=parser.batch_size,
        shuffle=not parser.no_shuffle,
        cache=not parser.no_cache,
    )

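    # read and preprocess the datasets for the tasks listed in task_names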
    data_handler.read_task()

    # =============================================================================
    #     define model
    # =============================================================================
    log.info("=" * 40 + " Defining Model " + "=" * 40)
    config = transformers.AutoConfig.from_pretrained(parser.model)
    classifier = model.Model(
        model=parser.model,
        config=config,
        n_others=parser.n_others,
        n_hidden=parser.n_class_hidden,
        n_flag=parser.n_labels,
        load=parser.preload_emb,
        load_name=parser.preload_emb_name,
    )

    # =============================================================================
    #     define trainer
    # =============================================================================

    log.info("Save Directory is {}.".format(parser.save_dir))
    log.info("=" * 40 + " Defining Trainer " + "=" * 40)

    # create trainer object
    trainer = learner.Learner(
        model=classifier,
        device=device,
        myio=data_handler,
        max_epochs=parser.max_epochs,
        save_path=parser.save_dir,
        lr=parser.lr,
        weight_decay=parser.weight_decay,
        pct_start=parser.pct_start,
        anneal_strategy=parser.anneal_strategy,
        cycle_momentum=parser.cycle_momentum,
        log_int=parser.log_int,
        buffer_break=not parser.no_early_stop,
        break_int=parser.patience,
        accumulate_int=parser.grad_accum,
        max_grad_norm=parser.max_grad_norm,
        n_others=parser.n_others,
        batch_size=parser.batch_size,
        check_int=parser.check_int,
        save=parser.save,
        test=parser.test,
    )

    # train model
    best = trainer.learn(
        model_name=parser.model,
        task_name=task_names[0],
        early_check=parser.early_check,
        debug=parser.debug,
    )

    best['experiment'] = parser.exp_name

    # write results to "results.jsonl"
    if not os.path.exists(parser.save_dir):
        os.mkdir(parser.save_dir)

    results_name = os.path.join(parser.save_dir, "results.jsonl")
    with open(results_name, 'a') as f:
        f.write(json.dumps(best) + "\n")

    log.info("=" * 40 + " Program Complete " + "=" * 40)
    log.info("=" * 40 + " Results written to {} ".format(results_name) +
             "=" * 40)
Example #4
def main():
    """
    Main method for meta-learning
    """
    start = time.time()
    repository = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    parser = args.parse_args()

    # run some checks on arguments
    check_args(parser)

    # format logging
    log_name = os.path.join(
        parser.run_log,
        '{}_meta_run_log_{}.log'.format(parser.experiment,
                                        dt.now().strftime("%Y%m%d_%H%M")))
    log.basicConfig(filename=log_name,
                    format='%(asctime)s | %(name)s -- %(message)s',
                    level=log.INFO)
    os.chmod(log_name, parser.access_mode)

    # use GPU (CUDA) if available, otherwise fall back to CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    log.info("Device is {}".format(device))

    # set seed for replication
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)

    log.info("Starting experiment {} meta learning on {} with model {}".format(
        parser.experiment, device, parser.model))

    # set tokenizer and config from Huggingface
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        parser.model, do_lower_case=parser.do_lower_case)
    config = transformers.AutoConfig.from_pretrained(parser.model)

    # create IO object and import data
    cache_head = os.path.join(parser.save_dir, 'cached_data')
    cache_dir = os.path.join(cache_head, parser.model)
    if not os.path.exists(cache_head):
        os.mkdir(cache_head)
    if not os.path.exists(cache_dir):
        os.mkdir(cache_dir)

    data_handler = myio.IO(parser.data_dir,
                           cache_dir,
                           tokenizer,
                           parser.max_seq_length,
                           parser.doc_stride,
                           parser.max_query_length,
                           batch_size=parser.batch_size,
                           shuffle=True,
                           cache=True)

    # set oml
    oml = meta_learner.MetaLearningClassification(
        update_lr=parser.meta_update_lr,
        meta_lr=parser.meta_meta_lr,
        hf_model_name=parser.model,
        config=config,
        myio=data_handler,
        max_grad_norm=parser.max_grad_norm,
        device=device)

    if isinstance(oml.net, nn.DataParallel):
        rln = oml.net.module.model.bert
    else:
        rln = oml.net.model.bert

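    # keep a deep copy of the RLN (the BERT encoder) so the loop below can
    # verify that its weights actually change after each meta step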
    old_weights = copy.deepcopy(rln)

    # freeze the RLN layers
    oml.freeze_rln()

    # do meta_learning
    meta_tasks = parser.meta_tasks.split(',')

    # create save path
    meta_RLN_head = os.path.join(parser.save_dir, "meta_weights")
    if not os.path.exists(meta_RLN_head):
        os.mkdir(meta_RLN_head)
    meta_RLN_weights = os.path.join(meta_RLN_head,
                                    parser.experiment + "_meta_weights.pt")

    meta_steps = trange(0,
                        parser.meta_steps,
                        desc='Meta Outer',
                        mininterval=30)
    running_loss = 0
    for step in meta_steps:

        # sample tasks
        sample_tasks = np.random.choice(meta_tasks,
                                        parser.n_meta_tasks,
                                        replace=False)

        # sample trajectory
        d_traj = []
        d_rand = []
        for task in sample_tasks:
            task_traj, task_rand = data_handler.sample_dl(
                task=task, samples=parser.n_meta_task_samples, use='train')
            d_traj += task_traj
            d_rand += task_rand

        loss = oml(d_traj, d_rand)
        running_loss += loss
        if step % parser.verbose_steps == 0:
            log.info(
                f"OML Loss is {loss} | Step {step} | Average is {running_loss/(step+1)}"
            )

        # check if rln weights are changing
        changed = False

        if isinstance(oml.net, nn.DataParallel):
            rln = oml.net.module.model.bert
        else:
            rln = oml.net.model.bert

        for old, new in zip(old_weights.parameters(), rln.parameters()):
            if not old.equal(new):
                changed = True
                break

        assert changed, "Weights are the same"

        # save every meta step
        # for multi-GPU
        if isinstance(oml.net, nn.DataParallel):
            weights = oml.net.module.model.bert.state_dict()
        else:
            weights = oml.net.model.bert.state_dict()

        torch.save(weights, meta_RLN_weights)
        log.info(
            f"Meta loss is {loss} | Step {step} | Average is {running_loss/(step+1)}"
        )
        log.info(f"Changed weights: {changed}")
        log.info("Saved meta weights at {}".format(meta_RLN_weights))

    log.info("Total time is: {} min : {} s".format((time.time() - start) // 60,
                                                   (time.time() - start) % 60))
Example #5
from args import args

args = args.parse_args()


PERCEPTION_RATINGS = {
    0: 'To Be Decided',
    1: 'Major Star',
    2: 'Star',
    3: 'Well Known',
    4: 'Recognisable',
    5: 'Unimportant'}


def get_momentum_text(rating):
    if rating > 500:
        return 'White Hot'
    elif rating > 400:
        return 'Red Hot'
    elif rating > 300:
        return 'Very Hot'
    elif rating > 200:
        return 'Hot'
    elif rating > 100:
        return 'Very Warm'
    elif rating > 0:
        return 'Warm'
    elif rating == 0:
        return 'Neutral'
    elif rating < -300:
        return 'Very Cold'
    # note: ratings between -300 and 0 (exclusive) fall through and return None


def main():
    start = time.time()
    parser = args.parse_args()

    # run some checks on arguments
    check_args(parser)

    # format logging
    log_name = os.path.join(
        parser.run_log,
        '{}_run_log_{}.log'.format(parser.experiment,
                                   dt.now().strftime("%Y%m%d_%H%M")))

    log.basicConfig(filename=log_name,
                    format='%(asctime)s | %(name)s -- %(message)s',
                    level=log.INFO)
    os.chmod(log_name, parser.access_mode)

    # use GPU (CUDA) if available, otherwise fall back to CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    log.info("Starting experiment {} VN -> EN NMT on {}.".format(
        parser.experiment, device))

    # set seed for replication
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)
    log.info("For reproducibility, the seed is set to {}.".format(parser.seed))

    # set file paths
    source_name = parser.source_name
    target_name = parser.target_name

    # get saved models dir
    base_saved_models_dir = parser.save_dir
    saved_models_dir = os.path.join(base_saved_models_dir,
                                    source_name + '2' + target_name)

    log.info("We will save the models in this directory: {}".format(
        saved_models_dir))

    # get data dir
    main_data_path = parser.data_dir
    path_to_train_data = {
        'source': main_data_path + 'train.' + source_name,
        'target': main_data_path + 'train.' + target_name
    }
    path_to_dev_data = {
        'source': main_data_path + 'dev.' + source_name,
        'target': main_data_path + 'dev.' + target_name
    }
    # get language objects
    saved_language_model_dir = os.path.join(saved_models_dir, 'lang_obj')

    # get dictionary of datasets
    dataset_dict = {
        'train':
        nmt_dataset.LanguagePair(source_name=source_name,
                                 target_name=target_name,
                                 filepath=path_to_train_data,
                                 lang_obj_path=saved_language_model_dir,
                                 minimum_count=1),
        'dev':
        nmt_dataset.LanguagePair(source_name=source_name,
                                 target_name=target_name,
                                 filepath=path_to_dev_data,
                                 lang_obj_path=saved_language_model_dir,
                                 minimum_count=1)
    }

    # cap the max sentence length at the 99.99th percentile of source lengths
    MAX_LEN = int(dataset_dict['train'].main_df['source_len'].quantile(0.9999))
    batchSize = parser.batch_size
    log.info("Batch size = {}.".format(batchSize))

    dataloader_dict = {
        'train':
        DataLoader(dataset_dict['train'],
                   batch_size=batchSize,
                   collate_fn=partial(nmt_dataset.vocab_collate_func,
                                      MAX_LEN=MAX_LEN),
                   shuffle=True,
                   num_workers=0),
        'dev':
        DataLoader(dataset_dict['dev'],
                   batch_size=batchSize,
                   collate_fn=partial(nmt_dataset.vocab_collate_func,
                                      MAX_LEN=MAX_LEN),
                   shuffle=True,
                   num_workers=0)
    }

    # Configuration
    source_lang_obj = dataset_dict['train'].source_lang_obj
    target_lang_obj = dataset_dict['train'].target_lang_obj

    source_vocab = dataset_dict['train'].source_lang_obj.n_words
    target_vocab = dataset_dict['train'].target_lang_obj.n_words
    hidden_size = parser.hidden_size
    rnn_layers = parser.rnn_layers
    lr = parser.learning_rate
    longest_label = parser.longest_label
    gradient_clip = parser.gradient_clip
    num_epochs = parser.epochs
    encoder_attention = parser.encoder_attention
    self_attention = parser.self_attention

    log.info("encoder_attention = {}, self_attention = {}".format(
        encoder_attention, self_attention))

    # encoder model
    encoder_transformer = nnet_models_new.EncoderTransformer(
        source_vocab, MAX_LEN, hidden_size, rnn_layers)

    # decoder model
    decoder_encoderattn = nnet_models_new.Decoder_SelfAttn(
        output_size=target_vocab,
        hidden_size=hidden_size,
        encoder_attention=encoder_attention,
        self_attention=self_attention)

    # seq2seq model
    nmt_encoderattn = nnet_models_new.seq2seq(
        encoder_transformer,
        decoder_encoderattn,
        lr=lr,
        hiddensize=hidden_size,
        numlayers=rnn_layers,
        target_lang=dataset_dict['train'].target_lang_obj,
        longest_label=longest_label,
        clip=gradient_clip,
        device=device)

    log.info(
        "Seq2Seq Model with the following parameters: encoder_attention = {}, self_attention = {}, batch_size = {}, learning_rate = {}, hidden_size = {}, rnn_layers = {}, lr = {}, longest_label = {}, gradient_clip = {}, num_epochs = {}, source_name = {}, target_name = {}"
        .format(encoder_attention, self_attention, batchSize, lr, hidden_size,
                rnn_layers, lr, longest_label, gradient_clip, num_epochs,
                source_name, target_name))

    # do we want to train again?
    train_again = False
    modelname = 'encoderattn'

    # check if there is a saved model and if we want to train again
    if os.path.exists(utils.get_full_filepath(
            saved_models_dir, modelname)) and (not train_again):
        log.info("Retrieving saved model from {}".format(
            utils.get_full_filepath(saved_models_dir, modelname)))
        nmt_encoderattn = torch.load(
            utils.get_full_filepath(saved_models_dir, modelname))
    # train model again
    else:
        log.info("Check if this path exists: {}".format(
            utils.get_full_filepath(saved_models_dir, modelname)))
        log.info("It does not exist! Starting to train...")
        utils.train_model(dataloader_dict,
                          nmt_encoderattn,
                          num_epochs=num_epochs,
                          saved_model_path=saved_models_dir,
                          enc_type='encoderattn_test')
    log.info("Total time is: {} min : {} s".format((time.time() - start) // 60,
                                                   (time.time() - start) % 60))
Example #7
def main():
    """
    Main method for experiment
    """
    start = time.time()
    repository = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    parser = args.parse_args()

    # run some checks on arguments
    check_args(parser)

    # format logging
    log_name = os.path.join(
        parser.run_log,
        '{}_run_log_{}.log'.format(parser.experiment,
                                   dt.now().strftime("%Y%m%d_%H%M")))
    log.basicConfig(filename=log_name,
                    format='%(asctime)s | %(name)s -- %(message)s',
                    level=log.INFO)
    os.chmod(log_name, parser.access_mode)

    # use GPU (CUDA) if available, otherwise fall back to CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    log.info("Device is {}".format(device))

    # set seed for replication
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)

    log.info("Starting experiment {} on {} with model {}".format(
        parser.experiment, device, parser.model))

    print("{}".format(parser.experiment))

    # set tokenizer and config from Huggingface
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        parser.model, do_lower_case=parser.do_lower_case)
    config = transformers.AutoConfig.from_pretrained(parser.model)

    # create IO object and import data
    cache_head = os.path.join(parser.save_dir, 'cached_data')
    cache_dir = os.path.join(cache_head, parser.model)
    if not os.path.exists(cache_head):
        os.mkdir(cache_head)
    if not os.path.exists(cache_dir):
        os.mkdir(cache_dir)

    data_handler = myio.IO(parser.data_dir,
                           cache_dir,
                           tokenizer,
                           parser.max_seq_length,
                           parser.doc_stride,
                           parser.max_query_length,
                           batch_size=parser.batch_size,
                           shuffle=True,
                           cache=True)

    # =============================================================================
    # BASELINE
    # =============================================================================
    # parse continual learning curriculum
    parser.continual_curriculum = parser.continual_curriculum.split(',')

    # create BERT model
    BERTmodel = model.QAModel(
        parser.model,
        config,
        load_rln=parser.load_rln,
        rln_weights=parser.rln_weights,
    )

    # create learner object for BERT model
    trainer = learner.Learner(
        parser.access_mode,
        parser.fp16,
        parser.fp16_opt_level,
        BERTmodel,
        parser.model,
        device,
        data_handler,
        parser.save_dir,
        parser.n_best_size,
        parser.max_answer_length,
        parser.do_lower_case,
        parser.verbose_logging,
        parser.version_2_with_negative,
        parser.null_score_diff_threshold,
        max_steps=parser.fine_tune_steps,
        log_int=parser.logging_steps,
        best_int=parser.save_steps,
        verbose_int=parser.verbose_steps,
        max_grad_norm=parser.max_grad_norm,
        optimizer=None,
        weight_decay=parser.weight_decay,
        lr=parser.learning_rate,
        eps=parser.adam_epsilon,
        warmup_steps=parser.warmup_steps,
        freeze_embeddings=parser.freeze_embeddings,
    )

    # create continual learning object and perform continual learning
    c_learner = cont_learning.ContLearner(
        parser.model,
        'BERT',
        trainer,
        curriculum=parser.continual_curriculum,
        fine_tune_prev=not parser.no_prev_fine_tune)

    log.info("Starting Continual Learning")
    if not parser.no_cont_learning:
        c_learner.c_learn(rln_only=parser.carry_rln_only)

    if len(parser.continual_curriculum) > 1 and not parser.no_forget_eval:
        c_learner.evaluate_forgetting(rln_only=parser.carry_rln_only)

        log.info("Generating Plot")
        # generate BERT plot
        now = dt.now().strftime("%Y%m%d_%H%M")

        # create results folders if not generated
        plot_dir = os.path.join(parser.save_dir, "plots")
        json_dir = os.path.join(parser.save_dir, "json_results")

        if not os.path.exists(plot_dir):
            os.mkdir(plot_dir)

        if not os.path.exists(json_dir):
            os.mkdir(json_dir)

        # plot results and save
        plot = analyze.plot_learning(c_learner.scores,
                                     x_tick_int=2 * parser.logging_steps,
                                     iterations=parser.fine_tune_steps)
        plot_name = os.path.join(
            plot_dir, "baseline_{}_{}_{}.png".format(parser.experiment,
                                                     parser.model, now))
        plot.savefig(plot_name)
        os.chmod(plot_name, parser.access_mode)
        log.info("Plot saved at: {}".format(plot_name))

        # write data to json
        baseline_results_name = os.path.join(
            json_dir, "baseline_{}_{}_{}.json".format(parser.experiment,
                                                      parser.model, now))
        with open(baseline_results_name, 'w') as fw:
            json.dump(c_learner.scores, fw)
        os.chmod(baseline_results_name, parser.access_mode)
        log.info(
            "Baseline results written to: {}".format(baseline_results_name))

    log.info("Total time is: {}min : {}s".format((time.time() - start) // 60,
                                                 (time.time() - start) % 60))
Example #8
def main():
    """
    Main method for experiment
    """
    start = time.time()
    repository = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    parser = args.parse_args()

    if parser.run_log == 'log':
        parser.run_log = os.path.join(parser.save_dir, 'log')

    if not os.path.exists(parser.run_log):
        os.mkdir(parser.run_log)

    # run some checks on arguments
    check_args(parser)

    # format logging
    log_name = os.path.join(
        parser.run_log,
        '{}_run_log_{}.log'.format(parser.experiment,
                                   dt.now().strftime("%Y%m%d_%H%M")))
    log.basicConfig(filename=log_name,
                    format='%(asctime)s | %(name)s -- %(message)s',
                    level=log.INFO)
    os.chmod(log_name, parser.access_mode)

    # use GPU (CUDA) if available, otherwise fall back to CPU
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    log.info("Device is {}".format(device))

    # set seed for replication
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)

    log.info("Starting experiment {} on {}".format(parser.experiment, device))

    data_handler = myio.IO(
        data_dir=parser.data_dir,  # directory storing data
        batch_size=parser.batch_size,  # batch size
        shuffle=not parser.no_shuffle,  # whether to shuffle training data
        split=parser.val_split,  # percentage of data for validation
    )

    # TODO:
    # create model
    my_model = model.Model(
        road_lambda=parser.road_lambda,  # relative weight of road map loss
        box_lambda=parser.box_lambda,  # relative weight of bounding box loss
        preload_backbone=parser.preload,  # whether to load pretrained weights
        backbone_weights=parser.preload_weights,  # pretrained backbone weights if needed
    )

    # create learner
    trainer = learner.Learner(
        access_mode=parser.access_mode,  # os access mode for created files
        experiment_name=parser.experiment,  # name of experiment
        model=my_model,  # model
        device=device,  # device to run experiment
        myio=data_handler,  # myio.IO object for loading data
        save_dir=parser.save_dir,  # directory to save results
        max_steps=parser.training_steps,  # maximum number of update steps
        best_int=parser.save_steps,  # interval for checking weights
        verbose_int=parser.verbose_steps,  # interval for logging information
        max_grad_norm=parser.max_grad_norm,  # maximum gradients to avoid exploding gradients
        optimizer=None,  # optimizer for training
        weight_decay=parser.weight_decay,  # weight decay if using
        lr=parser.learning_rate,  # learning rate
        eps=parser.adam_epsilon,  # epsilon to use for adam
        accumulate_int=parser.accumulate_int,  # number of steps to accumulate gradients before stepping
        batch_size=parser.batch_size,  # batch size
        warmup_pct=parser.pct_start,  # percent of updates used to warm-up learning rate
        save=not parser.no_save,  # whether to save weights
        patience=parser.patience,  # number of checks without improvement before early stop
    )

    # train model
    results = trainer.train(labeled=not parser.no_label, debug=parser.debug)

    results["experiment"] = parser.experiment

    # write results to "results.jsonl"
    results_name = os.path.join(parser.save_dir, "results.jsonl")
    with open(results_name, 'a') as f:
        f.write(json.dumps(results) + "\n")
    os.chmod(results_name, parser.access_mode)

    log.info("Results written to: {}".format(results_name))

    log.info("Total time is: {} min : {} sec".format(
        (time.time() - start) // 60, (time.time() - start) % 60))