Beispiel #1
0
def main():
    """Parse CLI options, build the en->fr dataset and train the model.

    Steps, in order:

    1. Parse the command-line options (``-device`` is required).
    2. Load the pickled source/target fields (``-use_saved_fields``, CPU
       only) or create fresh ones.
    3. Read ``data/english.txt`` and ``data/french.txt`` located next to
       this file, keep only the pairs whose two sides both contain fewer
       than ``-max_seq_len`` spaces, and wrap them in a batch iterator.
    4. Build the vocabularies and, when the fields were newly created,
       pickle them under ``-save_path``.
    5. Build the ``UniversalTransformer``, load or initialise its weights,
       run ``_train`` and save the resulting ``state_dict``.

    Raises:
        SystemExit: if ``-use_saved_fields`` is combined with a device
            other than ``cpu``.
        ValueError: if the dataset iterator yields no batch at all.
    """
    # initialize variable
    parser = argparse.ArgumentParser(
        description='Initialize training parameter.')
    parser.add_argument('-device',
                        required=True,
                        type=str,
                        help='"cuda" or "cpu"')
    parser.add_argument('-save_path', type=str, default='saved')
    parser.add_argument('-use_saved_fields', action='store_true')
    parser.add_argument('-use_saved_weights', action='store_true')
    parser.add_argument('-epochs', type=int, default=10)
    parser.add_argument('-batch_size', type=int, default=3000)
    parser.add_argument('-max_seq_len', type=int, default=80)
    parser.add_argument('-max_pondering_time', type=int, default=10)
    # NOTE(review): -dropout and -feedforward_dim are parsed but never passed
    # to the model constructor below; they only travel inside `args`.
    parser.add_argument('-dropout', type=float, default=0.5)
    parser.add_argument('-learning_rate', type=float, default=0.0001)
    parser.add_argument('-nhead', type=int, default=2)
    parser.add_argument('-embedding_dim', type=int, default=512)
    parser.add_argument('-feedforward_dim', type=int, default=2048)
    parser.add_argument('-lr_scheduling', action='store_true')
    args = parser.parse_args()
    src_lang = 'en'
    tgt_lang = 'fr'

    # Directory containing this script. dirname() works regardless of the
    # file's name or the platform's path separator.
    base_dir = os.path.dirname(os.path.abspath(__file__))
    save_dir = f'{base_dir}/{args.save_path}'

    # create train iterator (create field, dataset, iterator)
    # # create field
    if args.use_saved_fields:
        if args.device != 'cpu':
            # Same effect as the site-provided exit(msg), without depending
            # on the `site` module having installed that builtin.
            raise SystemExit(
                'use_saved_fields option can be used on only cpu.')
        print('loading saved fields...')
        # NOTE: unpickling executes arbitrary code; only load field files
        # that were produced by a trusted run of this script.
        with open(f'{save_dir}/src.pickle', 'rb') as src_file:
            src_field = pickle.load(src_file)
        with open(f'{save_dir}/tgt.pickle', 'rb') as tgt_file:
            tgt_field = pickle.load(tgt_file)
        print('end.')
    else:
        print('creating fields...')
        src_field = torchtext.data.Field(lower=True,
                                         tokenize=Tokenize(src_lang))
        tgt_field = torchtext.data.Field(lower=True,
                                         tokenize=Tokenize(tgt_lang),
                                         init_token='<sos>',
                                         eos_token='<eos>')
        print('end.')

    # # create dataset
    print('creating dataset iterator...')
    # Context managers make sure the corpus files are closed again.
    with open(f"{base_dir}/data/english.txt") as src_file:
        src_data = src_file.read().strip().split('\n')
    with open(f"{base_dir}/data/french.txt") as tgt_file:
        tgt_data = tgt_file.read().strip().split('\n')
    df = pd.DataFrame({
        'src': src_data,
        'tgt': tgt_data
    },
                      columns=["src", "tgt"])
    # True for the rows to KEEP: both sides have fewer than max_seq_len spaces.
    short_enough = (df['src'].str.count(' ') < args.max_seq_len) & (
        df['tgt'].str.count(' ') < args.max_seq_len)
    df = df.loc[short_enough]  # remove too long sentence

    # torchtext reads tabular data from disk, so round-trip through a CSV.
    # NOTE(review): to_csv writes a header row and TabularDataset is not told
    # to skip it, so the "src,tgt" line appears to be parsed as a sentence
    # pair - confirm. Left unchanged because fixing it can alter the
    # vocabulary size expected by previously saved weights.
    df.to_csv("tmp_dataset.csv", index=False)
    try:
        dataset = torchtext.data.TabularDataset('./tmp_dataset.csv',
                                                format='csv',
                                                fields=[('src', src_field),
                                                        ('tgt', tgt_field)])
    finally:
        # Remove the scratch file even when parsing fails.
        os.remove('tmp_dataset.csv')

    # # create iterator
    dataset_iter = MyIterator(dataset,
                              batch_size=args.batch_size,
                              device=args.device,
                              repeat=False,
                              sort_key=lambda x: (len(x.src), len(x.tgt)),
                              batch_size_fn=batch_size_fn,
                              train=True,
                              shuffle=True)

    # build vocab, save field object and add variable.
    src_field.build_vocab(dataset)
    tgt_field.build_vocab(dataset)
    print('end.')
    if not args.use_saved_fields:
        print('saving fields...')
        # The default save directory may not exist on a fresh checkout.
        os.makedirs(save_dir, exist_ok=True)
        with open(f'{save_dir}/src.pickle', 'wb') as src_file:
            pickle.dump(src_field, src_file)
        with open(f'{save_dir}/tgt.pickle', 'wb') as tgt_file:
            pickle.dump(tgt_field, tgt_file)
        print('end.')

    # Walk the iterator once to find the index of the last batch, without
    # materialising a list of every index.
    # NOTE(review): this is the last batch index (batch count - 1), exactly as
    # before; confirm that is what _train and the scheduler expect.
    iteration_num = -1
    for iteration_num, _ in enumerate(dataset_iter):
        pass
    if iteration_num < 0:
        raise ValueError('dataset iterator produced no batches; '
                         'check the data files and -max_seq_len.')

    # initialize model
    model = UniversalTransformer(n_src_vocab=len(src_field.vocab),
                                 n_tgt_vocab=len(tgt_field.vocab),
                                 embedding_dim=args.embedding_dim,
                                 nhead=args.nhead,
                                 max_seq_len=args.max_seq_len,
                                 max_pondering_time=args.max_pondering_time)
    # initialize param
    if args.use_saved_weights:
        print('loading saved model states...')
        # The model still lives on the CPU here, so map the checkpoint to the
        # CPU as well; this also lets CUDA-trained weights load on a CPU host.
        model.load_state_dict(
            torch.load(f'{save_dir}/model_state', map_location='cpu'))
        print('end.')
    else:
        # Xavier-initialise every parameter that has more than one dimension.
        for param in model.parameters():
            if param.dim() > 1:
                nn.init.xavier_normal_(param)
    if args.device == 'cuda':
        model = model.cuda()

    # train model
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 betas=(0.9, 0.98),
                                 eps=1e-9)
    lr_scheduler = CosineAnnealingLR(optimizer, iteration_num)
    _train(model, dataset_iter, optimizer, lr_scheduler, args, src_field,
           tgt_field, iteration_num)
    print('saving weights...')
    torch.save(model.state_dict(), f'{save_dir}/model_state')
    print('end.')
Beispiel #2
0
# Log every parsed argument, one "name:<TAB>value" line each in sorted name
# order, leaving out the "U", "V" and "Freq" entries.
arg_str = "args:\n"
for w in sorted(vars(args)):
    # Compare by value: the previous `is not "U"` identity test only worked
    # through CPython string interning and is a SyntaxWarning on Python 3.8+.
    if w not in ("U", "V", "Freq"):
        arg_str += "{}:\t{}\n".format(w, vars(args)[w])
logger.info(arg_str)

# ----------------------------------------------------------------------------------------------------------------- #
#
# Starting Meta-Learning for Low-Resource Neural Machine Translation
#
# ----------------------------------------------------------------------------------------------------------------- #

# Meta-optimizer: Adam over the trainable (requires_grad) parameters.
# With no_meta_training every model parameter is a candidate; otherwise the
# meta-model only updates the parameters returned by get_parameters('meta').
meta_opt = torch.optim.Adam(
    [
        param
        for param in (model.parameters() if args.no_meta_training
                      else model.get_parameters(type='meta'))
        if param.requires_grad
    ],
    betas=(0.9, 0.98),
    eps=1e-9,
)

# Resume training: when a checkpoint name is given AND resume is requested,
# restore the optimizer state stored in "<models_dir>/<load_from>.pt.states".
if (args.load_from is not None) and (args.resume):
    # Select the configured GPU for the duration of the load. The
    # map_location callback below calls storage.cuda() without a device
    # argument, so it presumably lands on whichever device is current here.
    with torch.cuda.device(args.gpu):  # very important.
        # The states file unpacks into a pair (offset, optimizer state dict).
        # `offset` is not used in the lines visible here - presumably the
        # training step to continue from; confirm against the code below.
        offset, opt_states = torch.load(
            args.models_dir + '/' + args.load_from + '.pt.states',
            map_location=lambda storage, loc: storage.cuda())
        meta_opt.load_state_dict(opt_states)