def main():
    """Parse CLI options, build the dataset and model, then run training."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-trg_data', required=True)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-c', type=float, default=0.9)
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, required=True)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, required=True)
    parser.add_argument('-heads', type=int, default=4)
    # BUG FIX: dropout and lr are fractional values; type=int rejected any
    # value passed on the command line (int('0.1') raises ValueError).
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, required=True)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=1e-7)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    opt = parser.parse_args()

    read_data(opt)
    SRC, TRG = create_fields(opt)
    opt.train = create_dataset(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))
    opt.optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.lr,
                                     betas=(0.9, 0.999),
                                     eps=1e-8)
    if opt.SGDR:
        # NOTE(review): opt.train_len is not set in this function — presumably
        # populated by create_dataset; confirm.
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)
    train_model(model, opt, SRC, TRG)
# Example #2
def main():
    """Parse CLI options, prepare data, and train the transformer model."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', default="./data/in.txt")  # raw source-side data file
    parser.add_argument('-trg_data', default="./data/out.txt")  # raw target-side data file
    # BUG FIX: default=True combined with action='store_true' made this flag a
    # no-op (opt.cuda was always True).  The original comment documented
    # "passing this flag disables CUDA and runs on CPU", so store_false is
    # the intended behavior.
    parser.add_argument('-cuda', default=True, action='store_false')
    parser.add_argument('-epochs', type=int,
                        default=100)  # number of training epochs
    parser.add_argument('-d_model', type=int, default=512)  # embedding / layer dimensionality
    parser.add_argument('-n_layers', type=int,
                        default=6)  # number of layers in the Transformer
    parser.add_argument('-heads', type=int,
                        default=8)  # number of attention heads
    # BUG FIX: dropout and lr are fractional; type=int rejected any value
    # passed on the command line (int('0.02') raises ValueError).
    parser.add_argument('-dropout', type=float,
                        default=0.02)  # dropout probability
    parser.add_argument('-batchsize', type=int,
                        default=128)  # tokens fed to the model per iteration
    # parser.add_argument('-printevery', type=int, default=200)
    parser.add_argument('-lr', type=float, default=0.0001)  # learning rate
    parser.add_argument('-premodels', default=False)  # whether to load previous weights and vocab
    parser.add_argument('-load_weights',
                        default="weights")  # folder holding pretrained weights and pickles
    parser.add_argument('-premodels_path',
                        default="model_weights_s5")  # pretrained model file name
    parser.add_argument('-max_strlen', type=int,
                        default=32)  # sentences with more words are excluded from the dataset

    parser.add_argument('-SGDR', action='store_true')  # SGD with warm restarts (cosine annealing)
    parser.add_argument('-create_valset', action='store_true')
    # NOTE(review): default=True makes this store_true flag a no-op (always
    # True); left unchanged because the intended semantics are unknown.
    parser.add_argument('-floyd', default=True, action='store_true')
    parser.add_argument('-checktime', type=int,
                        default=60)  # minutes between weight saves to 'weights/'
    parser.add_argument('-checkpoint', type=int,
                        default=10)  # epochs between weight saves to 'weights/'
    opt = parser.parse_args()

    opt.device = 0 if opt.cuda is True else -1
    if opt.device == 0:
        assert torch.cuda.is_available()
    read_data(opt)  # verify that the dataset files exist

    SRC, TRG = create_fields(opt)
    opt.train = create_dataset(opt, SRC, TRG)

    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))

    opt.optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-9)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)
    if opt.checkpoint > 0 or opt.checktime > 0:
        print(
            "model weights will be saved every %d minutes  and at end of %d epoch to directory %s "
            % (opt.checktime, opt.checkpoint, config.weights))
    train_model(model, opt)
# Example #3
def main():
    """Parse CLI options, build dataset/model, train, then prompt next action."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-trg_data', required=True)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    # BUG FIX: dropout and lr are fractional; type=int rejected any value
    # passed on the command line (int('0.1') raises ValueError).
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)

    opt = parser.parse_args()
    # NOTE(review): read_data is fed a separate bare CPU-only namespace
    # instead of opt — presumably deliberate; confirm against read_data.
    args = argparse.Namespace()
    args.is_cuda = False
    # opt.device = 0 if opt.no_cuda is False else -1
    # if opt.device == 0:
    #     assert torch.cuda.is_available()

    read_data(args)
    ss('-in main')
    SRC, TRG = create_fields(opt)
    opt.train = create_dataset(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))

    opt.optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-9)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print(
            "model weights will be saved every %d minutes and at end of epoch to directory weights/"
            % (opt.checkpoint))

    # NOTE(review): opt.floyd comes from store_true and is never None, so the
    # second clause is always true; this reduces to a load_weights check.
    if opt.load_weights is not None and opt.floyd is not None:
        # BUG FIX: os.mkdir raised FileExistsError when 'weights' already existed.
        os.makedirs('weights', exist_ok=True)
        pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
        pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))

    train_model(model, opt)

    if opt.floyd is False:
        promptNextAction(model, opt, SRC, TRG)
# Example #4
def main():
    """Train or evaluate a video-captioning transformer, selected via -mode."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-mode', default='train')
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2000)
    parser.add_argument('-d_model', type=int, default=500)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=10)
    # BUG FIX: dropout and lr are fractional; type=int rejected any value
    # passed on the command line (int('0.2') raises ValueError).
    parser.add_argument('-dropout', type=float, default=0.2)
    parser.add_argument('-printevery', type=int, default=10)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-batch_size', type=int, default=32)
    parser.add_argument('-vid_feat_size', type=int, default=500)
    parser.add_argument('-save_freq', type=int, default=2)
    parser.add_argument('-model_save_dir', default='model')
    # BUG FIX: without type=int, a value given on the command line arrived as
    # a string while the default stayed an int.
    parser.add_argument('-log_frequency', type=int, default=20)
    # DataLoader
    parser.add_argument('-num_train_set', type=int, default=8000)
    parser.add_argument('-video_features_file', default='activitynet/anet_v1.3.c3d.hdf5')
    parser.add_argument('-video_descriptions_file', default='activitynet_descriptions.pkl')
    parser.add_argument('-vocab_file', default='activitynet_vocab.pkl')
    parser.add_argument('-video_descriptions_csv', default='data/video_description.csv')
    parser.add_argument('-target_feature_size', type=int, default=14238)

    opt = parser.parse_args()

    opt.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = get_model(opt, opt.vid_feat_size, opt.target_feature_size)
    model = nn.DataParallel(model)

    if opt.mode == 'train':
        print("Training model for num_epochs - {}, vocab_size - {}...".format(opt.epochs, opt.target_feature_size))
        opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr, betas=(0.9, 0.98), eps=1e-9)
        if opt.SGDR:
            opt.sched = CosineWithRestarts(opt.optimizer, T_max=10)
        model.train()
        trainloader = DataLoader(opt=opt, train=True)
        evalloader = DataLoader(opt=opt, train=False)
        train_model(model, trainloader, evalloader, opt)
    elif opt.mode == 'eval':
        print("Evaluating model...")
        # NOTE(review): checkpoint filename is hard-coded to epoch 595 — confirm.
        model.load_state_dict(torch.load(opt.model_save_dir + '/model_595.pth'))
        model.eval()
        print("Transformer model loaded")
        evalloader = DataLoader(opt=opt, train=False)
        eval_model(model, evalloader, opt)
    else:
        print("Wrong option. Give either 'train' or 'eval' as input to -mode")
# Example #5
def main():
    """Load the pickled Multi30k data, build the model, then train and test it."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=10)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    # BUG FIX: dropout is fractional; type=int rejected any value passed on
    # the command line (int('0.1') raises ValueError).
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batch_size', type=int, default=512)
    parser.add_argument('-print_every', type=int, default=10)
    parser.add_argument('-lr', type=float, default=0.001)
    parser.add_argument('-patience', type=int, default=3)
    # BUG FIX: type=bool treated every non-empty string ('False' included)
    # as True; parse the string explicitly instead.
    parser.add_argument('-retrain',
                        type=lambda s: s.lower() in ('1', 'true', 'yes'),
                        default=False)

    opt = parser.parse_args()

    opt.device = 'cuda' if opt.no_cuda is False else 'cpu'
    if opt.device == 'cuda':
        assert torch.cuda.is_available()

    if opt.retrain:
        print('load checkpoint ...')
        checkpoint = torch.load('models/checkpoint.chkpt', map_location=torch.device(opt.device))
        opt = checkpoint['settings']
    else:
        checkpoint = None

    # BUG FIX: pickle.load/pickle.dump take file objects, not path strings.
    with open('data/m30k_deen_shr.pkl', 'rb') as f:
        data = pickle.load(f)

    vocab_src = data['vocab']['src']
    vocab_trg = data['vocab']['trg']

    vocab = {'src': vocab_src, 'trg': vocab_trg}
    utils.mkdir('models')
    with open('models/vocab.pkl', 'wb') as f:
        pickle.dump(vocab, f)

    opt.src_pad = vocab_src.pad_idx
    opt.trg_pad = vocab_trg.pad_idx

    opt.max_src_len = data['max_len']['src']
    opt.max_trg_len = data['max_len']['trg']

    train_data_loader, valid_data_loader, test_data_loader = prepare_dataloaders(opt, data)
    model = init_model(opt, vocab_src.vocab_size, vocab_trg.vocab_size, checkpoint=checkpoint)

    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr, betas=(0.9, 0.98), eps=1e-9)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(optimizer, T_max=len(train_data_loader))

    train(model, optimizer, train_data_loader, valid_data_loader, opt)

    test(model, test_data_loader, opt)
# Example #6
def main():
    """Parse CLI options, report the CUDA environment, build both datasets,
    and train with a learning rate scaled from -aaa."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-trg_data', required=True)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, required=True)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, required=True)
    parser.add_argument('-restart', type=int, required=True)
    parser.add_argument('-bestval', type=float, required=True)
    parser.add_argument('-heads', type=int, default=8)
    # BUG FIX: dropout is fractional; type=int rejected any value passed on
    # the command line (int('0.1') raises ValueError).
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, required=True)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-aaa', type=float, required=True)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-src_datav', required=True)
    parser.add_argument('-trg_datav', required=True)
    opt = parser.parse_args()

    # Report the CUDA environment before training starts.
    print(torch.cuda.is_available())
    print(torch.cuda.device_count())
    print(torch.cuda.get_device_name(0))
    print(torch.cuda.current_device())

    read_data(opt)
    SRC, TRG = create_fields(opt)
    opt.train = create_dataset(opt, SRC, TRG)
    opt.train1 = create_dataset1(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))
    # The effective learning rate is the -aaa multiplier scaled by 1e-8.
    opt.optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.aaa * 1e-8,
                                     betas=(0.9, 0.999),
                                     eps=1e-8)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)
    train_model(model, opt, SRC, TRG)
def main():
    """Parse CLI options, build the dataset/model, train, then prompt next action."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-trg_data', required=True)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    # BUG FIX: dropout and lr are fractional; type=int rejected any value
    # passed on the command line (int('0.1') raises ValueError).
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)
    opt = parser.parse_args()

    opt.device = 0 if opt.no_cuda is False else -1
    if opt.device == 0:
        assert torch.cuda.is_available()

    read_data(opt)
    SRC, TRG = create_fields(opt)
    opt.train = create_dataset(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))

    opt.optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-9)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    train_model(model, opt)

    promptNextAction(model, opt, SRC, TRG)
def main():
    """Parse CLI options, build data/model, optionally save vocabs, and train."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-trg_data', required=True)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    # BUG FIX: dropout and lr are fractional; type=int rejected any value
    # passed on the command line (int('0.1') raises ValueError).
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-savetokens', type=int, default=0)

    opt = parser.parse_args()

    opt.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(opt.device)

    read_data(opt)
    SRC, TRG = create_fields(opt)

    opt.train = create_dataset(opt, SRC, TRG)

    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))
    model = model.to(device=opt.device)

    if opt.savetokens == 1:
        # Persist the torchtext Vocab objects for later tokenization.
        pickle.dump(SRC.vocab, open('SRC_vocab.p', 'wb'))
        pickle.dump(TRG.vocab, open('TRG_vocab.p', 'wb'))

    opt.optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-9)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print(
            "model weights will be saved every %d minutes and at end of epoch to directory weights/"
            % (opt.checkpoint))

    # NOTE(review): opt.floyd comes from store_true and is never None, so the
    # second clause is always true; this reduces to a load_weights check.
    if opt.load_weights is not None and opt.floyd is not None:
        # BUG FIX: os.mkdir raised FileExistsError when 'weights' already existed.
        os.makedirs('weights', exist_ok=True)
        pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
        pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))

    train_model(model, opt)

    if opt.floyd is False:
        promptNextAction(model, opt, SRC, TRG)
# Example #9
def main():
    """Parse CLI options, optionally load pretrained word embeddings, build
    the dataset and model, and train."""
    # Possible future improvement: decay the learning rate per "Attention Is
    # All You Need": lrate = d_model**-0.5 * min(step**-0.5, step * warmup**-1.5).

    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-src_val_data',
                        required=False,
                        default='data/port_dev.txt')
    parser.add_argument('-trg_data', required=True)
    parser.add_argument('-trg_val_data',
                        required=False,
                        default='data/eng_dev.txt')
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int,
                        default=512)  # hidden size for models using RNN
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.00015)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int,
                        default=100)  # max number of spaces per sentence
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-decoder_extra_layers', type=int, default=0)
    parser.add_argument('-nmt_model_type', type=str, default='transformer')
    parser.add_argument('-word_embedding_type', type=str, default=None)
    parser.add_argument('-use_dynamic_batch', action='store_true')

    opt = parser.parse_args()
    print(opt)

    if opt.no_cuda is False:
        assert torch.cuda.is_available()
        opt.device = torch.device("cuda")
    else:
        opt.device = torch.device("cpu")

    # Load pretrained word embeddings if requested, timing the load.
    i_t = time.time()
    if opt.word_embedding_type in ['glove', 'fast_text']:
        if opt.word_embedding_type == 'glove':
            word_emb = KeyedVectors.load_word2vec_format(
                'word_embeddings/glove_s300.txt')
        else:
            word_emb = KeyedVectors.load_word2vec_format(
                'word_embeddings/ftext_skip_s300.txt')
        now = time.time()
        minutes = math.floor((now - i_t) / 60)
        print(
            f'\nWord embeddding of type {str(opt.word_embedding_type)} took {minutes} minutes \
            and {now - i_t - minutes*60:.2f} seconds to load.\n')
    elif opt.word_embedding_type is None:
        # No pretrained embeddings (word_emb stays None, same as before).
        word_emb = None
    else:
        # BUG FIX: any other value previously fell through both branches and
        # left word_emb unbound (NameError later); fail fast instead.
        raise ValueError(
            'unsupported -word_embedding_type: %r' % (opt.word_embedding_type,))

    read_data(opt)
    SRC, TRG = create_fields(opt)
    opt.SRC = SRC
    opt.TRG = TRG  # important, these are used to input embeddings
    opt.train, opt.valid, SRC, TRG = create_dataset(opt, SRC, TRG, word_emb)
    opt.word_emb = word_emb  # just for querying vocabulary
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab), word_emb)

    opt.optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-9)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print(
            "model weights will be saved every %d minutes and at end of epoch to directory weights/"
            % (opt.checkpoint))

    if opt.load_weights is not None:
        # BUG FIX: os.mkdir raised FileExistsError when 'weights' already existed.
        os.makedirs('weights', exist_ok=True)
        pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
        pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))

    train_model(model, opt)

    promptNextAction(model, opt, SRC, TRG)
# Example #10
def main():
    """Parse CLI options, build data/model (optionally graphing it to
    TensorBoard), and train."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-data_path', required=True)
    parser.add_argument('-output_dir', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-val_check_every_n', type=int, default=3)
    parser.add_argument('-calculate_val_loss', action='store_true')
    parser.add_argument('-val_forward_pass', action='store_true')
    parser.add_argument('-tensorboard_graph', action='store_true')
    parser.add_argument('-alex', action='store_true')
    parser.add_argument('-compositional_eval', action='store_true')
    parser.add_argument('-char_tokenization', action='store_true')
    parser.add_argument('-n_val', type=int, default=1000)
    parser.add_argument('-n_test', type=int, default=1000)
    parser.add_argument('-do_test', action='store_true')
    parser.add_argument('-epochs', type=int, default=50)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    # BUG FIX: dropout and lr are fractional; type=int rejected any value
    # passed on the command line (int('0.1') raises ValueError).
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=3000)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=512)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)

    opt = parser.parse_args()

    opt.device = 0 if opt.no_cuda is False else -1
    if opt.device == 0:
        assert torch.cuda.is_available()
        if opt.alex:
            # Pin to GPU 1.  NOTE(review): machine-specific; confirm index.
            torch.cuda.set_device(1)

    read_data(opt)
    SRC, TRG = create_fields(opt)
    opt.train, opt.val = create_dataset(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab), SRC)

    if opt.tensorboard_graph:
        # Trace one batch through the model so TensorBoard can render the graph.
        writer = SummaryWriter('runs')
        for i, batch in enumerate(opt.train):
            src = batch.src.transpose(0, 1).cuda()
            trg = batch.trg.transpose(0, 1).cuda()
            trg_input = trg[:, :-1]
            src_mask, trg_mask = create_masks(src, trg_input, opt)
            writer.add_graph(model, (src, trg_input, src_mask, trg_mask))
            break
        writer.close()

    # beam search parameters
    opt.k = 1
    opt.max_len = opt.max_strlen

    opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr, betas=(0.9, 0.98), eps=1e-9)
    opt.scheduler = ReduceLROnPlateau(opt.optimizer, factor=0.5, patience=5, verbose=True)

    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print(
            "model weights will be saved every %d minutes and at end of epoch to directory weights/" % (opt.checkpoint))

    train_model(model, opt, SRC, TRG)

    if opt.floyd is False:
        promptNextAction(model, opt, SRC, TRG)
# Example #11
def main():
    """Parse CLI options, build data/model, save field pickles, and train."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', default='data/english.txt')
    parser.add_argument('-trg_data', default='data/french.txt')
    parser.add_argument('-src_lang', default='en_core_web_sm')
    parser.add_argument('-trg_lang', default='fr_core_news_sm')
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    # BUG FIX: dropout and lr are fractional; type=int rejected any value
    # passed on the command line (int('0.1') raises ValueError).
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=10)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-output_dir', default='output')

    opt = parser.parse_args()
    print(opt)

    opt.device = "cpu" if opt.no_cuda else "cuda"
    if opt.device == "cuda":
        assert torch.cuda.is_available()

    read_data(opt)
    SRC, TRG = create_fields(opt)

    if not os.path.isdir(opt.output_dir):
        os.makedirs(opt.output_dir)

    opt.train = create_dataset(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))
    if opt.device == "cuda":
        model.cuda()

    opt.optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-9)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print(
            "model weights will be saved every %d minutes and at end of epoch to directory weights/"
            % (opt.checkpoint))

    # NOTE(review): opt.floyd comes from store_true and is never None, so the
    # second clause is always true; this reduces to a load_weights check.
    if opt.load_weights is not None and opt.floyd is not None:
        # BUG FIX: os.mkdir raised FileExistsError when 'weights' already existed.
        os.makedirs('weights', exist_ok=True)
        pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
        pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))

    print("saving field pickles to " + opt.output_dir + "/...")
    pickle.dump(SRC, open(f'{opt.output_dir}/SRC.pkl', 'wb'))
    pickle.dump(TRG, open(f'{opt.output_dir}/TRG.pkl', 'wb'))
    print("field pickles saved ! ")

    train_model(model, opt)
# Example #12
def main():
    """Entry point: parse CLI options, build the dataset/fields/model for the
    selected task, and train the transformer.

    Side effects: reads datasets from -data_path, optionally initialises
    wandb / tensorboard logging, and periodically saves weights to weights/
    (see -checkpoint).
    """
    parser = argparse.ArgumentParser()
    # --- paths / hardware ---
    parser.add_argument('-data_path', required=True)
    parser.add_argument('-output_dir', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    # --- validation / evaluation ---
    parser.add_argument('-val_check_every_n', type=int, default=3)
    parser.add_argument('-calculate_val_loss', action='store_true')
    parser.add_argument('-val_forward_pass', action='store_true')
    parser.add_argument('-tensorboard_graph', action='store_true')
    parser.add_argument('-alex', action='store_true')
    parser.add_argument('-compositional_eval', action='store_true')
    parser.add_argument('-wandb', action='store_true')
    parser.add_argument('-n_val', type=int, default=1000)
    parser.add_argument('-n_test', type=int, default=1000)
    parser.add_argument('-do_test', action='store_true')
    # --- model / optimisation hyper-parameters ---
    parser.add_argument('-epochs', type=int, default=50)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    # BUGFIX: dropout and lr take fractional values; the original type=int made
    # argparse reject e.g. "-dropout 0.1" / "-lr 0.0001" (int('0.1') raises).
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-mask_prob', type=float, default=0.5)
    parser.add_argument('-alpha', type=float, default=2)
    parser.add_argument('-batchsize', type=int, default=3000)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-log_interval', type=int, default=1000)
    parser.add_argument('-lr', type=float, default=0.0001)
    # --- checkpointing / task selection ---
    parser.add_argument('-load_weights')
    parser.add_argument('-load_r_to_o')
    parser.add_argument('-label_path')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-task',
                        type=str,
                        choices=["toy_task", "e_snli_r", "e_snli_o", "cos_e"],
                        default="toy_task")
    parser.add_argument('-max_strlen', type=int, default=512)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)

    opt = parser.parse_args()

    wandb_tags = [opt.task]

    # Device convention used downstream: 0 == first CUDA device, -1 == CPU.
    opt.device = 0 if opt.no_cuda is False else -1
    if opt.device == 0:
        assert torch.cuda.is_available()
        if opt.alex:
            # -alex selects GPU 1 instead of the default device 0
            torch.cuda.set_device(1)

    read_data(opt)

    if opt.task == 'e_snli_r':
        # Rationale task: reuse the vocab of a previously trained r->o model
        # loaded from -load_r_to_o so both models share token ids.
        assert opt.label_path is not None
        opt.classifier_SRC, opt.classifier_TRG = create_label_fields(opt)

        with open(opt.load_r_to_o + '/SRC.pkl', 'rb') as f:
            old_SRC = pickle.load(f)

        with open(opt.load_r_to_o + '/TRG.pkl', 'rb') as f:
            old_TRG = pickle.load(f)

        opt.classifier_SRC.vocab = old_SRC.vocab
        opt.classifier_TRG.vocab = old_TRG.vocab

    SRC, TRG = create_fields(opt)
    opt.train, opt.val = create_dataset(opt, SRC, TRG)
    if opt.task == 'e_snli_o':
        model = get_classifier_model(opt, len(SRC.vocab), len(TRG.vocab))
    else:
        if opt.task == 'e_snli_r':
            opt.classifier = load_r_to_o(opt, len(opt.classifier_SRC.vocab),
                                         len(opt.classifier_TRG.vocab))
        model = get_model(opt, len(SRC.vocab), len(TRG.vocab), SRC)

    if opt.wandb:
        # Mirror the key hyper-parameters into the wandb run config.
        config = wandb.config
        config.learning_rate = opt.lr
        config.max_pred_length = opt.max_strlen
        config.mask_prob = opt.mask_prob
        config.batch_size = opt.batchsize
        config.log_interval = opt.log_interval
        group_name = 'masking_probability_p=' + str(
            opt.mask_prob) + '_alpha=' + str(opt.alpha)

        wandb.init(config=config,
                   project='toy-task',
                   entity='c-col',
                   group=group_name,
                   tags=wandb_tags)
        wandb.watch(model)

    if opt.tensorboard_graph:
        # Trace a single batch through the model so tensorboard can render
        # the computation graph, then stop.
        writer = SummaryWriter('runs')
        for i, batch in enumerate(opt.train):
            src = batch.src.transpose(0, 1).cuda()
            trg = batch.trg.transpose(0, 1).cuda()
            trg_input = trg[:, :-1]
            src_mask, trg_mask = create_masks(src, trg_input, opt)
            writer.add_graph(model, (src, trg_input, src_mask, trg_mask))
            break
        writer.close()

    # Beam search parameters (k=1 == greedy decoding).
    opt.k = 1
    opt.max_len = opt.max_strlen

    opt.optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-9)
    # Halve the LR after 5 validation checks without improvement.
    opt.scheduler = ReduceLROnPlateau(opt.optimizer,
                                      factor=0.5,
                                      patience=5,
                                      verbose=True)

    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print(
            "model weights will be saved every %d minutes and at end of epoch to directory weights/"
            % (opt.checkpoint))

    train_model(model, opt, SRC, TRG)

    if opt.floyd is False:
        promptNextAction(model, opt, SRC, TRG)
# --- Example #13 ---
def main():
    """Train a transformer for tweet sentiment extraction and save the
    resulting weights plus the SRC/TRG field pickles to Google Drive.
    """
    parser = argparse.ArgumentParser()
    # NOTE(review): -fold has no type=, so a CLI value arrives as str while
    # the default is int 0 — confirm downstream code tolerates both.
    parser.add_argument('-fold', default=0)
    parser.add_argument('-src_lang', default='en')
    parser.add_argument('-trg_lang', default='en')
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=1)
    parser.add_argument('-heads', type=int, default=1)
    # NOTE(review): type=int looks wrong for a fractional dropout — passing
    # "-dropout 0.1" on the CLI would raise; should probably be type=float.
    parser.add_argument('-dropout', type=int, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=100)
    # NOTE(review): same type=int issue for the learning rate.
    parser.add_argument('-lr', type=int, default=0.001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=192)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-savetokens', type=int, default=0)

    opt = parser.parse_args()
    
    # Prefer the first CUDA device when available, otherwise run on CPU.
    opt.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(opt.device)
    
    read_data(opt)
    SRC, TRG = create_fields(opt)

    opt.train = create_dataset(opt, SRC, TRG)

    model = get_model(opt, len(SRC.vocab), len(TRG.vocab), model_type='train')
    model = model.to(device=opt.device)

    if opt.savetokens == 1:
        pickle.dump(SRC.vocab, open('SRC_vocab.p', 'wb')) # saves torchtext Vocab object
        pickle.dump(TRG.vocab, open('TRG_vocab.p', 'wb')) # saves torchtext Vocab object
        
    opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr, betas=(0.9, 0.98), eps=1e-9)
    if opt.SGDR == True:
        # Optional cosine-annealing-with-restarts schedule over the epoch length.
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print("model weights will be saved every %d minutes and at end of epoch to directory weights/"%(opt.checkpoint))
    
    # NOTE(review): opt.floyd comes from store_true so it is never None —
    # this condition reduces to "opt.load_weights is not None"; also
    # os.mkdir raises if weights/ already exists. Confirm intent.
    if opt.load_weights is not None and opt.floyd is not None:
        os.mkdir('weights')
        pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
        pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))
    
    # train model
    train_model(model, opt)

    # save weights
    dst = '../gdrive/My Drive/tweet-sentiment-extraction'
    print("saving weights to " + dst + "/...")
    torch.save(model.state_dict(), f'{dst}/model_weights')
    pickle.dump(SRC, open(f'{dst}/SRC.pkl', 'wb'))
    pickle.dump(TRG, open(f'{dst}/TRG.pkl', 'wb'))
    saved_once = 1
    print("weights and field pickles saved to " + dst)