Example #1
0
    def __init__(self, config):
        self.config = config
        self.cfg_solver = config['solver']
        self.cfg_dataset = config['data']
        self.cfg_model = config['model']

        self.max_disp = self.cfg_model['max_disp']
        self.model = get_model(self.config)
        self.test_loader = get_loader(self.config)
        self.imshow = config['imshow']
Example #2
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-mode', default='train')
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2000)
    parser.add_argument('-d_model', type=int, default=500)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=10)
    parser.add_argument('-dropout', type=float, default=0.2)
    parser.add_argument('-printevery', type=int, default=10)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-batch_size', type=int, default=32)
    parser.add_argument('-vid_feat_size', type=int, default=500)
    parser.add_argument('-save_freq', type=int, default=2)
    parser.add_argument('-model_save_dir', default='model')
    parser.add_argument('-log_frequency', type=int, default=20)
    # DataLoader
    parser.add_argument('-num_train_set', type=int, default=8000)
    parser.add_argument('-video_features_file', default='activitynet/anet_v1.3.c3d.hdf5')
    parser.add_argument('-video_descriptions_file', default='activitynet_descriptions.pkl')
    parser.add_argument('-vocab_file', default='activitynet_vocab.pkl')
    parser.add_argument('-video_descriptions_csv', default='data/video_description.csv')
    parser.add_argument('-target_feature_size', type=int, default=14238)
 
    opt = parser.parse_args()

    opt.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = get_model(opt, opt.vid_feat_size, opt.target_feature_size)
    model = nn.DataParallel(model)

    if opt.mode == 'train':
        print("Training model for num_epochs - {}, vocab_size - {}...".format(opt.epochs, opt.target_feature_size))
        opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr, betas=(0.9, 0.98), eps=1e-9)
        if opt.SGDR == True:
            opt.sched = CosineWithRestarts(opt.optimizer, T_max = 10)
        model.train()
        trainloader = DataLoader(opt=opt, train=True)
        evalloader = DataLoader(opt=opt, train=False)
        train_model(model, trainloader, evalloader, opt)
    elif opt.mode == 'eval':
        print("Evaluating model...")
        model.load_state_dict(torch.load(opt.model_save_dir + '/model_595.pth'))
        model.eval()
        print("Transformer model loaded")
        evalloader = DataLoader(opt=opt, train=False)
        eval_model(model, evalloader, opt)
    else:
        print("Wrong option. Give either 'train' or 'eval' as input to -mode")
Example #3
0
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-trg_data', required=True)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=12)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)

    opt = parser.parse_args()

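    # device flag follows the torchtext convention: 0 means the first GPU, -1 means CPU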
    opt.device = 0 if opt.no_cuda is False else -1
    if opt.device == 0:
        assert torch.cuda.is_available()

    read_data(opt)
    SRC, TRG = create_fields(opt)
    opt.train = create_dataset(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))

    opt.optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-9)
    if opt.SGDR == True:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print(
            "model weights will be saved every %d minutes and at end of epoch to directory weights/"
            % (opt.checkpoint))

    if opt.load_weights is not None and opt.floyd is not None:
        os.makedirs('weights', exist_ok=True)
        pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
        pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))

    train_model(model, opt)

    if opt.floyd is False:
        promptNextAction(model, opt, SRC, TRG)
Example #4
0
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-load_weights')
    parser.add_argument('-k', type=int, default=3)
    parser.add_argument('-max_len', type=int, default=80)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-no_cuda', action='store_true')

    opt = parser.parse_args()

    opt.device = 0 if opt.no_cuda is False else -1

    assert opt.k > 0
    assert opt.max_len > 10

    opt.src_lang = "en_core_web_sm"
    opt.trg_lang = "en_core_web_sm"
    opt.load_weights = "weights"
    SRC, TRG = create_fields(opt)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))

    sentence = []

    engine = pyttsx3.init()
    voices = engine.getProperty('voices')
    engine.setProperty('voice', voices[0].id)
    engine.setProperty('rate', 150)

    #main loop to talk to bot
    while True:
        userinput = input(">> ")
        userinput = userinput.strip()
        if userinput and userinput[-1].isalpha():
            userinput += "."
        if len(sentence) >= 5:
            sentence.pop(0)
        sentence.append(userinput)

        opt.text = ' '.join(sentence)

        phrase = getSentence(opt, model, SRC, TRG)
        print('chatbot: ' + phrase)

        #makes chat bot speak out loud
        engine.say(phrase)
        engine.runAndWait()
Example #5
0
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-load_weights', required=True)
    parser.add_argument('-k', type=int, default=3)
    parser.add_argument('-max_len', type=int, default=80)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-floyd', action='store_true')

    opt = parser.parse_args()

    opt.device = 0 if opt.no_cuda is False else -1

    assert opt.k > 0
    assert opt.max_len > 10

    SRC, TRG = create_fields(opt)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))

    while True:
        opt.text = input(
            "Enter a sentence to translate (type 'f' to load from file, or 'q' to quit):\n"
        )
        if opt.text == "q":
            break
        if opt.text == 'f':
            fpath = input("Enter the path of the file to translate:\n")
            try:
                opt.text = ' '.join(
                    open(fpath, encoding='utf-8').read().split('\n'))
            except:
                print("error opening or reading text file")
                continue
        phrase = translate(opt, model, SRC, TRG)
        print('> ' + phrase + '\n')
Example #6
0
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-trg_data', required=True)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)
    opt = parser.parse_args()

    opt.device = 0 if opt.no_cuda is False else -1
    if opt.device == 0:
        assert torch.cuda.is_available()

    read_data(opt)
    SRC, TRG = create_fields(opt)
    opt.train = create_dataset(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))

    opt.optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-9)
    if opt.SGDR == True:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    train_model(model, opt)

    promptNextAction(model, opt, SRC, TRG)
Example #7
0
    def __init__(self, config):

        self.config = config
        self.cfg_solver = config['solver']
        self.cfg_dataset = config['data']
        self.cfg_model = config['model']
        self.reloaded = self.cfg_solver['resume_iter'] > 0

        self.max_disp = self.cfg_model['max_disp']
        self.loss_name = self.cfg_model['loss']
        self.train_loader, self.val_loader = get_loader(self.config)
        self.model = get_model(self.config)

        self.crit = get_losses(self.loss_name, max_disp=self.max_disp)

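        # build the optimizer named in the solver config; only RMSprop and Adam are supported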
        if self.cfg_solver['optimizer_type'].lower() == 'rmsprop':
            self.optimizer = optim.RMSprop(self.model.parameters(), lr=self.cfg_solver['lr_init'])
        elif self.cfg_solver['optimizer_type'].lower() == 'adam':
            self.optimizer = optim.Adam(self.model.parameters(), lr=self.cfg_solver['lr_init'])
        else:
            raise NotImplementedError('Optimizer type [{:s}] is not supported'.format(self.cfg_solver['optimizer_type']))
        self.scheduler = optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=self.cfg_solver['milestones'], gamma=self.cfg_solver['gamma'])
        self.global_step = 1
Example #8
0
def main():
    # Add parser to parse in the arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-load_weights', required=False)
    parser.add_argument('-output_name', type=str, required=True)
    parser.add_argument('-device', type=str, default="cuda:1" if torch.cuda.is_available() else "cpu")
    parser.add_argument('-k', type=int, default=3)
    parser.add_argument('-d_model', type=int, default=256)
    parser.add_argument('-d_ff', type=int, default=1024)
    parser.add_argument('-n_layers', type=int, default=5)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1)
    parser.add_argument('-max_seq_len', type=int, default=1024)
    parser.add_argument('-attention_type', type=str, default='Baseline')
    parser.add_argument('-weights_name', type=str, default='model_weights')
    parser.add_argument("-concat_pos_sinusoid", type=str2bool, default=False)
    parser.add_argument("-relative_time_pitch", type=str2bool, default=False)
    parser.add_argument("-max_relative_position", type=int, default=512)
    opt = parser.parse_args()

    # Generate the vocabulary from the data
    opt.vocab = GenerateVocab(opt.src_data)
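    # the vocabulary produced by GenerateVocab is assumed to reserve index 1 for padding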
    opt.pad_token = 1

    # Create the model using the arguments and the vocab size
    model = get_model(opt, len(opt.vocab))

    # counter to keep track of how many outputs have been saved
    opt.save_counter = 0

    # Now lets generate some music
    generated_music = generate(model,opt)

    # Ask for next action
    promptNextAction(model, opt, generated_music)
Example #9
0
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-load_weights', required=True)
    parser.add_argument('-k', type=int, default=3)
    parser.add_argument('-max_len', type=int, default=80)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-floyd', action='store_true')

    opt = parser.parse_args()

    opt.device = 0 if opt.no_cuda is False else -1

    assert opt.k > 0
    assert opt.max_len > 10

    SRC, TRG = create_fields(opt)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))
    origin = open("data/test_s.txt", encoding='utf-8').read().split('\n')
    truth = open("data/test_t.txt", encoding='utf-8').read().split('\n')
    results = open("results/res_15.txt", "w")
    # translate every test sentence and write source / prediction / reference triples
    for i in range(len(origin)):
        try:
            o = origin[i]
            t = truth[i]
            opt.text = o
            r = translate(opt, model, SRC, TRG)
            results.write(o.lower() + "\n" + r + "\n" + t + "\n\n")
        except:
            print("error translating line %d" % i)
    results.close()
Example #10
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-load_weights', required=True)
    parser.add_argument('-k', type=int, default=3)
    parser.add_argument('-max_len', type=int, default=80)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-qdir', required=True)
    parser.add_argument('-qtitle', required=True)
    parser.add_argument('-qdesc', required=True)
    parser.add_argument('-approach', type=str, default="baseline")

    opt = parser.parse_args()

    opt.device = 0 if opt.no_cuda is False else -1

    assert opt.k > 0
    assert opt.max_len > 10

    #Creating query_dict where title and query against a query id would be available
    query_file_title = open(os.path.join(opt.qdir, opt.qtitle))
    query_dict = {}
    query_title_len_dict = {}
    for line in query_file_title:
        line_splitted = line.split("\t")
        query_id = line_splitted[0].strip()
        query_title = line_splitted[1].strip()
        query_dict.setdefault(query_id, [])
        query_dict[query_id].append(query_title)
        query_title_len_dict[query_id] = len(query_title.split())
        #print(line)

    query_file_desc = open(os.path.join(opt.qdir, opt.qdesc))
    for line in query_file_desc:
        line_splitted = line.split("\t")
        query_id = line_splitted[0].strip()
        query_title_desc = line_splitted[1].strip()
        query_dict[query_id].append(query_title_desc)
        title_len = query_title_len_dict[query_id]
        query_desc = ' '.join(query_title_desc.split()[title_len: ])
        query_dict[query_id].append(query_desc)

    #Query dict created
    #Order in which queries are put: title, title_description, description

    translation_file = open(os.path.join(opt.qdir, "nqt", "translation_baseline_epoch5.txt"), "w")

    SRC, TRG = create_fields(opt)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))
    phrase = "query"
    for query_id in query_dict.keys():
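        # each query is translated three ways: title only, title + description, and description only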
        #translate the title first
        opt.text = query_dict[query_id][0]
        print("1---------------------------------------------------")
        print(opt.text)
        phrase = translate(opt, model, SRC, TRG)
        query_dict[query_id].append(phrase)
        print(phrase)
        print("***************************************************")
        # now translate the title_description

        opt.text = query_dict[query_id][1]
        print("2---------------------------------------------------")
        print(opt.text)
        phrase = translate(opt, model, SRC, TRG)
        query_dict[query_id].append(phrase)
        # now translate the description only
        print(phrase)
        print("***************************************************")

        opt.text = query_dict[query_id][2]
        print("3---------------------------------------------------")
        print(opt.text)
        phrase = translate(opt, model, SRC, TRG)
        query_dict[query_id].append(phrase)
        print(phrase)
        print("***************************************************")

    json.dump(query_dict, translation_file)
    translation_file.close()
Example #11
0
def main():
    
    parser = argparse.ArgumentParser()
    parser.add_argument('-translate_file', required=True)
    parser.add_argument('-output', required=True)
    parser.add_argument('-load_weights', required=True)
    parser.add_argument('-k', type=int, default=3)
    parser.add_argument('-max_len', type=int, default=80)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-nmt_model_type', type=str, default='transformer')
    parser.add_argument('-decoder_extra_layers', type=int, default=0)
    parser.add_argument('-word_embedding_type', type=str, default=None)
    
    opt = parser.parse_args()
    print(opt)

    # class InputArgs():
    #     def __init__(self):
    #         self.translate_file = 'data/port_test.txt'
    #         self.output = 'test_translations.txt' # 'rnn_naive_model_translations.txt' # 'vanilla_transformer.txt' 
    #         self.load_weights = 'naive_dmodel512' # 'weights_test' # 'rnn_naive_model' # 'transformer_test'
    #         self.src_lang = 'pt'
    #         self.trg_lang = 'en'
    #         self.no_cuda = True
    #         self.d_model = 512 # 300 
    #         self.heads = 8 # 6
    #         self.nmt_model_type = 'rnn_naive_model' # 'transformer', 'rnn_naive_model', 'align_and_translate' ...
    #         self.word_embedding_type = None # None, 'glove' or 'fast_text'
    #         self.k = 3
    #         self.max_len = 100
    #         self.dropout = 0.1
    #         self.n_layers = 6
    #         self.decoder_extra_layers = 0
    #         self.floyd = False
    #         # self.use_dynamic_batch = None
    # opt = InputArgs()
    # print(opt.__dict__)

    if opt.no_cuda is False:
        assert torch.cuda.is_available()
        opt.device = torch.device("cuda")
    else:
        opt.device = torch.device("cpu")
 
    assert opt.k > 0
    assert opt.max_len > 10

    i_t = time.time()
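    # optionally load pretrained word vectors (GloVe or fastText) via gensim's KeyedVectors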
    if opt.word_embedding_type in ['glove', 'fast_text']:
        if opt.word_embedding_type == 'glove':
            word_emb = KeyedVectors.load_word2vec_format('word_embeddings/glove_s300.txt')
        elif opt.word_embedding_type == 'fast_text':
            word_emb = KeyedVectors.load_word2vec_format('word_embeddings/ftext_skip_s300.txt')
        now = time.time()
        minutes = math.floor((now - i_t)/60)
        print(f'\nWord embedding of type {str(opt.word_embedding_type)} took {minutes} minutes \
            and {now - i_t - minutes*60:.2f} seconds to load.\n')
    else:
        word_emb = None

    SRC, TRG = create_fields(opt)
    opt.SRC = SRC; opt.TRG = TRG # important, these are used to input embeddings
    opt.word_emb = word_emb # just for querying vocabulary
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab), word_emb)
    
    try:
        opt.text = open(opt.translate_file, encoding='utf-8').read().split('\n')
    except:
        print("error opening or reading text file")
        return
    phrase = translate(opt, model, SRC, TRG)
    f = open(opt.output, "w+")
    f.write(phrase)
    f.close()

    print('Sample >'+ phrase[:300] + '\n')
Example #12
0
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-load_weights', required=True)
    parser.add_argument('-k', type=int, default=3)
    parser.add_argument('-max_len', type=int, default=80)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-bleu', action='store_true')

    opt = parser.parse_args()

    opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    print(f'Load Tokenizer and Vocab...')
    en_sp_tokenizer = Tokenizer(is_train=False, model_prefix='spm_en')
    ko_sp_tokenizer = Tokenizer(is_train=False, model_prefix='spm_ko')

    en_sp_vocab = en_sp_tokenizer.vocab
    ko_sp_vocab = ko_sp_tokenizer.vocab

    print(f'Load the extended vocab...')
    en_vocab = Vocabulary.load_vocab('./ko_data/en_vocab')
    ko_vocab = Vocabulary.load_vocab('./ko_data/ko_vocab')

    ######################TEST DATA######################
    # fitting the test dataset dir
    test_data_dir = [
        './ko_data/test/test_ko_en.en', './ko_data/test/test_ko_en.ko'
    ]

    with open(test_data_dir[0], encoding='utf-8') as f:
        src_corpus = [line.strip().split('\n') for line in f.readlines()]

    with open(test_data_dir[1], encoding='utf-8') as f:
        tgt_corpus = [line.strip().split('\n') for line in f.readlines()]
    ####################################################
    model = get_model(opt, len(en_vocab), len(ko_vocab))

    #opt.text = 'How are you?'
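    # without -bleu: translate the test set and pickle predictions and references; with -bleu: load the pickles and score them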
    if opt.bleu == False:
        pred = []
        refer = []
        for batch_idx, (text, tgt) in enumerate(zip(src_corpus, tgt_corpus)):

            opt.text = text[0]
            #print(f'length: {len(opt.text)}')

            phrase = translate(opt, model, en_vocab, ko_vocab, en_sp_tokenizer,
                               ko_sp_tokenizer)

            pred.append(phrase)
            refer.append(tgt)

        #phrase = translate(opt, model, vocab, sp_tokenizer)

        with open('translation_result1', 'wb') as f:
            pc.dump(pred, f)
        with open('reference_result1', 'wb') as f:
            pc.dump(refer, f)
    else:
        with open('translation_result1', 'rb') as f:
            translation = pc.load(f)
        with open('reference_result1', 'rb') as f:
            reference = pc.load(f)

        compute_metrics(reference, translation)
Example #13
0
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-load_weights', required=True)
    parser.add_argument('-k', type=int, default=3)
    parser.add_argument('-max_len', type=int, default=80)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)

    opt = parser.parse_args()

    opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    print(f'Load Tokenizer and Vocab...')
    sp_tokenizer = Tokenizer(is_train=False, model_prefix='spm')
    sp_vocab = sp_tokenizer.vocab

    print(f'Load the extended vocab...')
    vocab = Vocabulary.load_vocab('./data/vocab')

    ######################TEST DATA######################
    # fitting the test dataset dir
    test_data_dir = [
        './data/test/newstest2014_en', './data/test/newstest2014_de'
    ]
    test_dataset = Our_Handler(src_path=test_data_dir[0],
                               tgt_path=test_data_dir[1],
                               vocab=vocab,
                               tokenizer=sp_tokenizer,
                               max_len=256,
                               is_test=True)

    test_dataloader = DataLoader(test_dataset,
                                 batch_size=8,
                                 shuffle=False,
                                 drop_last=True)
    opt.test = test_dataloader
    opt.test_len = len(test_dataloader)
    ####################################################
    model = get_model(opt, len(vocab), len(vocab))

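    # interactive loop: translate typed sentences, or load a whole file when 'f' is entered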
    while True:
        opt.text = input(
            "Enter a sentence to translate (type 'f' to load from file, or 'q' to quit):\n"
        )
        if opt.text == "q":
            break
        if opt.text == 'f':
            fpath = input("Enter the path of the file to translate:\n")
            try:
                opt.text = ' '.join(
                    open(fpath, encoding='utf-8').read().split('\n'))
            except:
                print("error opening or reading text file")
                continue
        # SRC/TRG are not defined in this script; use the sentencepiece vocab and tokenizer built above
        phrase = translate(opt, model, vocab, sp_tokenizer)
        print('> ' + phrase + '\n')
Example #14
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=50)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=500)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('--is_train', action='store_true')
    parser.add_argument('--is_test', action='store_true')

    opt = parser.parse_args()
    opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # baseline code
    #read_data(opt)
    #SRC, TRG = create_fields(opt)
    #opt.train = create_dataset(opt, SRC, TRG)

    if not os.path.exists('./data'):
        os.makedirs('./data')

    # modified version for tokenize, vocab, dataset
    if opt.is_train:
        filenames = ['data/europarl-v7.de-en.en', 'data/europarl-v7.de-en.de']

        print(f'Build Tokenizer and Vocab...')
        sp_tokenizer = Tokenizer(is_train=True,
                                 filenames=filenames,
                                 tokenizer_type='spm',
                                 input_file='./data/concat_data.txt',
                                 model_prefix='spm',
                                 vocab_size=32000,
                                 model_type='bpe')
        sp_vocab = sp_tokenizer.vocab
    else:
        print(f'Load Tokenizer and Vocab...')
        sp_tokenizer = Tokenizer(is_train=False, model_prefix='spm')
        sp_vocab = sp_tokenizer.vocab

    if opt.is_train:
        print(f'Extend Vocab...')
        vocab = Vocabulary(sp_vocab)
        vocab.save_vocab('./data/vocab')
    else:
        print(f'Load the extended vocab...')
        vocab = Vocabulary.load_vocab('./data/vocab')

    train_dataset = Our_Handler(src_path='./data/europarl-v7.de-en.en',
                                tgt_path='./data/europarl-v7.de-en.de',
                                vocab=vocab,
                                tokenizer=sp_tokenizer,
                                max_len=32)
    #     print(train_dataset[0])
    #     print(train_dataset[0][0].shape, train_dataset[0][1].shape, train_dataset[0][2].shape)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=128,
                                  shuffle=True,
                                  pin_memory=True,
                                  drop_last=True)
    opt.train = train_dataloader
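    # a single shared sentencepiece vocab is used for both source and target, so one pad index covers both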
    opt.src_pad = opt.trg_pad = vocab.pad_index
    opt.train_len = len(train_dataloader)

    ######################DEV DATA######################
    # fitting the dev dataset dir
    dev_data_dir = ['./data/dev/newstest2013.en', './data/dev/newstest2013.de']
    dev_dataset = Our_Handler(src_path=dev_data_dir[0],
                              tgt_path=dev_data_dir[1],
                              vocab=vocab,
                              tokenizer=sp_tokenizer,
                              max_len=32)

    dev_dataloader = DataLoader(dev_dataset,
                                batch_size=128,
                                shuffle=False,
                                drop_last=True)
    opt.validation = dev_dataloader
    opt.val_len = len(dev_dataloader)
    ####################################################

    ######################TEST DATA######################
    # fitting the test dataset dir
    test_data_dir = [
        './data/test/newstest2014_en', './data/test/newstest2014_de'
    ]
    test_dataset = Our_Handler(src_path=test_data_dir[0],
                               tgt_path=test_data_dir[1],
                               vocab=vocab,
                               tokenizer=sp_tokenizer,
                               max_len=32,
                               is_test=True)

    test_dataloader = DataLoader(test_dataset,
                                 batch_size=128,
                                 shuffle=False,
                                 drop_last=True)
    opt.test = test_dataloader
    opt.test_len = len(test_dataloader)
    ####################################################

    #model = get_model(opt, len(SRC.vocab), len(TRG.vocab))
    model = get_model(opt, len(vocab), len(vocab))

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        print(f'Use {torch.cuda.device_count()} GPUs')

    model = model.to(opt.device)

    opt.optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-9)
    if opt.SGDR == True:
        # opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)
        opt.sched = Triangular(opt.optimizer,
                               num_epochs=opt.epochs,
                               warm_up=opt.epochs // 4,
                               cool_down=opt.epochs // 4)

    if opt.checkpoint > 0:
        print(
            "model weights will be saved every %d minutes and at end of epoch to directory weights/"
            % (opt.checkpoint))

    # baseline SRC/TRG pickling disabled: SRC/TRG fields are not created in this version
    # (the sentencepiece vocab above is used instead), so this block would raise a NameError
    # if opt.load_weights is not None and opt.floyd is not None:
    #     os.mkdir('weights')
    #     pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
    #     pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))

    if opt.is_test:
        test_model(model, opt)
    else:
        train_model(model, opt)
        if opt.floyd is False:
            promptNextAction(model, opt)
Example #15
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=50)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=500)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('--is_train', action='store_true')
    parser.add_argument('--is_test', action='store_true')

    opt = parser.parse_args()
    opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # baseline code
    #read_data(opt)
    #SRC, TRG = create_fields(opt)
    #opt.train = create_dataset(opt, SRC, TRG)

    if not os.path.exists('./ko_data'):
        os.makedirs('./ko_data')

    # modified version for tokenize, vocab, dataset
    if opt.is_train:
        source_file_name = 'ko_data/train_ko_en.en'
        target_file_name = 'ko_data/train_ko_en.ko'

        print(f'Build Tokenizer and Vocab...')
        en_sp_tokenizer = Tokenizer(is_train=True,
                                    filename=source_file_name,
                                    tokenizer_type='spm',
                                    model_prefix='spm_en',
                                    vocab_size=32000,
                                    model_type='bpe')
        ko_sp_tokenizer = Tokenizer(is_train=True,
                                    filename=target_file_name,
                                    tokenizer_type='spm',
                                    model_prefix='spm_ko',
                                    vocab_size=32000,
                                    model_type='bpe')
        en_sp_vocab = en_sp_tokenizer.vocab
        ko_sp_vocab = ko_sp_tokenizer.vocab
    else:
        print(f'Load Tokenizer and Vocab...')
        en_sp_tokenizer = Tokenizer(is_train=False, model_prefix='spm_en')
        ko_sp_tokenizer = Tokenizer(is_train=False, model_prefix='spm_ko')

        en_sp_vocab = en_sp_tokenizer.vocab
        ko_sp_vocab = ko_sp_tokenizer.vocab

    if opt.is_train:
        print(f'Extend Vocab...')
        en_vocab = Vocabulary(en_sp_vocab)
        ko_vocab = Vocabulary(ko_sp_vocab)
        en_vocab.save_vocab('./ko_data/en_vocab')
        ko_vocab.save_vocab('./ko_data/ko_vocab')
    else:
        print(f'Load the extended vocab...')
        en_vocab = Vocabulary.load_vocab('./ko_data/en_vocab')
        ko_vocab = Vocabulary.load_vocab('./ko_data/ko_vocab')

    #En-Ko
#     train_dataset = Our_Handler(src_path='./ko_data/train_ko_en.en', tgt_path='./ko_data/train_ko_en.ko',
#                                 en_vocab=en_vocab, ko_vocab=ko_vocab, en_tokenizer=en_sp_tokenizer, ko_tokenizer=ko_sp_tokenizer, max_len=100)
#Ko-En
    train_dataset = Our_Handler(src_path='./ko_data/train_ko_en.ko',
                                tgt_path='./ko_data/train_ko_en.en',
                                en_vocab=en_vocab,
                                ko_vocab=ko_vocab,
                                en_tokenizer=en_sp_tokenizer,
                                ko_tokenizer=ko_sp_tokenizer,
                                max_len=90)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=32,
                                  shuffle=True,
                                  pin_memory=True,
                                  drop_last=True)
    opt.train = train_dataloader
    #En-Ko
    #     opt.src_pad = en_vocab.pad_index
    #     opt.trg_pad = ko_vocab.pad_index

    #Ko-En
    opt.src_pad = ko_vocab.pad_index
    opt.trg_pad = en_vocab.pad_index

    opt.train_len = len(train_dataloader)

    ######################DEV DATA######################
    # fitting the dev dataset dir
    dev_data_dir = ['./ko_data/dev/dev_ko_en.en', './ko_data/dev/dev_ko_en.ko']

    #En-Ko
    #     dev_dataset = Our_Handler(src_path=dev_data_dir[0],
    #                             tgt_path=dev_data_dir[1],
    #                             en_vocab=en_vocab,ko_vocab=ko_vocab,
    #                             en_tokenizer=en_sp_tokenizer, ko_tokenizer=ko_sp_tokenizer,
    #                             max_len=100)
    #Ko-En
    dev_dataset = Our_Handler(src_path=dev_data_dir[1],
                              tgt_path=dev_data_dir[0],
                              en_vocab=en_vocab,
                              ko_vocab=ko_vocab,
                              en_tokenizer=en_sp_tokenizer,
                              ko_tokenizer=ko_sp_tokenizer,
                              max_len=90)

    dev_dataloader = DataLoader(dev_dataset,
                                batch_size=32,
                                shuffle=False,
                                drop_last=True)
    opt.validation = dev_dataloader
    opt.val_len = len(dev_dataloader)
    ####################################################

    ######################TEST DATA######################
    # fitting the test dataset dir
    test_data_dir = [
        './ko_data/test/test_ko_en.en', './ko_data/test/test_ko_en.ko'
    ]

    #En-Ko
    #     test_dataset = Our_Handler(src_path=test_data_dir[0],
    #                             tgt_path=test_data_dir[1],
    #                             en_vocab=en_vocab,ko_vocab=ko_vocab,
    #                             en_tokenizer=en_sp_tokenizer, ko_tokenizer=ko_sp_tokenizer,
    #                             max_len=100,
    #                             # is_test=True
    #                             is_test=False)
    #Ko-En
    test_dataset = Our_Handler(
        src_path=test_data_dir[1],
        tgt_path=test_data_dir[0],
        en_vocab=en_vocab,
        ko_vocab=ko_vocab,
        en_tokenizer=en_sp_tokenizer,
        ko_tokenizer=ko_sp_tokenizer,
        max_len=90,
        # is_test=True
        is_test=False)

    test_dataloader = DataLoader(test_dataset,
                                 batch_size=32,
                                 shuffle=False,
                                 drop_last=True)
    opt.test = test_dataloader
    opt.test_len = len(test_dataloader)
    ####################################################

    #En-Ko
    #     model = get_model(opt, len(en_vocab), len(ko_vocab))

    #Ko-En
    model = get_model(opt, len(ko_vocab), len(en_vocab))

    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
        print(f'Use {torch.cuda.device_count()} GPUs')

    model = model.to(opt.device)

    opt.optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-9)
    if opt.SGDR == True:
        # opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)
        opt.sched = Triangular(opt.optimizer,
                               num_epochs=opt.epochs,
                               warm_up=opt.epochs // 4,
                               cool_down=opt.epochs // 4)

    if opt.checkpoint > 0:
        print(
            "model weights will be saved every %d minutes and at end of epoch to directory weights/"
            % (opt.checkpoint))

    # baseline SRC/TRG pickling disabled: SRC/TRG fields are not created in this version
    # (the sentencepiece vocabs above are used instead), so this block would raise a NameError
    # if opt.load_weights is not None and opt.floyd is not None:
    #     os.mkdir('weights')
    #     pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
    #     pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))

    if opt.is_test:
        test_model(model, opt)
    else:
        train_model(model, opt)
Example #16
0
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', default='data/english.txt')
    parser.add_argument('-trg_data', default='data/french.txt')
    parser.add_argument('-src_lang', default='en_core_web_sm')
    parser.add_argument('-trg_lang', default='fr_core_news_sm')
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=10)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-output_dir', default='output')

    opt = parser.parse_args()
    print(opt)

    opt.device = "cpu" if opt.no_cuda else "cuda"
    if opt.device == "cuda":
        assert torch.cuda.is_available()

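    # load the parallel corpora and build the torchtext SRC/TRG fields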
    read_data(opt)
    SRC, TRG = create_fields(opt)

    if not os.path.isdir(opt.output_dir):
        os.makedirs(opt.output_dir)

    opt.train = create_dataset(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))
    if opt.device == "cuda":
        model.cuda()

    opt.optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-9)
    if opt.SGDR == True:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print(
            "model weights will be saved every %d minutes and at end of epoch to directory weights/"
            % (opt.checkpoint))

    if opt.load_weights is not None and opt.floyd is not None:
        os.makedirs('weights', exist_ok=True)
        pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
        pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))

    print("saving field pickles to " + opt.output_dir + "/...")
    pickle.dump(SRC, open(f'{opt.output_dir}/SRC.pkl', 'wb'))
    pickle.dump(TRG, open(f'{opt.output_dir}/TRG.pkl', 'wb'))
    print("field pickles saved ! ")

    train_model(model, opt)
Example #17
0
def main():

    ############################
    ## OPTIONAL FOR THE FUTURE #
    # DO LR DECAY BASED ON THE #
    # ATTENTION PAPER !!!!! ####
    ############################
    # step_list = [i*500 for i in range(2000)]
    # for step in step_list:
    #     lrate = (1/np.sqrt(512)) * min(1/np.sqrt(step), step*4000**-1.5 )
    #     print(f'{step}: lrate {lrate}')

    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-src_val_data',
                        required=False,
                        default='data/port_dev.txt')
    parser.add_argument('-trg_data', required=True)
    parser.add_argument('-trg_val_data',
                        required=False,
                        default='data/eng_dev.txt')
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int,
                        default=512)  # hidden size for models using RNN
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.00015)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int,
                        default=100)  # max number of spaces per sentence
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-decoder_extra_layers', type=int, default=0)
    parser.add_argument('-nmt_model_type', type=str, default='transformer')
    parser.add_argument('-word_embedding_type', type=str, default=None)
    parser.add_argument('-use_dynamic_batch', action='store_true')

    opt = parser.parse_args()
    print(opt)

    # class InputArgs():
    #     def __init__(self):
    #         self.src_data = 'data/port_train.txt'
    #         self.src_val_data = 'data/port_dev.txt'
    #         self.trg_data = 'data/eng_train.txt'
    #         self.trg_val_data = 'data/eng_dev.txt'
    #         self.src_lang = 'pt'
    #         self.trg_lang = 'en'
    #         self.no_cuda = True
    #         self.SGDR = False
    #         self.epochs = 5
    #         self.d_model = 300
    #         self.n_layers = 6
    #         self.heads = 6
    #         self.dropout = 0.1
    #         self.batchsize = 1024
    #         self.printevery = 100
    #         self.lr = 0.00015
    #         self.load_weights = None
    #         self.create_valset = False
    #         self.max_strlen = 100
    #         self.checkpoint = 1
    #         self.decoder_extra_layers = 0
    #         self.nmt_model_type = 'rnn_naive_model' # 'transformer', 'rnn_naive_model', 'align_and_translate' ...
    #         self.word_embedding_type = None # None, 'glove' or 'fast_text'
    #         self.use_dynamic_batch = None
    # opt = InputArgs()
    # print(opt.__dict__)

    # opt.device = 0 if opt.no_cuda is False else torch.device("cpu")
    if opt.no_cuda is False:
        assert torch.cuda.is_available()
        opt.device = torch.device("cuda")
    else:
        opt.device = torch.device("cpu")

    i_t = time.time()
    if opt.word_embedding_type in ['glove', 'fast_text']:
        if opt.word_embedding_type == 'glove':
            word_emb = KeyedVectors.load_word2vec_format(
                'word_embeddings/glove_s300.txt')
        elif opt.word_embedding_type == 'fast_text':
            word_emb = KeyedVectors.load_word2vec_format(
                'word_embeddings/ftext_skip_s300.txt')
        now = time.time()
        minutes = math.floor((now - i_t) / 60)
        print(
            f'\nWord embedding of type {str(opt.word_embedding_type)} took {minutes} minutes \
            and {now - i_t - minutes*60:.2f} seconds to load.\n')
    else:
        word_emb = None

    read_data(opt)
    SRC, TRG = create_fields(opt)
    opt.SRC = SRC
    opt.TRG = TRG  # important, these are used to input embeddings
    opt.train, opt.valid, SRC, TRG = create_dataset(opt, SRC, TRG, word_emb)
    opt.word_emb = word_emb  # just for querying vocabulary
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab), word_emb)

    opt.optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-9)
    if opt.SGDR == True:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print(
            "model weights will be saved every %d minutes and at end of epoch to directory weights/"
            % (opt.checkpoint))

    if opt.load_weights is not None:
        os.makedirs('weights', exist_ok=True)
        pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
        pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))

    train_model(model, opt)

    promptNextAction(model, opt, SRC, TRG)
Example #18
0
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-load_weights', required=True)
    parser.add_argument('-k', type=int, default=3)
    parser.add_argument('-max_len', type=int, default=32)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)

    opt = parser.parse_args()

    opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    print(f'Load Tokenizer and Vocab...')
    sp_tokenizer = Tokenizer(is_train=False, model_prefix='spm')
    sp_vocab = sp_tokenizer.vocab

    print(f'Load the extended vocab...')
    vocab = Vocabulary.load_vocab('./data/vocab')

    ######################TEST DATA######################
    # fitting the test dataset dir
    test_data_dir = [
        './data/test/newstest2014_en', './data/test/newstest2014_de'
    ]
    test_dataset = Our_Handler(src_path=test_data_dir[0],
                               tgt_path=test_data_dir[1],
                               vocab=vocab,
                               tokenizer=sp_tokenizer,
                               max_len=32,
                               is_test=True)

    test_dataloader = DataLoader(test_dataset,
                                 batch_size=64,
                                 shuffle=False,
                                 drop_last=True)
    opt.test = test_dataloader
    opt.test_len = len(test_dataloader)
    ####################################################
    model = get_model(opt, len(vocab), len(vocab))

    model.eval()
    opt.src_pad = opt.trg_pad = vocab.pad_index

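    # accumulate padding-masked cross-entropy loss and per-batch perplexity over the test set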
    test_loss = 0.
    test_ppl = 0.
    for batch_idx, (enc_input, dec_input, dec_output) in enumerate(opt.test):

        enc_input = enc_input.to(opt.device)
        dec_input = dec_input.to(opt.device)
        dec_output = dec_output.to(opt.device)

        src_mask, trg_mask = create_masks(enc_input, dec_input, opt)

        with torch.no_grad():
            preds = model(enc_input, dec_input, src_mask, trg_mask)

        ys = dec_output.contiguous().view(-1)

        loss = F.cross_entropy(preds.view(-1, preds.size(-1)),
                               ys,
                               ignore_index=opt.trg_pad)

        test_loss += loss.item()
        test_ppl += np.exp(loss.item())

    avg_test_loss = test_loss / len(opt.test)
    avg_ppl = test_ppl / len(opt.test)
    print(f'Test loss: {avg_test_loss:.3f}, Test perplexity: {avg_ppl:.3f}')
Example #19
0
def main_total(model_Key):
    history_sum = {}
    # res_pre={}
    for key in model_Key.keys():
        for model_path in model_Key[key]:
            parser = argparse.ArgumentParser()
            parser.add_argument('-premodels',
                                default=True)  # whether to load the existing weights and vocab
            parser.add_argument('-load_weights', default="weights_" +
                                key)  # folder holding the pretrained weights and field pickles
            parser.add_argument('-premodels_path',
                                default="model_weights_" +
                                model_path)  # filename of the pretrained model
            parser.add_argument('-k', type=int,
                                default=1)  # beam top-k; 1 is enough here, or remove it
            parser.add_argument('-max_len', type=int,
                                default=32)  # maximum length, including the start position
            parser.add_argument('-d_model', type=int,
                                default=512)  # dimension of embeddings and layers (default 512)
            parser.add_argument('-n_layers', type=int,
                                default=6)  # number of layers in the Transformer model (default 6)
            parser.add_argument('-heads', type=int,
                                default=8)  # number of attention heads (default 8)
            parser.add_argument('-dropout', type=float,
                                default=0.1)  # how much dropout to apply (default 0.1)
            parser.add_argument('-cuda', default=True, action='store_true')
            parser.add_argument('-floyd', action='store_true')
            opt = parser.parse_args()
            opt.device = 0 if opt.cuda is True else -1
            if opt.device == 0:
                assert torch.cuda.is_available()
            # assert opt.k > 0
            # assert opt.max_len > 10  # no need to assert the maximum length
            # print(opt.load_weights,opt.premodels_path)
            SRC, TRG = create_fields(opt)
            model = get_model(opt, len(SRC.vocab), len(TRG.vocab))
            lis = [
                '01 10 07 11 06', '10 08 06 07 03', '07 04 02 11 03',
                '06 03 08 02 04', '02 04 03 10 08', '10 05 09 07 04',
                '01 07 10 04 03', '11 03 06 02 04', '03 02 11 06 04',
                '01 09 05 04 10', '08 05 03 01 10', '11 09 07 06 01',
                '02 11 05 06 04', '09 05 04 03 11', '07 05 11 09 03',
                '05 10 03 09 04', '06 03 05 07 02', '05 06 03 10 02',
                '05 07 09 06 04', '05 09 08 03 07', '07 10 06 08 09',
                '11 08 09 06 02', '08 06 07 10 04', '09 07 02 05 01',
                '03 07 08 01 11', '04 02 08 06 01', '07 10 11 04 02',
                '11 02 07 03 08', '07 06 11 05 09', '05 01 10 04 06',
                '03 06 02 08 04', '06 10 07 08 03', '03 05 06 08 10',
                '11 05 01 08 02', '01 11 10 06 08', '10 04 02 08 09',
                '10 11 02 08 04', '03 02 01 09 05', '05 09 08 04 06',
                '10 02 11 06 03', '08 03 04 01 06', '11 04 02 09 06',
                '06 04 11 10 08', '08 11 03 05 04', '11 01 04 02 05',
                '05 09 01 04 06', '10 07 01 09 06', '10 08 09 01 11',
                '03 04 06 02 01', '02 10 08 09 04', '11 09 03 02 06',
                '04 02 06 01 09', '02 11 10 06 09', '10 01 11 09 04',
                '11 05 06 08 02', '02 07 03 08 11', '11 06 03 09 01',
                '01 04 11 05 03', '01 06 04 11 02', '03 08 02 09 04',
                '05 07 03 01 10', '06 08 09 02 05', '09 02 08 06 11',
                '06 08 02 03 01', '03 10 02 09 11', '11 05 08 06 09',
                '06 03 07 01 10', '08 04 06 01 07', '05 08 07 03 11',
                '03 10 05 02 09', '02 03 05 09 04', '07 04 01 03 06',
                '07 10 09 01 02', '03 01 05 02 09', '07 01 02 11 09',
                '11 07 01 03 06', '07 11 08 01 09', '11 01 08 07 06',
                '08 11 04 05 07', '01 09 06 10 07', '02 05 06 01 11',
                '06 10 11 02 08', '09 06 01 08 05', '06 05 02 10 08',
                '03 01 06 09 10', '11 09 06 04 07', '07 08 06 11 03',
                '07 03 11 04 02', '06 05 10 09 02', '08 04 10 05 11',
                '06 04 08 03 05', '01 07 06 11 04', '04 06 10 02 07',
                '07 05 04 01 06', '04 09 07 06 05', '01 07 10 09 06',
                '01 11 05 07 09', '05 04 11 09 07', '11 05 01 10 03',
                '07 02 11 05 01', '04 05 09 07 08', '03 01 09 10 07',
                '06 05 08 02 04', '04 09 10 11 05', '08 06 05 09 04',
                '01 06 10 03 04', '11 09 10 01 08', '01 04 10 03 02',
                '04 09 10 11 05', '09 03 10 02 07', '08 02 06 11 07',
                '09 06 10 01 02', '06 11 02 08 03', '10 03 04 09 07',
                '05 01 02 11 09', '04 10 07 08 11', '08 05 01 04 06',
                '06 09 10 02 07', '10 07 06 03 01', '10 09 11 07 02',
                '09 11 04 06 08', '06 04 07 08 10', '07 06 08 10 11',
                '09 08 03 10 04', '07 02 11 10 06', '04 08 09 05 10',
                '04 02 08 03 07', '01 02 07 10 09', '01 07 09 03 08',
                '11 04 01 06 03', '02 08 03 01 11', '02 01 06 04 03',
                '05 03 09 01 07', '05 03 08 04 07', '01 03 10 09 02',
                '10 02 05 04 08', '05 02 06 11 07', '01 04 07 09 10',
                '03 10 05 06 07', '09 03 05 07 10', '01 05 08 04 03',
                '09 05 08 03 11', '05 08 02 04 01', '03 11 06 01 04',
                '10 04 06 11 09', '07 05 08 11 10', '08 04 03 07 01',
                '04 07 10 06 02', '03 07 02 11 09', '03 08 11 07 01',
                '02 05 10 09 01', '05 06 08 09 11', '07 05 10 09 06',
                '07 01 10 08 06', '01 11 05 04 10', '09 07 08 06 04',
                '01 11 08 03 05', '01 07 05 08 04', '07 02 08 04 05'

                # '07 03 06 10 09', '08 04 03 11 02', '04 07 03 11 09', '03 02 07 01 04', '04 03 09 01 02',
                # '01 02 05 08 10', '07 10 05 03 01', '08 05 03 10 04', '02 01 06 03 04', '02 04 10 05 11',
                # '04 06 03 05 10', '06 01 05 02 10', '09 02 06 03 01', '08 09 01 03 07', '03 02 10 05 07',
                # '10 08 09 06 07', '03 06 08 10 11', '10 08 06 02 01'
            ]
            num = config.history
            for i, line in enumerate(lis):
                if i >= num - 1:
                    if config.tag:
                        opt.text = " <tag> ".join(lis[i - num + 1:i + 1])
                    else:
                        opt.text = " ".join(lis[i - num + 1:i + 1])
                    result_phrase = translate_one_sentence(
                        opt, model, SRC, TRG)  # predicted value
                    if i + 1 not in history_sum:  # collect the prediction made for draw i+1
                        history_sum[i + 1] = [result_phrase]
                    else:
                        history_sum[i + 1] += [result_phrase]
                # if i != len(lis) - 1:
                #     res_pre[i+1]=lis[i+1]
    print("history_sum", history_sum)
    total2 = []
    total3 = []
    for i in range(3, len(lis)):
        # compare the predictions for draw i (history_sum[i]) with the actual draw lis[i]
        sorting = count_(
            [word for words in history_sum[i] for word in words.split(" ")])

        # similar2=len(list(set(lis[i].split(" ")).intersection(set([a for a,b in sorting[:3]]))))  # take the top-3 numbers
        # similar2=len(list(set(lis[i].split(" ")).intersection(set([a for a,b in sorting[-2:]+sorting[:1] ]))))  # take the top-3 numbers
        # similar3=len(list(set(lis[i].split(" ")).intersection(set([a for a,b in sorting[-2:]+sorting[:2] ]))))  # take the top-4 numbers
        similar2 = len(
            list(
                set(lis[i].split(" ")).intersection(
                    set([a for a, b in sorting[:4]]))))  # take the top-4 most frequent numbers
        similar3 = len(
            list(
                set(lis[i].split(" ")).intersection(
                    set([a for a, b in sorting[:5]]))))  # take the top-5 most frequent numbers

        if similar2 < 2:
            total2.append(-6)
        elif similar2 == 2:
            total2.append(0)
        elif similar2 == 3:
            total2.append(12)

        if similar3 <= 2:
            total3.append(-8)
        elif similar3 == 3:
            total3.append(11)
        elif similar3 == 4:
            total3.append(68)
    print("2", total2)
    sum2 = 0
    for i in total2:
        sum2 += i
    print("任二 一共投注 %d 期 最终结果 输赢值 为 %d" % (len(lis) - 1, sum2))
    print("3", total3)
    sum3 = 0
    for i in total3:
        sum3 += i
    print("任三 一共投注 %d 期 最终结果 输赢值 为 %d" % (len(lis) - 1, sum3))

    print("预测最后一期",
          count_([
              word for words in history_sum[len(lis)]
              for word in words.split(" ")
          ]))  # 预测的最后一期
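
Note: the count_ helper used throughout this example is not shown in the listing. A minimal sketch, assuming it simply returns (token, count) pairs sorted by descending frequency, which is what the sorting[:4] / sorting[:5] slices above rely on:

from collections import Counter

def count_(tokens):
    # Hypothetical reconstruction: tally token occurrences and return
    # (token, count) pairs ordered from most to least frequent.
    return Counter(tokens).most_common()
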
Example #20
0
def main_simple(model_Key):

    lis = [
        '01 10 07 11 06', '10 08 06 07 03', '07 04 02 11 03', '06 03 08 02 04',
        '02 04 03 10 08', '10 05 09 07 04', '01 07 10 04 03', '11 03 06 02 04',
        '03 02 11 06 04', '01 09 05 04 10', '08 05 03 01 10', '11 09 07 06 01',
        '02 11 05 06 04', '09 05 04 03 11', '07 05 11 09 03', '05 10 03 09 04',
        '06 03 05 07 02', '05 06 03 10 02', '05 07 09 06 04', '05 09 08 03 07',
        '07 10 06 08 09', '11 08 09 06 02', '08 06 07 10 04', '09 07 02 05 01',
        '03 07 08 01 11', '04 02 08 06 01', '07 10 11 04 02', '11 02 07 03 08',
        '07 06 11 05 09', '05 01 10 04 06', '03 06 02 08 04', '06 10 07 08 03',
        '03 05 06 08 10', '11 05 01 08 02', '01 11 10 06 08', '10 04 02 08 09',
        '10 11 02 08 04', '03 02 01 09 05', '05 09 08 04 06', '10 02 11 06 03',
        '08 03 04 01 06', '11 04 02 09 06', '06 04 11 10 08', '08 11 03 05 04',
        '11 01 04 02 05', '05 09 01 04 06', '10 07 01 09 06', '10 08 09 01 11',
        '03 04 06 02 01', '02 10 08 09 04', '11 09 03 02 06', '04 02 06 01 09',
        '02 11 10 06 09', '10 01 11 09 04', '11 05 06 08 02', '02 07 03 08 11',
        '11 06 03 09 01', '01 04 11 05 03', '01 06 04 11 02', '03 08 02 09 04',
        '05 07 03 01 10', '06 08 09 02 05', '09 02 08 06 11', '06 08 02 03 01',
        '03 10 02 09 11', '11 05 08 06 09', '06 03 07 01 10', '08 04 06 01 07',
        '05 08 07 03 11', '03 10 05 02 09', '02 03 05 09 04', '07 04 01 03 06',
        '07 10 09 01 02', '03 01 05 02 09', '07 01 02 11 09', '11 07 01 03 06',
        '07 11 08 01 09', '11 01 08 07 06', '08 11 04 05 07', '01 09 06 10 07',
        '02 05 06 01 11', '06 10 11 02 08', '09 06 01 08 05', '06 05 02 10 08',
        '03 01 06 09 10', '11 09 06 04 07', '07 08 06 11 03', '07 03 11 04 02',
        '06 05 10 09 02', '08 04 10 05 11', '06 04 08 03 05', '01 07 06 11 04',
        '04 06 10 02 07', '07 05 04 01 06', '04 09 07 06 05', '01 07 10 09 06',
        '01 11 05 07 09', '05 04 11 09 07', '11 05 01 10 03', '07 02 11 05 01',
        '04 05 09 07 08', '03 01 09 10 07', '06 05 08 02 04', '04 09 10 11 05',
        '08 06 05 09 04', '01 06 10 03 04', '11 09 10 01 08', '01 04 10 03 02',
        '04 09 10 11 05', '09 03 10 02 07', '08 02 06 11 07', '09 06 10 01 02',
        '06 11 02 08 03', '10 03 04 09 07', '05 01 02 11 09', '04 10 07 08 11',
        '08 05 01 04 06', '06 09 10 02 07', '10 07 06 03 01', '10 09 11 07 02',
        '09 11 04 06 08', '06 04 07 08 10', '07 06 08 10 11', '09 08 03 10 04',
        '07 02 11 10 06', '04 08 09 05 10', '04 02 08 03 07', '01 02 07 10 09',
        '01 07 09 03 08', '11 04 01 06 03', '02 08 03 01 11', '02 01 06 04 03',
        '05 03 09 01 07', '05 03 08 04 07', '01 03 10 09 02', '10 02 05 04 08',
        '05 02 06 11 07', '01 04 07 09 10', '03 10 05 06 07', '09 03 05 07 10',
        '01 05 08 04 03', '09 05 08 03 11', '05 08 02 04 01', '03 11 06 01 04',
        '10 04 06 11 09', '07 05 08 11 10', '08 04 03 07 01', '04 07 10 06 02',
        '03 07 02 11 09', '03 08 11 07 01', '02 05 10 09 01', '05 06 08 09 11',
        '07 05 10 09 06', '07 01 10 08 06', '01 11 05 04 10', '09 07 08 06 04',
        '01 11 08 03 05', '01 07 05 08 04', '07 02 08 04 05'
    ]
    if config.history == len(lis):
        pres = []
        press = []
        for key in model_Key.keys():
            for model_path in model_Key[key]:
                parser = argparse.ArgumentParser()
                parser.add_argument('-premodels',
                                    default=True)  # whether to load the existing weights and vocab
                parser.add_argument('-load_weights', default="weights_" +
                                    key)  # folder holding the pretrained weights and field pickles
                parser.add_argument('-premodels_path',
                                    default="model_weights_" +
                                    model_path)  # filename of the pretrained model
                parser.add_argument('-k', type=int,
                                    default=1)  # beam top-k; 1 is enough here (or drop the option)
                parser.add_argument('-max_len', type=int,
                                    default=32)  # maximum output length, including the start token
                parser.add_argument('-d_model', type=int,
                                    default=512)  # dimension of the embeddings and layers (default 512)
                parser.add_argument('-n_layers', type=int,
                                    default=6)  # number of layers in the Transformer model (default 6)
                parser.add_argument('-heads', type=int,
                                    default=8)  # number of attention heads (default 8)
                parser.add_argument('-dropout', type=float,
                                    default=0.1)  # dropout rate (default 0.1)
                parser.add_argument('-cuda', default=True, action='store_true')
                parser.add_argument('-floyd', action='store_true')
                opt = parser.parse_args()
                opt.device = 0 if opt.cuda is True else -1
                if opt.device == 0:
                    assert torch.cuda.is_available()
                # assert opt.k > 0
                # assert opt.max_len > 10  # no need to assert on the maximum length
                # print(opt.load_weights,opt.premodels_path)
                SRC, TRG = create_fields(opt)
                model = get_model(opt, len(SRC.vocab), len(TRG.vocab))
                if config.tag:
                    opt.text = " <tag> ".join(lis)
                else:
                    opt.text = " ".join(lis)
                result_phrase = translate_one_sentence(opt, model, SRC,
                                                       TRG)  # predicted value
                pres.append(result_phrase)
                press += result_phrase.split(" ")
        print(pres)
        # print("press",len(press),press)
        print(count_(press))

    else:
        for key in model_Key.keys():
            for model_path in model_Key[key]:
                parser = argparse.ArgumentParser()
                parser.add_argument('-premodels',
                                    default=True)  # whether to load the existing weights and vocab
                parser.add_argument('-load_weights', default="weights_" +
                                    key)  # folder holding the pretrained weights and field pickles
                parser.add_argument('-premodels_path',
                                    default="model_weights_" +
                                    model_path)  # filename of the pretrained model
                parser.add_argument('-k', type=int,
                                    default=1)  # beam top-k; 1 is enough here (or drop the option)
                parser.add_argument('-max_len', type=int,
                                    default=32)  # maximum output length, including the start token
                parser.add_argument('-d_model', type=int,
                                    default=512)  # dimension of the embeddings and layers (default 512)
                parser.add_argument('-n_layers', type=int,
                                    default=6)  # number of layers in the Transformer model (default 6)
                parser.add_argument('-heads', type=int,
                                    default=8)  # number of attention heads (default 8)
                parser.add_argument('-dropout', type=float,
                                    default=0.1)  # dropout rate (default 0.1)
                parser.add_argument('-cuda', default=True, action='store_true')
                parser.add_argument('-floyd', action='store_true')
                opt = parser.parse_args()
                opt.device = 0 if opt.cuda is True else -1
                if opt.device == 0:
                    assert torch.cuda.is_available()
                # assert opt.k > 0
                # assert opt.max_len > 10  # no need to assert on the maximum length
                # print(opt.load_weights,opt.premodels_path)
                SRC, TRG = create_fields(opt)
                model = get_model(opt, len(SRC.vocab), len(TRG.vocab))
                num = config.history
                overlaps = []  # per-draw overlap between each prediction and the next actual draw
                pres = []
                for i, line in enumerate(lis):
                    if i >= num - 1:
                        if config.tag:
                            opt.text = " <tag> ".join(lis[i - num + 1:i + 1])
                        else:
                            opt.text = " ".join(lis[i - num + 1:i + 1])
                        result_phrase = translate_one_sentence(
                            opt, model, SRC, TRG)  # predicted value
                        if i != len(lis) - 1:
                            similar_values = set(
                                result_phrase.split(" ")).intersection(
                                    set(lis[i + 1].split(" ")))
                            overlaps.append(len(similar_values))
                        pres.append(result_phrase)
                        # press += result_phrase.split(" ")
                total = sum(overlaps)
                print(model_path, len(overlaps), total, total / len(overlaps), overlaps)
                print(model_path, pres)
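
Note: main_simple expects model_Key to map a weights-folder suffix to a list of checkpoint-name suffixes; the paths are built above as "weights_" + key and "model_weights_" + model_path. A hypothetical call, with placeholder names that would have to match your own training output:

if __name__ == '__main__':
    # Illustrative only: keys and filenames below are made up.
    model_Key = {
        'h3': ['h3_500', 'h3_800'],
        'h5': ['h5_500'],
    }
    main_simple(model_Key)
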
Example #21
0
def main():
    ''' Main function '''
    parser = argparse.ArgumentParser()

    parser.add_argument('-data_train', type=str, default="")
    parser.add_argument('-data_dev', required=True)
    parser.add_argument('-data_test', type=str, default="")
    parser.add_argument('-vocab', required=True)

    parser.add_argument('-epoch', type=int, default=10000)
    parser.add_argument('-batch_size', type=int, default=64)

    #parser.add_argument('-d_word_vec', type=int, default=512)
    parser.add_argument('-d_model', type=int, default=512)
    # parser.add_argument('-d_inner_hid', type=int, default=2048)
    # parser.add_argument('-d_k', type=int, default=64)
    # parser.add_argument('-d_v', type=int, default=64)

    parser.add_argument('-n_heads', type=int, default=8)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-n_warmup_steps', type=int, default=4000)

    parser.add_argument('-dropout', type=float, default=0.1)
    # parser.add_argument('-embs_share_weight', action='store_true')
    # parser.add_argument('-proj_share_weight', action='store_true')

    parser.add_argument('-log', default=None)
    parser.add_argument('-save_model', default=None)
    parser.add_argument('-save_mode', type=str, choices=['all', 'best'], default='best')

    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-label_smoothing', action='store_true')
    parser.add_argument('-num_workers', type=int, default=1)

    parser.add_argument('-cnn_name', type=str, default="resnet101")
    parser.add_argument('-cnn_pretrained_model', type=str, default="")
    parser.add_argument('-joint_enc_func', type=str, default="element_multiplication")
    # parser.add_argument('-comparative_module_name', type=str, default="transformer_encoder")
    parser.add_argument('-lr', type=float, default=0.01)
    # parser.add_argument('-step_size', type=int, default=1000)
    # parser.add_argument('-gamma', type=float, default=0.9)
    parser.add_argument('-crop_size', type=int, default=224)
    parser.add_argument('-max_seq_len', type=int, default=64)
    parser.add_argument('-attribute_len', type=int, default=5)

    parser.add_argument('-pretrained_model', type=str, default="")

    parser.add_argument('-rank_alpha', type=float, default=1.0)
    parser.add_argument('-patience', type=int, default=7)
    parser.add_argument('-bleu_valid_every_n', type=int, default=5)
    parser.add_argument('-data_dev_combined', required=True)
    parser.add_argument('-beam_size', type=int, default=5)
    parser.add_argument('-seed', type=int, default=0)
    parser.add_argument('-attribute_vocab_size', type=int, default=1000)
    parser.add_argument('-add_attribute', action='store_true')

    

    args = parser.parse_args()
    args.cuda = not args.no_cuda
    args.d_word_vec = args.d_model

    args.load_weights = False
    if args.pretrained_model:
        args.load_weights = True

    np.random.seed(0)
    torch.manual_seed(0)
    args.device = torch.device('cuda' if args.cuda else 'cpu')

    log_path = args.log.split("/")
    log_path = "/".join(log_path[:-1])
    if not os.path.exists(log_path):
        os.makedirs(log_path)

    model_path = args.save_model.split("/")
    model_path = "/".join(model_path[:-1])
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    print(args)

    if args.data_train:
        print("======================================start training======================================")
        transform = transforms.Compose([
            transforms.RandomCrop(args.crop_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406),
                                 (0.229, 0.224, 0.225))])

        transform_dev = transforms.Compose([
            transforms.CenterCrop(args.crop_size),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406),
                                 (0.229, 0.224, 0.225))])

        vocab = Vocabulary()
        
        vocab.load(args.vocab)

        args.vocab_size = len(vocab)

        # Build data loader
        data_loader_training = get_loader(args.data_train,
                                             vocab, transform,
                                             args.batch_size, shuffle=True, num_workers=args.num_workers, \
                                             max_seq_len=args.max_seq_len,\
                                             attribute_len=args.attribute_len
                                         )

        data_loader_dev = get_loader(args.data_dev,
                                     vocab, transform_dev,
                                     args.batch_size, shuffle=False, num_workers=args.num_workers, \
                                     max_seq_len=args.max_seq_len,\
                                     attribute_len=args.attribute_len
                                     )

        data_loader_bleu = get_loader_test(args.data_dev_combined,
                                     vocab, transform_dev,
                                     1, shuffle=False,
                                    attribute_len=args.attribute_len
                                     )

        list_of_refs_dev = load_ori_token_data_new(args.data_dev_combined)

        model = get_model(args, load_weights=False)


        print(count_parameters(model))

        # print(model.get_trainable_parameters())
        # init_lr = np.power(args.d_model, -0.5)

        # optimizer = torch.optim.Adam(model.get_trainable_parameters(), lr=init_lr)
        optimizer = get_std_opt(model, args)
        
        train( model, data_loader_training, data_loader_dev, optimizer ,args, vocab, list_of_refs_dev, data_loader_bleu)

    if args.data_test:
        print("======================================start testing==============================")
        args.pretrained_model = args.save_model 
        test(args)
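
Note: get_std_opt is not defined in this listing; in the Annotated-Transformer convention it wraps Adam with the warm-up schedule from "Attention Is All You Need", driven by args.d_model and args.n_warmup_steps. A sketch of that schedule (an assumption about what get_std_opt computes, not code from this repository):

def noam_lr(step, d_model=512, warmup_steps=4000):
    # lr = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)
    step = max(step, 1)  # avoid division by zero on the very first step
    return d_model ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)

print(noam_lr(1), noam_lr(4000), noam_lr(40000))  # rises until warmup, then decays
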
Example #22
0
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-trg_data', required=True)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-savetokens', type=int, default=0)

    opt = parser.parse_args()

    opt.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(opt.device)

    read_data(opt)
    SRC, TRG = create_fields(opt)

    opt.train = create_dataset(opt, SRC, TRG)

    # # convert translation dictionary to tokens dictionary
    # translation_dictionar = pickle.load(open('data/translation_dictionary.p', 'rb'))
    # new_dict = {}
    # for en_word, fr_word in translation_dictionar.items():
    #     new_dict[SRC.vocab.stoi[en_word]] = TRG.vocab.stoi[fr_word.lower()]

    # pickle.dump(new_dict, open('data/tokenized_translation_dictionary.p', 'wb'))

    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))
    model = model.to(device=opt.device)

    if opt.savetokens == 1:
        pickle.dump(SRC.vocab, open('SRC_vocab.p',
                                    'wb'))  # saves torchtext Vocab object
        pickle.dump(TRG.vocab, open('TRG_vocab.p',
                                    'wb'))  # saves torchtext Vocab object

    opt.optimizer = torch.optim.Adam(model.parameters(),
                                     lr=opt.lr,
                                     betas=(0.9, 0.98),
                                     eps=1e-9)
    if opt.SGDR == True:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print(
            "model weights will be saved every %d minutes and at end of epoch to directory weights/"
            % (opt.checkpoint))

    if opt.load_weights is not None and opt.floyd is not None:
        os.mkdir('weights')
        pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
        pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))

    train_model(model, opt)

    if opt.floyd is False:
        promptNextAction(model, opt, SRC, TRG)
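
Note: when -savetokens 1 is passed, the torchtext vocab objects are pickled to SRC_vocab.p / TRG_vocab.p. A minimal sketch of reloading them later for inference (assuming the same torchtext version that produced the pickles):

import pickle

with open('SRC_vocab.p', 'rb') as f:
    SRC_vocab = pickle.load(f)
with open('TRG_vocab.p', 'rb') as f:
    TRG_vocab = pickle.load(f)
print(len(SRC_vocab), len(TRG_vocab))  # vocabulary sizes, as passed to get_model above
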
Example #23
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-fold', default=0)
    parser.add_argument('-src_lang', default='en')
    parser.add_argument('-trg_lang', default='en')
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=1)
    parser.add_argument('-heads', type=int, default=1)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=192)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-savetokens', type=int, default=0)

    opt = parser.parse_args()
    
    opt.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(opt.device)
    
    read_data(opt)
    SRC, TRG = create_fields(opt)

    opt.train = create_dataset(opt, SRC, TRG)

    model = get_model(opt, len(SRC.vocab), len(TRG.vocab), model_type='train')
    model = model.to(device=opt.device)

    if opt.savetokens == 1:
        pickle.dump(SRC.vocab, open('SRC_vocab.p', 'wb')) # saves torchtext Vocab object
        pickle.dump(TRG.vocab, open('TRG_vocab.p', 'wb')) # saves torchtext Vocab object
        
    opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr, betas=(0.9, 0.98), eps=1e-9)
    if opt.SGDR == True:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print("model weights will be saved every %d minutes and at end of epoch to directory weights/"%(opt.checkpoint))
    
    if opt.load_weights is not None and opt.floyd is not None:
        os.mkdir('weights')
        pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
        pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))
    
    # train model
    train_model(model, opt)

    # save weights
    dst = '../gdrive/My Drive/tweet-sentiment-extraction'
    print("saving weights to " + dst + "/...")
    torch.save(model.state_dict(), f'{dst}/model_weights')
    pickle.dump(SRC, open(f'{dst}/SRC.pkl', 'wb'))
    pickle.dump(TRG, open(f'{dst}/TRG.pkl', 'wb'))
    saved_once = 1
    print("weights and field pickles saved to " + dst)
Example #24
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-data_path', required=True)
    parser.add_argument('-output_dir', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-val_check_every_n', type=int, default=3)
    parser.add_argument('-calculate_val_loss', action='store_true')
    parser.add_argument('-tensorboard_graph', action='store_true')
    parser.add_argument('-compositional_eval', action='store_true')
    parser.add_argument('-char_tokenization', action='store_true')
    parser.add_argument('-alex', action='store_true')
    parser.add_argument('-n_val', type=int, default=1000)
    parser.add_argument('-n_test', type=int, default=1000)
    parser.add_argument('-do_test', action='store_true')
    parser.add_argument('-epochs', type=int, default=50)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=3000)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.00001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=512)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)

    opt = parser.parse_args()

    opt.device = 0 if opt.no_cuda is False else -1
    if opt.device == 0:
        assert torch.cuda.is_available()
        if opt.alex:
            torch.cuda.set_device(1)

    read_data(opt)
    SRC, TRG = create_fields(opt)
    opt.train, opt.val = create_dataset(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab), SRC)

    if opt.tensorboard_graph:
        writer = SummaryWriter('runs')
        for i, batch in enumerate(opt.train):
            src = batch.src.transpose(0, 1).cuda()
            trg = batch.trg.transpose(0, 1).cuda()
            trg_input = trg[:, :-1]
            src_mask, trg_mask = create_masks(src, trg_input, opt)
            writer.add_graph(model, (src, trg_input, src_mask, trg_mask))
            break
        writer.close()

    # beam search parameters
    opt.k = 1
    opt.max_len = opt.max_strlen

    opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr, betas=(0.9, 0.98), eps=1e-9)
    opt.scheduler = ReduceLROnPlateau(opt.optimizer, factor=0.2, patience=5, verbose=True)

    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print("model weights will be saved every %d minutes and at end of epoch to directory weights/"%(opt.checkpoint))
    
    train_model(model, opt, SRC, TRG)

    if opt.floyd is False:
        promptNextAction(model, opt, SRC, TRG)
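
Note: ReduceLROnPlateau is stepped with a validation metric rather than per batch, so train_model presumably calls opt.scheduler.step(val_loss) once per validation pass. A self-contained sketch of that pattern (placeholder parameter and metric, not the actual train_model from this repository):

import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.Adam([param], lr=1e-5)
scheduler = ReduceLROnPlateau(optimizer, factor=0.2, patience=5)
for epoch in range(10):
    val_loss = 1.0  # placeholder; use the real validation loss here
    scheduler.step(val_loss)  # lr is cut by `factor` after `patience` epochs without improvement
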
Example #25
0
def test(opt):

    transform = transforms.Compose([
        transforms.CenterCrop(opt.crop_size),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    vocab = Vocabulary()

    vocab.load(opt.vocab)

    data_loader = get_loader_test(opt.data_test,
                                  vocab,
                                  transform,
                                  opt.batch_size,
                                  shuffle=False,
                                  attribute_len=opt.attribute_len)

    list_of_refs = load_ori_token_data_new(opt.data_test)

    model = get_model(opt, load_weights=True)

    count = 0

    hypotheses = {}

    model.eval()

    for batch in tqdm(data_loader,
                      mininterval=2,
                      desc='  - (Test)',
                      leave=False):

        image0, image1, image0_attribute, image1_attribute = map(
            lambda x: x.to(opt.device), batch)

        hyp = beam_search(image0, image1, model, opt, vocab, image0_attribute,
                          image1_attribute)
        #         hyp = greedy_search(image1.to(device), image2.to(device), model, opt, vocab)

        hyp = hyp.split("<end>")[0].strip()

        hypotheses[count] = ["it " + hyp]

        count += 1

    # =================================================
    # Set up scorers
    # =================================================
    print('setting up scorers...')
    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        # (Meteor(),"METEOR"),
        (Rouge(), "ROUGE_L"),
        # (Cider(), "CIDEr"),
        (Cider(), "CIDEr"),
        (CiderD(), "CIDEr-D")
        # (Spice(), "SPICE")
    ]

    for scorer, method in scorers:
        print('computing %s score...' % (scorer.method()))
        score, scores = scorer.compute_score(list_of_refs, hypotheses)
        if type(method) == list:
            for sc, scs, m in zip(score, scores, method):
                # self.setEval(sc, m)
                # self.setImgToEvalImgs(scs, gts.keys(), m)
                print("%s: %0.3f" % (m, sc))
        else:
            # self.setEval(score, method)
            # self.setImgToEvalImgs(scores, gts.keys(), method)
            print("%s: %0.3f" % (method, score))

    for i in range(len(hypotheses)):
        ref = {i: list_of_refs[i]}
        hyp = {i: hypotheses[i]}
        print(ref)
        print(hyp)
        for scorer, method in scorers:
            print('computing %s score...' % (scorer.method()))
            score, scores = scorer.compute_score(ref, hyp)
            if type(method) == list:
                for sc, scs, m in zip(score, scores, method):
                    # self.setEval(sc, m)
                    # self.setImgToEvalImgs(scs, gts.keys(), m)
                    print("%s: %0.3f" % (m, sc))
            else:
                # self.setEval(score, method)
                # self.setImgToEvalImgs(scores, gts.keys(), method)
                print("%s: %0.3f" % (method, score))