Example #1
def load_encoder_decoder(voc, checkpoint, configs):
    """
    Initialize encoder and decoder, and load their states from the checkpoint if a previous one exists
    :param voc: Vocabulary
    :param checkpoint: dict
    :param configs: dict
    :return: Encoder, LuongAttentionDecoderRNN
    """
    logging.info('Building encoder and decoder ...')

    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, configs["hidden_size"])

    # Initialize encoder & decoder models
    encoder = EncoderRNN(configs["hidden_size"], embedding,
                         configs["encoder_n_layers"], configs["dropout"])
    decoder = LuongAttentionDecoderRNN(embedding, voc.num_words, configs)

    if checkpoint:
        voc.__dict__ = checkpoint['voc_dict']
        embedding.load_state_dict(checkpoint['embedding'])
        encoder.load_state_dict(checkpoint['en'])
        decoder.load_state_dict(checkpoint['de'])

    logging.info('Models built and ready to go!')
    return encoder.to(get_device()), decoder.to(get_device())
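A minimal usage sketch for this loader. The checkpoint path, the Voc constructor, and the configs keys shown are assumptions inferred from how the function reads them, not part of the original snippet; LuongAttentionDecoderRNN may consume further configs keys.

import os
import torch

CHECKPOINT_PATH = 'checkpoints/model.tar'  # hypothetical location
configs = {"hidden_size": 500, "encoder_n_layers": 2, "dropout": 0.1}
voc = Voc()  # assumed vocabulary object exposing num_words
checkpoint = torch.load(CHECKPOINT_PATH) if os.path.exists(CHECKPOINT_PATH) else None
encoder, decoder = load_encoder_decoder(voc, checkpoint, configs)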
Example #2
def main():
    ap = argparse.ArgumentParser()
    ap.add_argument('--dev_files', default='../amr_anno_1.0/data/split/dev/*',
                    help='dev files.')
    ap.add_argument('--log_dir', default='./log',
                    help='log directory')
    ap.add_argument('--exp_name', default='experiment',
                    help='experiment name')
    args = ap.parse_args()
    
    # read dev files
    dev_files = glob.glob(args.dev_files)
    dev_pairs = AMR.read_AMR_files(dev_files, True)
    
    logdir = args.log_dir
    exp_dir = os.path.join(logdir, args.exp_name)
    os.makedirs(exp_dir, exist_ok=True)  # also creates logdir if needed
    
    max_iter = 0
    dev_bleu = 0.0
    while True:
        load_state_file = None
        state_files = glob.glob(exp_dir + '/*')
        for sf in state_files:
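            # Parse the iteration number from the checkpoint path; assumes names
            # like 'state_1000.pt' with no other underscores in the path.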
            iter_num = int(sf.split('_')[1].split('.')[0])
            if iter_num > max_iter:
                max_iter = iter_num
                load_state_file = sf
        if load_state_file is not None:
            state = torch.load(load_state_file)
            amr_vocab = state['amr_vocab']
            en_vocab = state['en_vocab']
            hidden_size = state['hidden_size']
            edge_size = state['edge_size']
            drop = state['dropout']
            mlength = state['max_length']
            logging.info('loaded checkpoint %s', load_state_file)
            
            encoder = EncoderRNN(amr_vocab.n_nodes, hidden_size).to(device)
            child_sum = ChildSum(amr_vocab.n_edges, edge_size, hidden_size).to(device)
            decoder = AttnDecoderRNN(hidden_size, en_vocab.n_words, dropout_p=drop, max_length=mlength).to(device)
            encoder.load_state_dict(state['enc_state'])
            child_sum.load_state_dict(state['sum_state'])
            decoder.load_state_dict(state['dec_state'])
            # translate from the dev set
            translate_random_amr(encoder, child_sum, decoder, dev_pairs, amr_vocab, en_vocab, mlength, n=10)
            translated_amrs = translate_amrs(encoder, child_sum, decoder, dev_pairs, amr_vocab, en_vocab, mlength)
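            # corpus_bleu expects one list of reference token lists per candidate;
            # each dev pair contributes a single reference here.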
            references = [[pair[0]] for pair in dev_pairs[:len(translated_amrs)]]
            candidates = [sent.split() for sent in translated_amrs]
            dev_bleu = corpus_bleu(references, candidates)
            logging.info('Dev BLEU score: %.2f', dev_bleu)
        else:
            logging.info('No new checkpoint found. Last DEV BLEU score: %.2f', dev_bleu)
        
        time.sleep(20)
Example #3
def main():
    parser = argparse.ArgumentParser("English - Lojban translation")
    parser.add_argument("--source", default='loj', help="source language data")
    parser.add_argument("--target", default='en', help="target language data")
    parser.add_argument("--iters",
                        type=int,
                        default=100000,
                        help="number of iterations to train")
    parser.add_argument("--no-train",
                        type=bool,
                        default=False,
                        help="Do not perform training. Only validation")
    parser.add_argument("--pretrain-encoder",
                        help="Path to pretrained encoder")
    parser.add_argument("--pretrain-decoder",
                        help="Path to pretrained decoder")
    parser.add_argument(
        "--pretrain-input-words",
        type=int,
        help="Number of source language words in pretrained model")
    parser.add_argument(
        "--pretrain-output-words",
        type=int,
        help="Number of target language words in pretrained model")
    parser.add_argument("--encoder-ckpt",
                        default="encoder.pth",
                        help="Name of encoder checkpoint filename")
    parser.add_argument("--decoder-ckpt",
                        default="decoder.pth",
                        help="Name of decoder checkpoint filename")
    parser.add_argument("--prefix",
                        default='',
                        help='Prefix, added to data files')
    args = parser.parse_args()

    input_lang, output_lang, pairs, pairs_val = prepare_data(
        args.source, args.target, prefix=args.prefix)
    langs = (input_lang, output_lang)
    print(random.choice(pairs))

    input_words = args.pretrain_input_words or input_lang.n_words
    output_words = args.pretrain_output_words or output_lang.n_words

    # Size the models with the pretrained vocabulary counts (when given) so the
    # loaded state dicts match the layer shapes.
    encoder = EncoderRNN(input_words, hidden_size).to(device)
    decoder = AttnDecoderRNN(hidden_size, output_words,
                             dropout_p=0.1).to(device)

    if args.pretrain_encoder and args.pretrain_decoder:
        load_pretrained_model(encoder, decoder, args.pretrain_encoder,
                              args.pretrain_decoder)

    if not args.no_train:
        train(encoder, decoder, args.iters, pairs, langs, print_every=5000)
        torch.save(encoder.state_dict(), args.encoder_ckpt)
        torch.save(decoder.state_dict(), args.decoder_ckpt)

    evaluate_all(encoder, decoder, pairs_val, langs)
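load_pretrained_model is defined elsewhere in this project; a minimal sketch consistent with the call above, assuming each checkpoint file holds a plain state dict:

import torch

def load_pretrained_model(encoder, decoder, encoder_path, decoder_path):
    # Assumed behavior: load saved state dicts into the already-built models.
    encoder.load_state_dict(torch.load(encoder_path, map_location='cpu'))
    decoder.load_state_dict(torch.load(decoder_path, map_location='cpu'))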
Example #4
def main():
    # Load the vocabulary and the datasets
    voc = Lang('data/WORDMAP.json')
    print("Vocabulary size: " + str(voc.n_words))
    train_data = SaDataset('train', voc)
    val_data = SaDataset('valid', voc)

    # Initialize the model
    encoder = EncoderRNN(voc.n_words, hidden_size, encoder_n_layers, dropout)
    # Move the model to the chosen device: GPU memory if CUDA, main memory on CPU
    encoder = encoder.to(device)

    # Initialize the optimizer. The optimizer drives gradient descent by
    # adjusting the model parameters; optim is PyTorch's optimization package
    # and Adam is one of its algorithms.
    print('Building optimizers ...')
    # Arguments: the parameters to optimize, and the learning rate
    optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    # Best accuracy so far
    best_acc = 0
    epochs_since_improvement = 0

    # Loop over the training epochs
    for epoch in range(0, epochs):
        # Decay learning rate if there is no improvement for 8 consecutive epochs, and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(optimizer, 0.8)

        # Train for one epoch
        train(epoch, train_data, encoder, optimizer)

        # Validate on the held-out set to guard against overfitting
        val_acc, val_loss = valid(val_data, encoder)
        print('\n * ACCURACY - {acc:.3f}, LOSS - {loss:.3f}\n'.format(acc=val_acc, loss=val_loss))

        # Check whether accuracy improved
        is_best = val_acc > best_acc
        best_acc = max(best_acc, val_acc)

        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, encoder, optimizer, val_acc, is_best)

        # Reshuffle the training and validation samples
        np.random.shuffle(train_data.samples)
        np.random.shuffle(val_data.samples)
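adjust_learning_rate is provided by the surrounding project; a minimal sketch consistent with how it is called above, where a factor of 0.8 shrinks the current rate:

def adjust_learning_rate(optimizer, shrink_factor):
    # Scale every parameter group's learning rate by shrink_factor.
    for param_group in optimizer.param_groups:
        param_group['lr'] *= shrink_factor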
Example #5
def main(opt):
    video_path = opt["video_path"]

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    image_feats = extract_image_feats(video_path)
    image_feats = torch.from_numpy(image_feats).type(torch.FloatTensor).unsqueeze(0)

    encoder = EncoderRNN(opt["dim_vid"], opt["dim_hidden"], bidirectional=bool(opt["bidirectional"]),
                             input_dropout_p=opt["input_dropout_p"], rnn_dropout_p=opt["rnn_dropout_p"])
    decoder = DecoderRNN(16860, opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                            input_dropout_p=opt["input_dropout_p"],
                            rnn_dropout_p=opt["rnn_dropout_p"], bidirectional=bool(opt["bidirectional"]))
    model = S2VTAttModel(encoder, decoder).cuda()

    model.load_state_dict(torch.load(opt["saved_model"]))
    model.eval()
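    # Rebuild opt with inference-time flags consumed by the model's forward pass.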
    opt = dict()
    opt['child_sum'] = True
    opt['temporal_attention'] = True
    opt['multimodel_attention'] = True
    with torch.no_grad():
        _, seq_preds = model(image_feats.cuda(), mode='inference', opt=opt)
    vocab = json.load(open('data/info.json'))['ix_to_word']
    sent = NLUtils.decode_sequence(vocab, seq_preds)
    print(sent)
Example #6
def main(opt):
    dataset = VideoDataset(opt, 'train')
    dataloader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True, num_workers=0, pin_memory=True)
    global dataset_val
    global dataloader_val
    dataset_val = VideoDataset(opt, 'val')
    dataloader_val = DataLoader(dataset_val, batch_size=opt["batch_size"], shuffle=True, num_workers=0, pin_memory=True)
    opt["vocab_size"] = dataset.get_vocab_size()
    encoder = EncoderRNN(
        opt["dim_vid"],
        opt["dim_hidden"],
        bidirectional=bool(opt["bidirectional"]),
        input_dropout_p=opt["input_dropout_p"],
        rnn_cell=opt['rnn_type'],
        rnn_dropout_p=opt["rnn_dropout_p"])
    decoder = DecoderRNN(
        opt["vocab_size"],
        opt["max_len"],
        opt["dim_hidden"],
        opt["dim_word"],
        input_dropout_p=opt["input_dropout_p"],
        rnn_cell=opt['rnn_type'],
        rnn_dropout_p=opt["rnn_dropout_p"],
        bidirectional=bool(opt["bidirectional"]))
    model = EncoderDecoderModel(encoder, decoder)
    model = model.cuda()
    model = nn.DataParallel(model)
    model.load_state_dict(torch.load('data/save_vatex_batch_noc3d/model_500.pth'))
    crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()
    optimizer = optim.Adam(model.parameters(), lr=opt["learning_rate"], weight_decay=opt["weight_decay"])
    exp_lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=opt["learning_rate_decay_every"], gamma=opt["learning_rate_decay_rate"])
    print("Data Loaded")
    train(dataloader, model, crit, optimizer, exp_lr_scheduler, opt, rl_crit)
Example #7
def main(opt):
    dataset = VideoDataset(opt, 'inference')
    opt["vocab_size"] = dataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len

    if opt['beam_size'] != 1:
        assert opt["batch_size"] == 1
    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"],
                          opt["max_len"],
                          opt["dim_hidden"],
                          opt["dim_word"],
                          opt['dim_vid'],
                          n_layers=opt['num_layers'],
                          rnn_cell=opt['rnn_type'],
                          bidirectional=opt["bidirectional"],
                          rnn_dropout_p=opt["rnn_dropout_p"])
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"],
                             opt["dim_hidden"],
                             n_layers=opt['num_layers'],
                             rnn_cell=opt['rnn_type'],
                             bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["vocab_size"],
                             opt["max_len"],
                             opt["dim_hidden"],
                             opt["dim_word"],
                             n_layers=opt['num_layers'],
                             rnn_cell=opt['rnn_type'],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder)
    else:
        return

    # if torch.cuda.device_count() > 1:
    #     print("{} devices detected, switch to parallel model.".format(torch.cuda.device_count()))
    #     model = nn.DataParallel(model)

    convnet = 'nasnetalarge'
    vocab = dataset.get_vocab()
    full_decoder = ConvS2VT(convnet, model, opt)

    tf_img_fn = ptm_utils.TransformImage(full_decoder.conv)
    load_img_fn = PIL.Image.fromarray

    for video_path in opt['videos']:
        print(video_path)
        with torch.no_grad():
            frames = skvideo.io.vread(video_path)
            batches = create_batches(frames, load_img_fn, tf_img_fn)
            seq_prob, seq_preds = full_decoder(batches, mode='inference')
            sents = utils.decode_sequence(vocab, seq_preds)

            for sent in sents:
                print(sent)
Example #8
def main(args):
    config_path = os.path.join(args.config_path, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    print('[-] Loading pickles')
    dataset_path = Path(config["dataset_path"])
    input_lang = CustomUnpickler(open(dataset_path / 'input_lang.pkl', 'rb')).load()
    output_lang = CustomUnpickler(open(dataset_path / 'output_lang.pkl', 'rb')).load()
    pairs = CustomUnpickler(open(dataset_path / 'pairs.pkl', 'rb')).load()

    # input_lang = load_pkl(dataset_path / 'input_lang.pkl')
    # output_lang = load_pkl(dataset_path / 'output_lang.pkl')
    # pairs = load_pkl(dataset_path / 'pairs.pkl')

    max_len = config["max_len"]
    lr = config["model_cfg"]["lr"]
    hidden_size = config["model_cfg"]["hidden_size"]
    train_iters = args.train_iters
    device = torch.device("cuda:%s" % args.ordinal if torch.cuda.is_available() else "cpu")

    encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    attn_decoder = AttnDecoderRNN(hidden_size, output_lang.n_words, max_len, dropout_p=0.1).to(device)

    trainer = Trainer(device, encoder, attn_decoder, input_lang, output_lang, pairs, max_len, lr,
                      ckpt_path=config["models_path"])
    if args.load_models:
        trainer.load_models()
    trainer.run_epoch(train_iters)
Example #9
def main(opt):
    dataset = VideoDataset(opt, 'val', 'chinese')
    opt["vocab_size"] = 13491  #dataset.get_vocab_size() + chinDataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len
    encoder = EncoderRNN(opt["dim_vid"],
                         opt["dim_hidden"],
                         bidirectional=bool(opt["bidirectional"]),
                         input_dropout_p=opt["input_dropout_p"],
                         rnn_cell=opt['rnn_type'],
                         rnn_dropout_p=opt["rnn_dropout_p"])
    decoder = DecoderRNN(opt["vocab_size"],
                         opt["max_len"],
                         opt["dim_hidden"],
                         opt["dim_word"],
                         input_dropout_p=opt["input_dropout_p"],
                         rnn_cell=opt['rnn_type'],
                         rnn_dropout_p=opt["rnn_dropout_p"],
                         bidirectional=bool(opt["bidirectional"]))
    model = S2VTAttModel(encoder, decoder)
    # Setup the model
    model.load_state_dict(
        torch.load(opt["saved_model"], map_location=torch.device('cpu')))
    crit = utils.LanguageModelCriterion()

    test(model, crit, dataset, dataset.get_vocab(), opt)
Example #10
def main(opt):
    dataset = VideoDataset(opt, 'test')
    opt.vocab_size = dataset.get_vocab_size()
    opt.seq_length = dataset.seq_length
    if opt.model == 'S2VTModel':
        model = S2VTModel(opt.vocab_size,
                          opt.seq_length,
                          opt.dim_hidden,
                          opt.dim_word,
                          rnn_dropout_p=opt.rnn_dropout_p).cuda()
    elif opt.model == "S2VTAttModel":
        encoder = EncoderRNN(opt.dim_vid, opt.dim_hidden)
        decoder = DecoderRNN(opt.vocab_size,
                             opt.seq_length,
                             opt.dim_hidden,
                             opt.dim_word,
                             rnn_dropout_p=0.2)
        model = S2VTAttModel(encoder, decoder).cuda()
    model = nn.DataParallel(model)
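    # nn.DataParallel prefixes parameter names with 'module.', so the checkpoint
    # loaded below must have been saved from a DataParallel-wrapped model.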
    # Setup the model
    model.load_state_dict(torch.load(opt.saved_model))
    model.eval()
    crit = utils.LanguageModelCriterion()

    test(model, crit, dataset, dataset.get_vocab(), opt)
Example #11
def main(opt):
    dataset = VideoDataset(opt, "test")
    opt["vocab_size"] = dataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len
    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"],
                          opt["max_len"],
                          opt["dim_hidden"],
                          opt["dim_word"],
                          rnn_dropout_p=opt["rnn_dropout_p"]).cuda()
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"],
                             opt["dim_hidden"],
                             bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["vocab_size"],
                             opt["max_len"],
                             opt["dim_hidden"],
                             opt["dim_word"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder).cuda()
    model = nn.DataParallel(model)
    # Setup the model
    model.load_state_dict(torch.load(opt["saved_model"]))
    crit = utils.LanguageModelCriterion()

    get_caption(model, crit, dataset, dataset.get_vocab(), opt)
Example #12
def main():
    lang1 = "eng"
    lang2 = "fra"
    with open("../data/data/" + lang1 + "-" + lang2 + ".txt", encoding='utf-8') as f:
        lines = f.readlines()
    eng_sentences, fra_sentences = data_loaders.getSentences(lines)
    print(len(eng_sentences), len(fra_sentences))
    eng_lang = Lang(lang1)
    eng_lang.parseSentences(eng_sentences)
    fra_lang = Lang(lang2)
    fra_lang.parseSentences(fra_sentences)
    print("No of eng words: ", len(eng_lang.vocab))
    print("No of fra words: ", len(fra_lang.vocab))
    pairs = data_loaders.createPairs(eng_sentences, fra_sentences)
    print("Length of pairs: ", len(pairs))

    hidden_size = 256
    encoder1 = EncoderRNN(len(eng_lang.vocab), hidden_size).to(device)
    attn_decoder1 = DecoderRNN(len(fra_lang.vocab), hidden_size,
                               len(fra_lang.vocab)).to(device)

    train.trainIters(encoder1,
                     attn_decoder1,
                     75000,
                     pairs,
                     eng_lang,
                     fra_lang,
                     print_every=5000)
Example #13
def main(opt):
    dataset = VideoDataset(opt, 'train')
    dataloader = DataLoader(dataset, batch_size=opt["batch_size"], shuffle=True)
    opt["vocab_size"] = dataset.get_vocab_size()
    encoder = EncoderRNN(
        opt["dim_vid"],
        opt["dim_hidden"],
        bidirectional=bool(opt["bidirectional"]),
        input_dropout_p=opt["input_dropout_p"],
        rnn_cell=opt['rnn_type'],
        rnn_dropout_p=opt["rnn_dropout_p"])
    decoder = DecoderRNN(
        opt["vocab_size"],
        opt["max_len"],
        opt["dim_hidden"],
        opt["dim_word"],
        input_dropout_p=opt["input_dropout_p"],
        rnn_cell=opt['rnn_type'],
        rnn_dropout_p=opt["rnn_dropout_p"],
        bidirectional=bool(opt["bidirectional"]))
    model = S2VTAttModel(encoder, decoder)
    #model = S2VTModel(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"], opt['dim_vid'], rnn_cell=opt['rnn_type'], n_layers=opt['num_layers'], rnn_dropout_p=opt["rnn_dropout_p"])
    #model = model.cuda()
    crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()
    optimizer = optim.Adam(
        model.parameters(),
        lr=opt["learning_rate"],
        weight_decay=opt["weight_decay"])
    exp_lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=opt["learning_rate_decay_every"],
        gamma=opt["learning_rate_decay_rate"])

    train(dataloader, model, crit, optimizer, exp_lr_scheduler, opt, rl_crit)
Example #14
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    SOS_token = 0
    EOS_token = 1
    MASKED_token = 2
    MAX_LENGTH = 42

    hidden_size = 325
    train_iters = 20
    pretrain_train_iters = 2000
    dataset = 'imdb'
    lang_filename = './data/' + dataset + '_lang.pkl'

    if os.path.exists(lang_filename):
        with open(lang_filename, 'rb') as file:
            (lang, lines) = pkl.load(file)
    else:
        lang, lines = prepareData(dataset)
        with open(lang_filename, 'wb') as file:
            pkl.dump((lang, lines), file)

    pretrained_filename = './pretrained/pretrained_lstm_' + dataset + '_' + str(
        hidden_size) + '_' + str(pretrain_train_iters) + '.pkl'

    model_filename = './pretrained/maskmle_' + dataset + '_' + str(
        hidden_size) + '_' + str(train_iters) + '.pkl'

    if os.path.exists(pretrained_filename):
        with open(pretrained_filename, 'rb') as file:
            pretrained_lstm = pkl.load(file)
    else:
        raise NotImplementedError('pretrained lstm is not available')

    encoder1 = EncoderRNN(lang.n_words, hidden_size).to(device)
    attn_decoder1 = AttnDecoderRNN(hidden_size, lang.n_words,
                                   dropout_p=0.1).to(device)
    print("Total number of trainable parameters:",
          count_parameters(encoder1) + count_parameters(attn_decoder1))

    def copy_lstm_weights(from_, *args):
        for to_ in args:
            to_.weight_ih_l0 = from_.weight_ih_l0
            to_.weight_hh_l0 = from_.weight_hh_l0
            to_.bias_ih_l0 = from_.bias_ih_l0
            to_.bias_hh_l0 = from_.bias_hh_l0

    copy_lstm_weights(pretrained_lstm.lstm, encoder1.lstm, attn_decoder1.lstm)

    encoder1.embedding.weight = pretrained_lstm.embedding.weight
    attn_decoder1.embedding.weight = pretrained_lstm.embedding.weight

    trainIters(encoder1,
               attn_decoder1,
               lang,
               lines,
               train_iters,
               print_every=train_iters // 20,
               plot_every=train_iters // 20)
Example #15
def main():
    dataset = 'imdb'
    hidden_size = 325
    train_iters = 40
    pretrain_train_iters = 40
    lang, lines = cachePrepareData(dataset)

    PATH = './pretrained/'
    pretrained_filename = PATH + 'pretrained_lstm_' + dataset + '_' + str(hidden_size) + '_' + str(pretrain_train_iters) + '.pt'
    
    model_filename = 'maskmle_' + dataset + '_' + str(hidden_size) + '_' + str(train_iters) + '.pt'
    
    encoder1 = EncoderRNN(lang.n_words, hidden_size).to(device)
    encoder1.load_state_dict(torch.load(PATH + 'e_' + model_filename))
    
    attn_decoder1 = AttnDecoderRNN(hidden_size, lang.n_words, dropout_p=0.1).to(device)
    attn_decoder1.load_state_dict(torch.load(PATH + 'd_' + model_filename))
    print(evaluateRandomly(encoder1, attn_decoder1, lang, lines, 20, 0.5))
Example #16
def main(opt):
    opt_test = dict(opt)  # copy so test-time settings don't overwrite the training opts
    test_dataset = VideoDataset(opt_test, 'test')
    opt_test["vocab_size"] = test_dataset.get_vocab_size()
    opt_test["seq_length"] = test_dataset.max_len
    dataset = VideoDataset(opt, 'train')
    dataloader = DataLoader(dataset,
                            batch_size=opt["batch_size"],
                            shuffle=True)
    opt["vocab_size"] = dataset.get_vocab_size()
    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"],
                          opt["max_len"],
                          opt["dim_hidden"],
                          opt["dim_word"],
                          opt['dim_vid'],
                          rnn_cell=opt['rnn_type'],
                          n_layers=opt['num_layers'],
                          rnn_dropout_p=opt["rnn_dropout_p"])
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(
            opt["dim_vid"],
            opt["dim_hidden"],
            # bidirectional=opt["bidirectional"],
            input_dropout_p=opt["input_dropout_p"],
            rnn_cell=opt['rnn_type'],
            rnn_dropout_p=opt["rnn_dropout_p"])
        second_lstm = Two_Lstm(
            opt["dim_vid"],
            opt["dim_hidden"],
            # bidirectional=opt["bidirectional"],
            input_dropout_p=opt["input_dropout_p"],
            rnn_cell=opt['rnn_type'],
            rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["vocab_size"],
                             opt["max_len"],
                             opt["dim_hidden"],
                             opt["dim_word"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_cell=opt['rnn_type'],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        # bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, second_lstm, decoder)
    model = model.cuda()
    crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()
    optimizer = optim.Adam(model.parameters(),
                           lr=opt["learning_rate"],
                           weight_decay=opt["weight_decay"])
    exp_lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=opt["learning_rate_decay_every"],
        gamma=opt["learning_rate_decay_rate"])

    train(dataloader, model, crit, optimizer, exp_lr_scheduler, opt, rl_crit,
          opt_test, test_dataset)
Example #17
def main(opt):
    dataset = VideoDataset(opt, "test")
    opt["vocab_size"] = dataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len
    encoder = EncoderRNN(opt["dim_vid"], opt["dim_hidden"],
                         bidirectional=bool(opt["bidirectional"]),
                         input_dropout_p=opt["input_dropout_p"],
                         rnn_dropout_p=opt["rnn_dropout_p"])
    decoder = DecoderRNN(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                         input_dropout_p=opt["input_dropout_p"],
                         rnn_dropout_p=opt["rnn_dropout_p"],
                         bidirectional=bool(opt["bidirectional"]))
    model = EncoderDecoderModel(encoder, decoder).cuda()
    model = nn.DataParallel(model)
    model.load_state_dict(torch.load(opt["saved_model"]))
    crit = utils.LanguageModelCriterion()
    test(model, crit, dataset, dataset.get_vocab(), opt)
Example #18
def main():
    embedding = nn.Embedding(voc.num_words, hidden_size)
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                  voc.num_words, decoder_n_layers, dropout)

    checkpoint = torch.load(model_save_pth, map_location=device)

    encoder.load_state_dict(checkpoint['en'])
    decoder.load_state_dict(checkpoint['de'])

    encoder.eval()
    decoder.eval()

    searcher = GreedySearchDecoder(encoder, decoder)

    for sentence in pick_n_valid_sentences(10):
        decoded_words = evaluate(searcher, sentence)
        print('Human: {}'.format(sentence))
        print('Bot: {}'.format(''.join(decoded_words)))
Example #19
def main(opt):

    dataset = VideoDataset(opt, 'train')
    dataloader = DataLoader(dataset,
                            batch_size=opt["batch_size"],
                            num_workers=8,
                            shuffle=True)
    opt["vocab_size"] = dataset.get_vocab_size()
    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"],
                          opt["max_len"],
                          opt["dim_hidden"],
                          opt["dim_word"],
                          opt['dim_vid'],
                          rnn_cell=opt['rnn_type'],
                          n_layers=opt['num_layers'],
                          bidirectional=opt["bidirectional"],
                          rnn_dropout_p=opt["rnn_dropout_p"]).cuda()
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"],
                             opt["dim_hidden"],
                             n_layers=opt['num_layers'],
                             bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_cell=opt['rnn_type'],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["vocab_size"],
                             opt["max_len"],
                             opt["dim_hidden"],
                             opt["dim_word"],
                             n_layers=opt['num_layers'],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_cell=opt['rnn_type'],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder).cuda()
    crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()
    optimizer = optim.Adam(model.parameters(),
                           lr=opt["learning_rate"],
                           weight_decay=opt["weight_decay"])
    exp_lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=opt["learning_rate_decay_every"],
        gamma=opt["learning_rate_decay_rate"])

    model.load_state_dict(
        torch.load(
            "C:\\Users\\Shumpu\\VideoCaptioningAttack\\video_caption_pytorch\\save\\vgg16_model_460.pth"
        ))
    train(dataloader, model, crit, optimizer, exp_lr_scheduler, opt, rl_crit)
Example #20
def main():
    with open("data/vocab.pkl", 'rb') as f:
        vocab = pickle.load(f)

    img_path = "data/flickr7k_images"
    cap_path = "data/factual_train.txt"
    styled_path = "data/humor/funny_train.txt"
    data_loader = get_data_loader(img_path, cap_path, vocab, 3)
    styled_data_loader = get_styled_data_loader(styled_path, vocab, 3)

    encoder = EncoderRNN(voc_size=60376, emb_size=300, hidden_size=300)
    decoder = FactoredLSTM(300, 512, 512, len(vocab))

    if torch.cuda.is_available():
        encoder = encoder.cuda()
        decoder = decoder.cuda()

    # for i, (images, captions, lengths) in enumerate(data_loader):
    for i, (captions, lengths) in enumerate(styled_data_loader):
        # images = Variable(images, volatile=True)
        captions = Variable(captions.long())

        if torch.cuda.is_available():
            # images = images.cuda()
            captions = captions.cuda()

        # features = encoder(images)

        outputs = decoder(captions, features=None, mode="humorous")
        print(lengths - 1)
        print(outputs)
        print(captions[:, 1:])

        loss = masked_cross_entropy(outputs, captions[:, 1:].contiguous(), lengths - 1)

        print(loss)

        break
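masked_cross_entropy is imported from elsewhere; a minimal sketch of the idea, assuming logits of shape (batch, seq_len, vocab), integer targets of shape (batch, seq_len), and a 1-D tensor of sequence lengths:

import torch
import torch.nn.functional as F

def masked_cross_entropy(logits, target, lengths):
    # Per-token negative log-likelihood.
    log_probs = F.log_softmax(logits, dim=-1)
    nll = -log_probs.gather(2, target.unsqueeze(2)).squeeze(2)  # (batch, seq_len)
    # Zero out positions beyond each sequence's true length.
    positions = torch.arange(target.size(1), device=target.device)
    mask = (positions[None, :] < lengths[:, None]).float()
    return (nll * mask).sum() / mask.sum()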
Example #21
def main():
    # load vocabulary
    with open('data/vocab.pkl', 'rb') as f:
        vocab = pickle.load(f)

    # build model
    encoder = EncoderRNN(voc_size=60736, emb_size=300, hidden_size=300)
    decoder = FactoredLSTM(300, 512, 512, len(vocab))

    encoder.load_state_dict(torch.load('pretrained_models/encoder-4.pkl'))
    decoder.load_state_dict(torch.load('pretrained_models/decoder-4.pkl'))

    # prepare images
    # transform = transforms.Compose([
    #     Rescale((224, 224)),
    #     transforms.ToTensor()
    #     ])
    # img_names, img_list = load_sample_images('sample_images/', transform)
    # image = to_var(img_list[30], volatile=True)

    data_loader = get_data_loader('', 'data/factual_train.txt', vocab, 1)

    # if torch.cuda.is_available():
    #     encoder = encoder.cuda()
    #     decoder = decoder.cuda()

    for i, (messages, m_lengths, targets, t_lengths) in enumerate(data_loader):
        print(''.join([vocab.i2w[x] for x in messages[0]]))
        messages = to_var(messages.long())
        targets = to_var(targets.long())

        # forward, backward and optimize
        output, features = encoder(messages, list(m_lengths))
        outputs = decoder.sample(features, mode="humorous")
        caption = [vocab.i2w[x] for x in outputs]
        print(''.join(caption))
        print('-------')
Example #22
def init():
    print("\tInitialising sentences")

    print("\t\tLoading and cleaning json files")
    json_of_convs = load_all_json_conv('./Dataset/messages')

    print("\t\tLoading two person convs")
    duo_conversations = get_chat_friend_and_me(json_of_convs)

    print("\t\tMaking two person convs discussions")
    discussions = get_discussions(duo_conversations)

    print("\t\tCreating pairs for training")
    pairs_of_sentences = make_pairs(discussions)
    print(f"\t\t{len(pairs_of_sentences)} different pairs")

    print("\t\tCreating Vocabulary")
    voc = Voc()

    print("\t\tPopulating Vocabulary")
    voc.createVocFromPairs(pairs_of_sentences)
    print(f"\t\tVocabulary of : {voc.num_words} differents words")

    print('\tBuilding encoder and decoder ...')
    embedding = nn.Embedding(voc.num_words, HIDDEN_SIZE)
    encoder = EncoderRNN(HIDDEN_SIZE, embedding, ENCODER_N_LAYERS, DROPOUT)
    decoder = LuongAttnDecoderRNN(ATTN_MODEL, embedding, HIDDEN_SIZE,
                                  voc.num_words, DECODER_N_LAYERS, DROPOUT)
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=LEARNING_RATE)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=LEARNING_RATE * DECODER_LEARNING_RATIO)
    checkpoint = None
    if LOADFILENAME:
        print("\t\tLoading last training")
        checkpoint = torch.load(LOADFILENAME)
        # If loading a model trained on GPU to CPU
        # checkpoint=torch.load(loadFilename,map_location=torch.device('cpu'))
        encoder_sd = checkpoint['en']
        decoder_sd = checkpoint['de']
        encoder_optimizer_sd = checkpoint['en_opt']
        decoder_optimizer_sd = checkpoint['de_opt']
        embedding_sd = checkpoint['embedding']
        voc.__dict__ = checkpoint['voc_dict']
        print("\t\tPopulating from last training")
        embedding.load_state_dict(embedding_sd)
        encoder.load_state_dict(encoder_sd)
        decoder.load_state_dict(decoder_sd)
        encoder_optimizer.load_state_dict(encoder_optimizer_sd)
        decoder_optimizer.load_state_dict(decoder_optimizer_sd)

    encoder = encoder.to(DEVICE)
    decoder = decoder.to(DEVICE)
    return (encoder, decoder, encoder_optimizer, decoder_optimizer, embedding,
            voc, pairs_of_sentences, checkpoint)
Example #23
def setup():
    global char2index
    global index2char
    global SOS_token
    global EOS_token
    global PAD_token

    global model
    global device

    char2index, index2char = label_loader.load_label_json(
        "../data/kor_syllable_zeroth.json")
    SOS_token = char2index['<s>']
    EOS_token = char2index['</s>']
    PAD_token = char2index['_']

    print(f"device: {device}")

    input_size = 161
    enc = EncoderRNN(input_size,
                     512,
                     n_layers=3,
                     dropout_p=0.3,
                     bidirectional=True,
                     rnn_cell='LSTM',
                     variable_lengths=False)

    dec = DecoderRNN(len(char2index),
                     128,
                     512,
                     512,
                     SOS_token,
                     EOS_token,
                     n_layers=2,
                     rnn_cell='LSTM',
                     dropout_p=0.3,
                     bidirectional_encoder=True)

    model = Seq2Seq(enc, dec).to(device)

    model_path = "../models/zeroth_korean_trimmed/LSTM_512x3_512x2_zeroth_korean_trimmed/final.pth"
    print("Loading checkpoint model %s" % model_path)
    state = torch.load(model_path, map_location=device)
    model.load_state_dict(state['model'])
    print('Model loaded')
Example #24
    def main(self, opt):
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        video_path = self.ent1.get().replace("/", "\\")
        image_feats = self.extract_image_feats(video_path)
        image_feats = torch.from_numpy(image_feats).type(
            torch.FloatTensor).unsqueeze(0)

        encoder = EncoderRNN(opt["dim_vid"],
                             opt["dim_hidden"],
                             bidirectional=bool(opt["bidirectional"]),
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(16860,
                             opt["max_len"],
                             opt["dim_hidden"],
                             opt["dim_word"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=bool(opt["bidirectional"]))
        model = S2VTAttModel(encoder, decoder).cuda()
        model.load_state_dict(torch.load("data/save/model_500.pth"))
        model.eval()
        opt = dict()
        opt['child_sum'] = True
        opt['temporal_attention'] = True
        opt['multimodel_attention'] = True
        with torch.no_grad():
            _, seq_preds = model(image_feats.cuda(), mode='inference', opt=opt)
        vocab = json.load(open('data/info.json'))['ix_to_word']
        self.sent = NLUtils.decode_sequence(vocab, seq_preds)
        hasil = self.translator.translate(self.sent[0], dest='id')
        print(self.sent[0])
        self.hasilPred.configure(text=self.sent[0])
        self.hasiltrans.configure(text=hasil.text)
        # coba = self.sent[0]
        self.textToSpeech(self.sent[0], hasil.text)
        del seq_preds
        torch.cuda.empty_cache()
Example #25
def main(opt):
    train_dataset = VideoDataset(opt, 'train')
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=opt.batch_size,
                                  shuffle=True)
    opt.vocab_size = train_dataset.vocab_size
    opt.seq_length = train_dataset.seq_length
    val_dataset = VideoDataset(opt, 'val')
    val_dataloader = DataLoader(val_dataset,
                                batch_size=opt.batch_size,
                                shuffle=True)
    if opt.model == 'S2VTModel':
        model = S2VTModel(opt.vocab_size,
                          opt.seq_length,
                          opt.dim_hidden,
                          opt.dim_word,
                          rnn_dropout_p=opt.rnn_dropout_p).cuda()
    elif opt.model == "Vid2seq":
        encoder = EncoderRNN(opt.dim_vid, opt.dim_hidden)
        decoder = DecoderRNN(opt.vocab_size,
                             opt.seq_length,
                             opt.dim_hidden,
                             use_attention=True,
                             rnn_dropout_p=opt.rnn_dropout_p)
        model = Vid2seq(encoder, decoder).cuda()
    crit = utils.LanguageModelCriterion()
    rl_crit = utils.RewardCriterion()
    optimizer = optim.Adam(model.parameters(),
                           lr=opt.learning_rate,
                           weight_decay=opt.weight_decay)
    exp_lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=opt.learning_rate_decay_every,
        gamma=opt.learning_rate_decay_rate)
    if not os.path.isdir(opt.checkpoint_path):
        os.mkdir(opt.checkpoint_path)
    train(train_dataloader, val_dataloader, model, crit, optimizer,
          exp_lr_scheduler, opt, rl_crit)
Example #26
def main(opt):
    dataset = VideoDataset(opt, "test")
    opt["vocab_size"] = dataset.get_vocab_size()
    opt["seq_length"] = dataset.max_len
    if opt['beam_size'] != 1:
        assert opt["batch_size"] == 1
    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"], opt['dim_vid'],
                          n_layers=opt['num_layers'],
                          rnn_cell=opt['rnn_type'],
                          bidirectional=opt["bidirectional"],
                          rnn_dropout_p=opt["rnn_dropout_p"])
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"], opt["dim_hidden"],
                             n_layers=opt['num_layers'],
                             rnn_cell=opt['rnn_type'], bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"], rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["vocab_size"], opt["max_len"], opt["dim_hidden"], opt["dim_word"],
                             n_layers=opt['num_layers'],
                             rnn_cell=opt['rnn_type'], input_dropout_p=opt["input_dropout_p"],
                             rnn_dropout_p=opt["rnn_dropout_p"], bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder)
    else:
        return

    if torch.cuda.device_count() > 1:
        print("{} devices detected, switch to parallel model.".format(torch.cuda.device_count()))
        model = nn.DataParallel(model)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Setup the model
    model.load_state_dict(torch.load(opt["saved_model"]))
    crit = utils.LanguageModelCriterion()

    test(model, crit, dataset, dataset.get_vocab(), opt)
Example #27
def main(opt):
    dataset_test = VideoDataset(opt, 'test')
    dataloader_test = DataLoader(dataset_test,
                                 batch_size=opt["batch_size"],
                                 shuffle=False)
    opt["obj_vocab_size"] = dataset_test.get_obj_vocab_size()
    opt["rel_vocab_size"] = dataset_test.get_rel_vocab_size()
    if opt["model"] == 'S2VTModel':
        model = S2VTModel(opt["vocab_size"],
                          opt["max_len"],
                          opt["dim_hidden"],
                          opt["dim_word"],
                          opt['dim_vid'],
                          rnn_cell=opt['rnn_type'],
                          n_layers=opt['num_layers'],
                          rnn_dropout_p=opt["rnn_dropout_p"])
    elif opt["model"] == "S2VTAttModel":
        encoder = EncoderRNN(opt["dim_vid"],
                             opt["dim_hidden"],
                             bidirectional=opt["bidirectional"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_cell=opt['rnn_type'],
                             rnn_dropout_p=opt["rnn_dropout_p"])
        decoder = DecoderRNN(opt["obj_vocab_size"],
                             opt["rel_vocab_size"],
                             opt["max_len"],
                             opt["dim_hidden"],
                             opt["dim_word"],
                             input_dropout_p=opt["input_dropout_p"],
                             rnn_cell=opt['rnn_type'],
                             rnn_dropout_p=opt["rnn_dropout_p"],
                             bidirectional=opt["bidirectional"])
        model = S2VTAttModel(encoder, decoder)
    model = model.cuda()
    model.load_state_dict(torch.load(opt['ckpt_path']))
    crit = utils.ObjRelCriterion()
    test(model, crit, opt, dataloader_test)
Example #28
def main(args):
    config_path = os.path.join(args.config_path, 'config.json')
    with open(config_path) as f:
        config = json.load(f)

    print('[-] Loading pickles')
    dataset_path = Path(config["dataset_path"])
    input_lang = CustomUnpickler(open(dataset_path / 'input_lang.pkl',
                                      'rb')).load()
    output_lang = CustomUnpickler(open(dataset_path / 'output_lang.pkl',
                                       'rb')).load()
    pairs = CustomUnpickler(open(dataset_path / 'pairs.pkl', 'rb')).load()
    # input_lang = load_pkl(dataset_path / 'input_lang.pkl')
    # output_lang = load_pkl(dataset_path / 'output_lang.pkl')
    # pairs = load_pkl(dataset_path / 'pairs.pkl')

    hidden_size = config["model_cfg"]["hidden_size"]
    max_len = config["max_len"]
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
    decoder = AttnDecoderRNN(hidden_size,
                             output_lang.n_words,
                             max_len,
                             dropout_p=0.1).to(device)

    print('[-] Loading models')
    ckpt = torch.load(config["models_path"] + 'models.ckpt')
    encoder.load_state_dict(ckpt['encoder'])
    encoder.to(device)
    decoder.load_state_dict(ckpt['decoder'])
    decoder.to(device)

    evaluator = Evaluater(device, encoder, decoder, input_lang, output_lang,
                          max_len)

    # Evaluate random samples
    evaluator.evaluateRandomly(pairs)

    evaluator.evaluateAndShowAttention("elle a cinq ans de moins que moi .")
    # evaluator.evaluateAndShowAttention("elle est trop petit .")
    # evaluator.evaluateAndShowAttention("je ne crains pas de mourir .")
    # evaluator.evaluateAndShowAttention("c est un jeune directeur plein de talent .")
    plt.savefig('attention.png')
Example #29
def main():

    global char2index
    global index2char
    global SOS_token
    global EOS_token
    global PAD_token

    parser = argparse.ArgumentParser(description='Speech hackathon Baseline')
    parser.add_argument('--hidden_size',
                        type=int,
                        default=512,
                        help='hidden size of model (default: 512)')
    parser.add_argument('--layer_size',
                        type=int,
                        default=3,
                        help='number of layers of model (default: 3)')
    parser.add_argument('--dropout',
                        type=float,
                        default=0.2,
                        help='dropout rate in training (default: 0.2)')
    parser.add_argument(
        '--bidirectional',
        action='store_true',
        help='use bidirectional RNN for encoder (default: False)')
    parser.add_argument(
        '--use_attention',
        action='store_true',
        help='use attention between encoder-decoder (default: False)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='batch size in training (default: 32)')
    parser.add_argument(
        '--workers',
        type=int,
        default=4,
        help='number of workers in dataset loader (default: 4)')
    parser.add_argument('--max_epochs',
                        type=int,
                        default=10,
                        help='number of max epochs in training (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=1e-04,
                        help='learning rate (default: 0.0001)')
    parser.add_argument('--teacher_forcing',
                        type=float,
                        default=0.5,
                        help='teacher forcing ratio in decoder (default: 0.5)')
    parser.add_argument('--max_len',
                        type=int,
                        default=80,
                        help='maximum characters of sentence (default: 80)')
    parser.add_argument('--no_cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--save_name',
                        type=str,
                        default='model',
                        help='the name of model in nsml or local')
    parser.add_argument('--mode', type=str, default='train')
    parser.add_argument("--pause", type=int, default=0)

    args = parser.parse_args()

    char2index, index2char = label_loader.load_label('./hackathon.labels')
    SOS_token = char2index['<s>']
    EOS_token = char2index['</s>']
    PAD_token = char2index['_']

    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    args.cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if args.cuda else 'cpu')

    # N_FFT: defined in loader.py
    feature_size = N_FFT // 2 + 1  # integer division: layer sizes must be ints

    enc = EncoderRNN(feature_size,
                     args.hidden_size,
                     input_dropout_p=args.dropout,
                     dropout_p=args.dropout,
                     n_layers=args.layer_size,
                     bidirectional=args.bidirectional,
                     rnn_cell='gru',
                     variable_lengths=False)

    dec = DecoderRNN(len(char2index),
                     args.max_len,
                     args.hidden_size * (2 if args.bidirectional else 1),
                     SOS_token,
                     EOS_token,
                     n_layers=args.layer_size,
                     rnn_cell='gru',
                     bidirectional=args.bidirectional,
                     input_dropout_p=args.dropout,
                     dropout_p=args.dropout,
                     use_attention=args.use_attention)

    model = Seq2seq(enc, dec)
    model.flatten_parameters()

    for param in model.parameters():
        param.data.uniform_(-0.08, 0.08)

    # lnw add get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    model = nn.DataParallel(model).to(device)

    optimizer = optim.Adam(model.module.parameters(), lr=args.lr)
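    # Sum-reduced cross entropy; ignore_index skips padded positions in the loss.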
    criterion = nn.CrossEntropyLoss(reduction='sum',
                                    ignore_index=PAD_token).to(device)

    bind_model(model, optimizer)

    if args.pause == 1:
        nsml.paused(scope=locals())

    if args.mode != "train":
        return

    data_list = os.path.join(DATASET_PATH, 'train_data', 'data_list.csv')
    wav_paths = list()
    script_paths = list()

    with open(data_list, 'r') as f:
        for line in f:
            # line: "aaa.wav,aaa.label"

            wav_path, script_path = line.strip().split(',')
            wav_paths.append(os.path.join(DATASET_PATH, 'train_data',
                                          wav_path))
            script_paths.append(
                os.path.join(DATASET_PATH, 'train_data', script_path))

    best_loss = 1e10
    begin_epoch = 0

    # load all target scripts for reducing disk i/o
    target_path = os.path.join(DATASET_PATH, 'train_label')
    load_targets(target_path)

    # lnw: valid_ratio reduced from the original 0.05 to 0.03
    train_batch_num, train_dataset_list, valid_dataset = split_dataset(
        args, wav_paths, script_paths, valid_ratio=0.03)

    #lnw add
    lstart_time = datetime.now()
    print("Start time : " + str(lstart_time))

    #lnw block
    #logger.info('start')

    train_begin = time.time()

    for epoch in range(begin_epoch, args.max_epochs):

        #lnw add
        lepoch_start = datetime.now()
        print(epoch, "epoch Start time : " + str(lepoch_start))

        train_queue = queue.Queue(args.workers * 2)

        train_loader = MultiLoader(train_dataset_list, train_queue,
                                   args.batch_size, args.workers)
        train_loader.start()

        # lnw: print_batch increased from 10 to 450
        train_loss, train_cer = train(model, train_batch_num, train_queue,
                                      criterion, optimizer, device,
                                      train_begin, args.workers, 450,
                                      args.teacher_forcing)

        logger.info('Epoch %d (Training) Loss %0.4f CER %0.4f' %
                    (epoch, train_loss, train_cer))

        train_loader.join()

        valid_queue = queue.Queue(args.workers * 2)
        valid_loader = BaseDataLoader(valid_dataset, valid_queue,
                                      args.batch_size, 0)
        valid_loader.start()

        eval_loss, eval_cer = evaluate(model, valid_loader, valid_queue,
                                       criterion, device)
        logger.info('Epoch %d (Evaluate) Loss %0.4f CER %0.4f' %
                    (epoch, eval_loss, eval_cer))

        valid_loader.join()

        nsml.report(False,
                    step=epoch,
                    train_epoch__loss=train_loss,
                    train_epoch__cer=train_cer,
                    eval__loss=eval_loss,
                    eval__cer=eval_cer)

        best_model = (eval_loss < best_loss)
        nsml.save(args.save_name)

        if best_model:
            nsml.save('best')
            best_loss = eval_loss

            #lnw add. save best model
            torch.save(model, 'ModelBestSave.pt')

        #lnw end time, duration
        lepoch_end = datetime.now()
        print(epoch, "epoch End time: " + str(lepoch_end), "Duration:",
              str(lepoch_end - lepoch_start), "SratTime-NowTime:",
              str(lepoch_end - lstart_time))

    #lnw add
    lend_time = datetime.now()
    print("End time : " + str(lend_time))
    print('Duration: {}'.format(lend_time - lstart_time))
Example #30
def main():
    input_lang = Lang('data/WORDMAP_en.json')
    output_lang = Lang('data/WORDMAP_zh.json')
    print("input_lang.n_words: " + str(input_lang.n_words))
    print("output_lang.n_words: " + str(output_lang.n_words))

    train_data = TranslationDataset('train')
    val_data = TranslationDataset('valid')

    # Initialize encoder & decoder models
    encoder = EncoderRNN(input_lang.n_words, hidden_size, encoder_n_layers,
                         dropout)
    decoder = LuongAttnDecoderRNN(attn_model, hidden_size, output_lang.n_words,
                                  decoder_n_layers, dropout)

    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Initialize optimizers
    print('Building optimizers ...')
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)

    # Initializations
    print('Initializing ...')
    train_batch_time = ExpoAverageMeter()  # forward prop. + back prop. time
    train_losses = ExpoAverageMeter()  # loss (per word decoded)
    val_batch_time = ExpoAverageMeter()
    val_losses = ExpoAverageMeter()

    best_loss = 100000
    epochs_since_improvement = 0

    # Epochs
    for epoch in range(start_epoch, epochs):
        # Decay learning rate if there is no improvement for 8 consecutive epochs, and terminate training after 20
        if epochs_since_improvement == 20:
            break
        if epochs_since_improvement > 0 and epochs_since_improvement % 8 == 0:
            adjust_learning_rate(decoder_optimizer, 0.8)
            adjust_learning_rate(encoder_optimizer, 0.8)

        # One epoch's training
        # Ensure dropout layers are in train mode
        encoder.train()
        decoder.train()

        start = time.time()

        # Batches
        for i_batch in range(len(train_data)):
            input_variable, lengths, target_variable, mask, max_target_len = train_data[
                i_batch]
            train_loss = train(input_variable, lengths, target_variable, mask,
                               max_target_len, encoder, decoder,
                               encoder_optimizer, decoder_optimizer)

            # Keep track of metrics
            train_losses.update(train_loss)
            train_batch_time.update(time.time() - start)

            start = time.time()

            # Print status
            if i_batch % print_every == 0:
                print(
                    '[{0}] Epoch: [{1}][{2}/{3}]\t'
                    'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                        timestamp(),
                        epoch,
                        i_batch,
                        len(train_data),
                        batch_time=train_batch_time,
                        loss=train_losses))

        # One epoch's validation
        start = time.time()

        # Batches
        for i_batch in range(len(val_data)):
            input_variable, lengths, target_variable, mask, max_target_len = val_data[
                i_batch]
            val_loss = valid(input_variable, lengths, target_variable, mask,
                             max_target_len, encoder, decoder)

            # Keep track of metrics
            val_losses.update(val_loss)
            val_batch_time.update(time.time() - start)

            start = time.time()

            # Print status
            if i_batch % print_every == 0:
                print(
                    'Validation: [{0}/{1}]\t'
                    'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                        i_batch,
                        len(val_data),
                        batch_time=val_batch_time,
                        loss=val_losses))

        val_loss = val_losses.avg
        print('\n * LOSS - {loss:.3f}\n'.format(loss=val_loss))

        # Check if there was an improvement
        is_best = val_loss < best_loss
        best_loss = min(best_loss, val_loss)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" %
                  (epochs_since_improvement, ))
        else:
            epochs_since_improvement = 0

        save_checkpoint(epoch, encoder, decoder, encoder_optimizer,
                        decoder_optimizer, input_lang, output_lang, val_loss,
                        is_best)

        # Initialize search module
        searcher = GreedySearchDecoder(encoder, decoder)
        for input_sentence, target_sentence in pick_n_valid_sentences(
                input_lang, output_lang, 10):
            decoded_words = evaluate(searcher, input_sentence, input_lang,
                                     output_lang)
            print('> {}'.format(input_sentence))
            print('= {}'.format(target_sentence))
            print('< {}'.format(''.join(decoded_words)))

        # Reshuffle train and valid samples
        np.random.shuffle(train_data.samples)
        np.random.shuffle(val_data.samples)