Example #1
def run():
    USE_CUDA = torch.cuda.is_available()

    config_path = os.path.join("experiments", FLAGS.config)

    if not os.path.exists(config_path):
        raise FileNotFoundError

    with open(config_path, "r") as f:
        config = json.load(f)

    config["gpu"] = torch.cuda.is_available()

    dataset = ToyDataset(5, 15)
    eval_dataset = ToyDataset(5, 15, type='eval')
    BATCHSIZE = 30
    train_loader = data.DataLoader(dataset,
                                   batch_size=BATCHSIZE,
                                   shuffle=False,
                                   collate_fn=pad_collate,
                                   drop_last=True)
    eval_loader = data.DataLoader(eval_dataset,
                                  batch_size=BATCHSIZE,
                                  shuffle=False,
                                  collate_fn=pad_collate,
                                  drop_last=True)
    config["batch_size"] = BATCHSIZE

    # Models
    model = Seq2Seq(config)

    if USE_CUDA:
        model = model.cuda()

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.get("learning_rate", .001))

    print("=" * 60)
    print(model)
    print("=" * 60)
    for k, v in sorted(config.items(), key=lambda i: i[0]):
        print(" (" + k + ") : " + str(v))
    print()
    print("=" * 60)

    print("\nInitializing weights...")
    for name, param in model.named_parameters():
        if 'bias' in name:
            torch.nn.init.constant_(param, 0.0)
        elif 'weight' in name:
            torch.nn.init.xavier_normal_(param)

    for epoch in range(FLAGS.epochs):
        run_state = (epoch, FLAGS.epochs, FLAGS.train_size)

        # Train needs to return model and optimizer, otherwise the model keeps restarting from zero at every epoch
        # print("My test: ", model('abcd'))
        model, optimizer = train(model, optimizer, train_loader, run_state)
        evaluate(model, eval_loader)
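The train() and evaluate() helpers called above are defined elsewhere in the project. A minimal sketch of a compatible train() is shown below; model.loss(batch) is an assumed interface, not the project's confirmed API, and the essential contract (per the comment in run()) is simply that the same model and optimizer objects are handed back.

# Hypothetical sketch only; the real train() lives elsewhere in the project.
def train(model, optimizer, train_loader, run_state):
    epoch, total_epochs, _train_size = run_state
    model.train()
    running_loss, n_batches = 0.0, 0
    for batch in train_loader:
        optimizer.zero_grad()
        loss = model.loss(batch)  # assumed helper on the Seq2Seq model
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        n_batches += 1
    print("epoch {}/{}  train loss {:.4f}".format(
        epoch + 1, total_epochs, running_loss / max(1, n_batches)))
    return model, optimizer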
Example #2
def train(vocabs, batch_gen, train_params, model_params):
    word2int, int2word = vocabs
    num_epoch = train_params['num_epoch']
    learn_rate = train_params['learn_rate']
    clip = train_params['clip']
    eval_every = train_params['eval_every']
    train_tf_ratio = train_params['train_tf_ratio']
    val_tf_ratio = train_params['val_tf_ratio']

    net = Seq2Seq(vocabs=vocabs, device=device, **model_params).to(device)
    net.train()

    opt = optim.Adam(net.parameters(), lr=learn_rate)
    weights = calc_class_weights(batch_gen.data_dict, batch_gen.label_dict)
    criterion = nn.CrossEntropyLoss(weight=weights, ignore_index=word2int['<pad>'])

    print('Training is starting ...')
    train_loss_list = []
    val_loss_list = []
    for epoch in range(num_epoch):
        running_loss = 0

        for idx, (x_cap, y_cap) in enumerate(batch_gen.generate('train')):
            print('\rtrain:{}/{}'.format(idx, batch_gen.num_iter('train')), flush=True, end='')
            x_cap, y_cap = x_cap.to(device), y_cap.to(device)

            opt.zero_grad()
            output = net(x_cap, y_cap, train_tf_ratio)

            loss = criterion(output.view(-1, output.size(2)), y_cap.view(-1).long())
            loss.backward()

            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()

            running_loss += loss.item()

            if (idx+1) % eval_every == 0:
                print('\n')
                val_loss = evaluate(net, word2int, batch_gen, weights, val_tf_ratio)
                print("\nEpoch: {}/{}...".format(epoch + 1, num_epoch),
                      "Step: {}...".format(idx),
                      "Loss: {:.4f}...".format(running_loss / idx),
                      "Val Loss: {:.4f}\n".format(val_loss))

        print('\nCreating sample captions')
        sample(net, vocabs, generator=batch_gen.generate('validation'))
        print('\n')

        train_loss_list.append(running_loss / (idx + 1))
        val_loss_list.append(val_loss)

        # use context managers so the per-epoch snapshots are flushed and closed
        with open('results/losses.pkl', 'wb') as loss_file:
            pickle.dump([train_loss_list, val_loss_list], loss_file)
        with open('results/seq2seq.pkl', 'wb') as model_file:
            pickle.dump(net, model_file)

    print('Training finished, saving the model')
    with open('seq2seq.pkl', 'wb') as model_file:
        pickle.dump(net, model_file)
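A sketch of reading those artifacts back: unpickling the model requires the Seq2Seq class to be importable from the module it was saved from, which is one reason saving a state_dict is often preferred.

# Sketch: reload the artifacts written by train() above.
import pickle

with open('results/losses.pkl', 'rb') as f:
    train_loss_list, val_loss_list = pickle.load(f)

with open('seq2seq.pkl', 'rb') as f:
    net = pickle.load(f)
net.eval()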
Example #3
def evaluate():
    reader = PWKPReader()
    vocab = Vocabulary.from_files(vocab_dir)
    iterator = BasicIterator(batch_size=opt.batch_size)
    iterator.index_with(vocab)

    model = Seq2Seq(emb_size=opt.emb_size,
                    hidden_size=opt.hidden_size,
                    enc_layers=opt.enc_layers,
                    dec_layers=opt.dec_layers,
                    dropout=opt.dropout,
                    bidirectional=opt.bidirectional,
                    beam_size=opt.beam_size,
                    label_smoothing=opt.label_smoothing,
                    vocab=vocab)

    model = model.cuda(opt.gpu)
    model_state = torch.load(opt.restore, map_location=util.device_mapping(-1))
    model.load_state_dict(model_state)

    predictor = Predictor(iterator=iterator,
                          max_decoding_step=opt.max_step,
                          vocab=vocab,
                          reader=reader,
                          data_path=test_path,
                          log_dir=save_dir,
                          map_path=ner_path,
                          cuda_device=opt.gpu)

    predictor.evaluate(model)
def instantiate_model(model_name, vocab_size, embed_dim, hidden_dim, lr, bidirectional_encoder, 
                      max_encoder_len, max_decoder_len, eos_token, device=DEVICE,
                      decoder_num_layers=2, dropout_rate=0.5, embedding_weights=None):
    
    attention = None
    
    if model_name == SEQ2SEQ:
        model = Seq2Seq(vocab_size, embed_dim, hidden_dim, max_encoder_len, max_decoder_len, 
                        eos_token, bidirectional_encoder=bidirectional_encoder, num_decoder_layers=decoder_num_layers,
                        dropout_rate=dropout_rate, embedding_weights=embedding_weights)
        attention = Attention(hidden_dim, embed_dim, max_encoder_len)
    else:
        raise ValueError('wrong value for model_name')
        
    print('model created')
    
    model.to(device)
    if attention is not None:
        attention.to(device)
    print('model moved to device: ', device)

    optimizer = Adam(model.parameters(), lr=lr)
    print('optimizer created')
    
    loss_function = CrossEntropyLoss(ignore_index=0, reduction='mean')
    print('loss function created')
    
    return model, attention, optimizer, loss_function
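A possible call site for instantiate_model is sketched below; SEQ2SEQ and DEVICE are assumed to be module-level constants in the original project, and every numeric value is a placeholder.

# Hypothetical usage; all numeric arguments are placeholders.
model, attention, optimizer, loss_function = instantiate_model(
    model_name=SEQ2SEQ,
    vocab_size=30000,
    embed_dim=300,
    hidden_dim=512,
    lr=1e-3,
    bidirectional_encoder=True,
    max_encoder_len=400,
    max_decoder_len=100,
    eos_token=2,
    device=DEVICE)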
def test(test_loader, modelID, showAttn=True):
    encoder = Encoder(HIDDEN_SIZE_ENC, HEIGHT, WIDTH, Bi_GRU, CON_STEP,
                      FLIP).cuda()
    decoder = Decoder(HIDDEN_SIZE_DEC, EMBEDDING_SIZE, vocab_size, Attention,
                      TRADEOFF_CONTEXT_EMBED).cuda()
    seq2seq = Seq2Seq(encoder, decoder, output_max_len, vocab_size).cuda()
    model_file = 'save_weights/seq2seq-' + str(modelID) + '.model'
    pretrain_dict = torch.load(model_file)
    seq2seq_dict = seq2seq.state_dict()
    pretrain_dict = {
        k: v
        for k, v in pretrain_dict.items() if k in seq2seq_dict
    }
    seq2seq_dict.update(pretrain_dict)
    seq2seq.load_state_dict(seq2seq_dict)  #load
    print('Loading ' + model_file)

    seq2seq.eval()
    total_loss_t = 0
    start_t = time.time()
    # inference only: no gradients are needed
    with torch.no_grad():
        for num, (test_index, test_in, test_in_len, test_out,
                  test_domain) in enumerate(test_loader):
            lambd = LAMBD
            test_in, test_out = test_in.cuda(), test_out.cuda()
            test_domain = test_domain.cuda()
            output_t, attn_weights_t, out_domain_t = seq2seq(test_in,
                                                             test_out,
                                                             test_in_len,
                                                             lambd,
                                                             teacher_rate=False,
                                                             train=False)
            batch_count_n = writePredict(modelID, test_index, output_t, 'test')
            test_label = test_out.permute(1, 0)[1:].contiguous().view(-1)
            if LABEL_SMOOTH:
                loss_t = crit(log_softmax(output_t.view(-1, vocab_size)),
                              test_label)
            else:
                loss_t = F.cross_entropy(output_t.view(-1, vocab_size),
                                         test_label,
                                         ignore_index=tokens['PAD_TOKEN'])

            total_loss_t += loss_t.item()
            if showAttn:
                global_index_t = 0
                for t_idx, t_in in zip(test_index, test_in):
                    visualizeAttn(t_in.detach()[0], test_in_len[0],
                                  [j[global_index_t] for j in attn_weights_t],
                                  modelID, batch_count_n[global_index_t],
                                  'test_' + t_idx.split(',')[0])
                    global_index_t += 1
    total_loss_t /= (num + 1)
    writeLoss(total_loss_t, 'test')
    print('       TEST loss=%.3f, time=%.3f' %
          (total_loss_t, time.time() - start_t))
Example #6
    def __init__(
            self,
            dataloader,
            params,
            save_model_every=1,  # Every Number of epochs to save after
            print_every=1000,  # Every Number of batches to print after
            dev_loader=None,
            #test_loader=None,
            vocab=None,
            saver=None,
            resume_training=False,
            resume_epoch=None):

        self.save_model_every = save_model_every
        self.print_every = print_every
        self.params = params
        self.vocab = vocab
        self.model_name = params[C.MODEL_NAME]
        self.start_epoch = 0
        self.resume_training = resume_training
        self.lr = None

        # Data Loaders
        self.dataloader = dataloader
        self.dev_loader = dev_loader
        #self.test_loader = test_loader

        # Saver and Logger
        self.saver = saver
        self.logger = self.saver.logger

        # Model
        self.model = Seq2Seq(
            self.vocab.get_vocab_size(),
            hsizes(params, self.model_name),
            params,
        ) if self.dataloader else None

        self.logger.log('MODEL : %s' % self.model)
        self.logger.log('PARAMS: %s' % self.params)

        # Optimizer and loss metrics
        if self.resume_training:
            self.optimizer, self.metrics = self.saver.load_model_and_state(
                resume_epoch, self.model)
            self.start_epoch = resume_epoch + 1
        else:
            self.optimizer = None
            self.metrics = TrainerMetrics(self.logger)

        self.loss = Loss()
        if USE_CUDA:
            if self.model:
                self.model = self.model.cuda()
Example #7
def test(test_loader, modelID, showAttn=True):
    encoder = Encoder(HIDDEN_SIZE_ENC, HEIGHT, WIDTH, Bi_GRU, CON_STEP,
                      FLIP).to(device)
    decoder = Decoder(HIDDEN_SIZE_DEC, EMBEDDING_SIZE, vocab_size, Attention,
                      TRADEOFF_CONTEXT_EMBED).to(device)
    seq2seq = Seq2Seq(encoder, decoder, output_max_len, vocab_size).to(device)
    model_file = 'save_weights/seq2seq-' + str(modelID) + '.model'
    print('Loading ' + model_file)
    seq2seq.load_state_dict(torch.load(model_file))  #load

    seq2seq.eval()
    total_loss_t = 0
    start_t = time.time()
    with torch.no_grad():
        for num, (test_index, test_in, test_in_len,
                  test_out) in enumerate(test_loader):
            #test_in = test_in.unsqueeze(1)
            test_in, test_out = test_in.to(device), test_out.to(device)
            if test_in.requires_grad or test_out.requires_grad:
                print(
                    'ERROR! test_in, test_out should have requires_grad=False')
            output_t, attn_weights_t = seq2seq(test_in,
                                               test_out,
                                               test_in_len,
                                               teacher_rate=False,
                                               train=False)
            batch_count_n = writePredict(modelID, test_index, output_t, 'test')
            test_label = test_out.permute(1, 0)[1:].reshape(-1)
            #loss_t = F.cross_entropy(output_t.view(-1, vocab_size),
            #                        test_label, ignore_index=tokens['PAD_TOKEN'])
            #loss_t = loss_label_smoothing(output_t.view(-1, vocab_size), test_label)
            if LABEL_SMOOTH:
                loss_t = crit(log_softmax(output_t.reshape(-1, vocab_size)),
                              test_label)
            else:
                loss_t = F.cross_entropy(output_t.reshape(-1, vocab_size),
                                         test_label,
                                         ignore_index=tokens['PAD_TOKEN'])

            total_loss_t += loss_t.item()

            if showAttn:
                global_index_t = 0
                for t_idx, t_in in zip(test_index, test_in):
                    visualizeAttn(t_in.detach()[0], test_in_len[0],
                                  [j[global_index_t] for j in attn_weights_t],
                                  modelID, batch_count_n[global_index_t],
                                  'test_' + t_idx.split(',')[0])
                    global_index_t += 1

        total_loss_t /= (num + 1)
        writeLoss(total_loss_t, 'test')
        print('    TEST loss=%.3f, time=%.3f' %
              (total_loss_t, time.time() - start_t))
def train(dataset, params):
    batches = list(
        dataset.get_batch(params.batch_size, params.src_max_length,
                          params.tgt_max_length))
    n_batches = (dataset.total_pairs - 1) // params.batch_size + 1
    model = Seq2Seq(params, dataset.vocab,
                    dataset.SPECIAL_TOKENS)  # define the model
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=params.lr)  # use ADAM optimizer

    for epoch_count in range(1, 1 + params.n_epoch):
        epoch_loss = 0.
        prog_bar = tqdm(range(1, n_batches + 1),
                        desc='Epoch %d' % epoch_count)  # track the progress
        model.train()

        for batch_count in prog_bar:
            optimizer.zero_grad()
            batch = batches[batch_count - 1]
            source_tensor, target_tensor = batch
            source_tensor = source_tensor.to(device)
            target_tensor = target_tensor.to(device)

            # calculate output and losses
            output_tokens, batch_loss = model(source_tensor, target_tensor)

            # backward propagation
            batch_loss.backward()
            optimizer.step()

            batch_loss_value = batch_loss.item()
            epoch_loss += batch_loss_value
            epoch_avg_loss = epoch_loss / batch_count

            if batch_count % 100 == 0:
                prog_bar.set_postfix(loss='%g' % epoch_avg_loss)
                print("\n")
                print("Example Article:\n")
                print("{}\n".format(" ".join(
                    [dataset.vocab[i] for i in source_tensor[:, 0]])))
                print("Example Summary:\n")
                print("{}\n".format(" ".join(
                    [dataset.vocab[i] for i in target_tensor[:, 0]])))
                print("Output Summary:\n")
                print("{}\n".format(" ".join(
                    [dataset.vocab[i] for i in output_tokens[:, 0]])))

        # save model
        filename = "{}.{}.pt".format(params.model_path_prefix, epoch_count)
        torch.save(model.state_dict(), filename)
def main():

    mkdirs(os.path.join('experiments', 'maskmle', args.model_size))
    train_loader, test_loader, valid_loader, vocabulary_size = create_ptb_loader(
        args.data_dir, args.batch_size, args.seq_len)
    # Instantiate and init the model, and move it to the GPU
    model = Seq2Seq(vocabulary_size, model_config)
    if args.pretrained:
        model.load_pretrained_weights(
            os.path.join('experiments', 'lm', args.model_size,
                         'model_best.pth.tar'))
    else:
        print('NO PRETRAINED LANGUAGE MODEL!!')
        return
    if CUDA_AVAIL:
        model = model.cuda()

    criterion = torch.nn.NLLLoss()

    # Define optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=model_config.learning_rate)

    lr = model_config.learning_rate
    best_val_loss = np.inf
    for e in tqdm(range(model_config.max_max_epoch), desc='Epoch'):

        model = train(train_loader, model, criterion, optimizer)
        val_loss = eval(valid_loader, model, criterion)

        state = {
            'arch': "RnnLM",
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            save_checkpoint(state, folder=args.model_size, is_best=True)
        else:
            lr /= model_config.lr_decay
            optimizer = torch.optim.SGD(model.parameters(), lr=lr)
            save_checkpoint(state, folder=args.model_size)

        # Test
        test_loss = eval(test_loader, model, criterion)

        # Report
        msg = 'Epoch %d: \tValid loss=%.4f \tTest loss=%.4f \tTest perplexity=%.1f' % (
            e + 1, val_loss, test_loss, np.exp(test_loss))
        tqdm.write(msg)
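save_checkpoint is defined elsewhere in the project; a typical helper consistent with the calls above is sketched here. The directory layout ('experiments/maskmle/<model_size>') mirrors the mkdirs() call in main() but is an assumption, as is the filename scheme.

# Sketch only: assumed helper, assumed directory layout and filenames.
import os
import shutil
import torch

def save_checkpoint(state, folder, filename='checkpoint.pth.tar', is_best=False):
    exp_dir = os.path.join('experiments', 'maskmle', folder)
    os.makedirs(exp_dir, exist_ok=True)
    path = os.path.join(exp_dir, filename)
    torch.save(state, path)
    if is_best:
        # keep a copy of the best model seen so far
        shutil.copyfile(path, os.path.join(exp_dir, 'model_best.pth.tar'))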
def make_gated_dss(config, device):
    INPUT_DIM = src_vocab_length()
    OUTPUT_DIM = trg_vocab_length()

    enc = GatedDSSEncoder(INPUT_DIM,
                          config.hid_dim,
                          config.enc_layers,
                          config.enc_dropout,
                          device)
    dec = GatedDSSDecoder(OUTPUT_DIM,
                          config.hid_dim,
                          config.dec_layers,
                          config.dec_dropout,
                          device)
    return Seq2Seq(enc, dec, device, decode_mask_type="sequence").to(device)
def make_dss_enc_transformer_dec(config, device):
    INPUT_DIM = src_vocab_length()
    OUTPUT_DIM = trg_vocab_length()
    enc = DSSEncoder(INPUT_DIM,
                     config.hid_dim,
                     config.enc_layers,
                     config.enc_dropout,
                     device)
    dec = TransformerDecoder(OUTPUT_DIM,
                             config.hid_dim,
                             config.dec_layers,
                             config.dec_heads,
                             config.dec_pf_dim,
                             config.dec_dropout,
                             device)
    return Seq2Seq(enc, dec, device).to(device)
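Both factories only read a handful of hyper-parameters from config, so a lightweight namespace is enough to build a model. The values below are illustrative placeholders, and src_vocab_length()/trg_vocab_length() are assumed to be importable from the same module as the factories.

# Illustrative usage only; hyper-parameter values are placeholders.
from types import SimpleNamespace
import torch

config = SimpleNamespace(hid_dim=256,
                         enc_layers=4, enc_dropout=0.1,
                         dec_layers=4, dec_heads=8,
                         dec_pf_dim=512, dec_dropout=0.1)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = make_dss_enc_transformer_dec(config, device)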
def predict():
    outputter = tf.gfile.GFile(FLAGS.output_model_path + "/" +
                               FLAGS.result_filename,
                               mode="w")
    predict_mode = tf.contrib.learn.ModeKeys.INFER if FLAGS.mode == 'predict' else tf.contrib.learn.ModeKeys.EVAL
    model = Seq2Seq()
    if predict_mode == tf.contrib.learn.ModeKeys.INFER:
        if FLAGS.use_mstf_ops:
            pred_pipe = InputPipe(FLAGS.input_validation_data_path,
                                  FLAGS.eval_batch_size, 1, FLAGS.test_fields,
                                  "", True)
        else:
            pred_pipe = InputPipe(FLAGS.input_validation_data_path,
                                  FLAGS.eval_batch_size, 1, FLAGS.test_fields,
                                  "0", True)
        trainer = SingleboxTrainer(model, None, None, None, pred_pipe)
    else:
        if FLAGS.use_mstf_ops:
            pred_pipe = InputPipe(FLAGS.input_validation_data_path,
                                  FLAGS.eval_batch_size, 1, FLAGS.test_fields,
                                  "", True)
        else:
            pred_pipe = InputPipe(FLAGS.input_validation_data_path,
                                  FLAGS.eval_batch_size, 1, FLAGS.test_fields,
                                  "0", True)
        trainer = SingleboxTrainer(model, None, None, pred_pipe, None)
    scope = tf.get_variable_scope()
    scope.reuse_variables()
    saver = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    with tf.Session(config=config) as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.tables_initializer())
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(FLAGS.input_previous_model_path)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("Load model from ", ckpt.model_checkpoint_path)
        else:
            print("No initial model found.")
        trainer.predict(sess, predict_mode, outputter)
    outputter.close()
Example #13
    def __init__(self, weight_path, have_att=False):
        ENC_EMB_DIM = 256
        DEC_EMB_DIM = 256
        ENC_HID_DIM = 512
        DEC_HID_DIM = 512
        ENC_DROPOUT = 0.5
        DEC_DROPOUT = 0.5
        MAX_LEN = 46
        self.maxlen = MAX_LEN
        self.vocab = Vocab(alphabets)

        INPUT_DIM = len(self.vocab)
        OUTPUT_DIM = len(self.vocab)

        if have_att:
            self.model = Seq2Seq(input_dim=INPUT_DIM,
                                 output_dim=OUTPUT_DIM,
                                 encoder_embbeded=ENC_EMB_DIM,
                                 decoder_embedded=DEC_EMB_DIM,
                                 encoder_hidden=ENC_HID_DIM,
                                 decoder_hidden=DEC_HID_DIM,
                                 encoder_dropout=ENC_DROPOUT,
                                 decoder_dropout=DEC_DROPOUT)
        else:
            self.model = Seq2Seq_WithoutAtt(input_dim=INPUT_DIM,
                                            output_dim=OUTPUT_DIM,
                                            encoder_embbeded=ENC_EMB_DIM,
                                            decoder_embedded=DEC_EMB_DIM,
                                            encoder_hidden=ENC_HID_DIM,
                                            decoder_hidden=DEC_HID_DIM,
                                            encoder_dropout=ENC_DROPOUT,
                                            decoder_dropout=DEC_DROPOUT)

        self.load_weights(weight_path)
        if torch.cuda.is_available():
            self.device = "cuda"
            self.model.to('cuda')
        else:
            self.device = "cpu"

        print("Device: ", self.device)
        print("Loaded model")
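The load_weights helper called in the constructor is not shown. A standalone sketch of what it presumably does, assuming the checkpoint file holds a plain state_dict saved with torch.save(model.state_dict(), path):

# Sketch only: assumed behaviour of the missing load_weights helper.
import torch

def load_weights(model, weight_path, device='cpu'):
    state_dict = torch.load(weight_path, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()
    return model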
Example #14
    def __init__(self,
                 hidden_size,
                 num_layers,
                 device='cuda',
                 drop_prob=0,
                 lstm=True,
                 feature_norm=False,
                 input_size=100,
                 output_size=100,
                 bidirectional=True):
        super().__init__()

        self.seq2seq = Seq2Seq(hidden_size=hidden_size,
                               num_layers=num_layers,
                               device=device,
                               drop_prob=drop_prob,
                               lstm=lstm,
                               feature_norm=feature_norm,
                               input_size=input_size,
                               output_size=output_size,
                               bidirectional=bidirectional)
Example #15
def main(args):
    train_dataset = BindingDataset('train', args=args)
    data_from_train = train_dataset.anony_ques_max_len, train_dataset.anony_query_max_len, train_dataset.anony_ques_vocab, train_dataset.anony_query_vocab
    args.anony_ques_max_len, args.anony_query_max_len, args.anony_ques_vocab, args.anony_query_vocab = data_from_train
    print(args.anony_ques_max_len, args.anony_query_max_len, len(args.anony_ques_vocab), len(args.anony_query_vocab))
    train_dataloader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=args.shuffle)
    # build dev_dataloader
    args.shuffle = False
    dev_dataset = BindingDataset('dev', args=args, data_from_train=data_from_train)
    dev_dataloader = DataLoader(dataset=dev_dataset, batch_size=args.batch_size, shuffle=args.shuffle)
    # build test_dataloader
    # test_dataset = BindingDataset('test', args=args, data_from_train=data_from_train)
    # test_dataloader = DataLoader(dataset=test_dataset, batch_size=args.batch_size, shuffle=args.shuffle)
    # load word embedding
    # train
    encoder = Encoder(len(args.anony_ques_vocab), args.word_dim, args.hidden_size,
                      n_layers=2 * args.num_layers, dropout=args.dropout_p)
    decoder = Decoder(args.word_dim, args.hidden_size, len(args.anony_query_vocab),
                      n_layers=args.num_layers, dropout=args.dropout_p)
    model = Seq2Seq(encoder, decoder)
    train(train_dataloader, dev_dataloader, args, model)
def test(dataset, params):
    batches = list(
        dataset.get_batch(params.batch_size, params.src_max_length,
                          params.tgt_max_length))
    n_batches = (dataset.total_pairs - 1) // params.batch_size + 1
    model = Seq2Seq(params, dataset.vocab, dataset.SPECIAL_TOKENS)
    model = model.to(device)

    # load model from saved checkpoint
    model.load_state_dict(torch.load(params.model_path_prefix + ".25.pt"))
    model.eval()
    rouge = Rouge()

    pred_texts = []
    target_texts = []
    source_texts = []
    loss_total = 0.
    bleu_total = 0.

    for batch_count, batch in enumerate(batches):
        source_tensor, target_tensor = batch

        # get predicted output
        with torch.no_grad():
            source_tensor = source_tensor.to(device)
            target_tensor = target_tensor.to(device)
            output_tokens, batch_loss = model.beam_search(
                source_tensor, params.beam_size)
        batch_loss_value = batch_loss.item()
        loss_total += batch_loss_value

        pred_text = get_raw_texts(output_tokens,
                                  vocab=dataset.vocab,
                                  special_tokens=dataset.SPECIAL_TOKENS)
        pred_texts.extend(pred_text)
        target_text = get_raw_texts(target_tensor,
                                    vocab=dataset.vocab,
                                    special_tokens=dataset.SPECIAL_TOKENS)
        target_texts.extend(target_text)
        source_text = get_raw_texts(source_tensor,
                                    vocab=dataset.vocab,
                                    special_tokens=dataset.SPECIAL_TOKENS)
        source_texts.extend(source_text)

        # calculate bleu score
        for i in range(params.batch_size):
            bleu_total += bleu.sentence_bleu([target_text[i]], pred_text[i])
        if batch_count % 100 == 0:
            print("predicting batch {} / total batch {}".format(
                batch_count + 1, n_batches))

    # calculate rouge score
    scores = rouge.get_scores(pred_texts,
                              target_texts,
                              avg=True,
                              ignore_empty=True)
    print("Rouge scores:\n {}\n".format(scores))
    bleu_avg = bleu_total / dataset.total_pairs
    print("Bleu average scores:\n {}\n".format(bleu_avg))
    loss_average = loss_total / n_batches
    print("Negative Log Likelihood:\n {}\n".format(loss_average))

    for i in range(5):
        print("Example: {}\n".format(i + 1))
        print("Article: {}\n".format(source_texts[i]))
        print("True Summary: {}\n".format(target_texts[i]))
        print("Generated Summary: {}\n".format(pred_texts[i]))
Example #17
    def __init__(self, alphabets_, list_ngram):

        self.vocab = Vocab(alphabets_)
        self.synthesizer = SynthesizeData(vocab_path="")
        self.list_ngrams_train, self.list_ngrams_valid = self.train_test_split(
            list_ngram, test_size=0.1)
        print("Loaded data!!!")
        print("Total training samples: ", len(self.list_ngrams_train))
        print("Total valid samples: ", len(self.list_ngrams_valid))

        INPUT_DIM = len(self.vocab)
        OUTPUT_DIM = len(self.vocab)

        self.device = DEVICE
        self.num_iters = NUM_ITERS
        self.beamsearch = BEAM_SEARCH

        self.batch_size = BATCH_SIZE
        self.print_every = PRINT_PER_ITER
        self.valid_every = VALID_PER_ITER

        self.checkpoint = CHECKPOINT
        self.export_weights = EXPORT
        self.metrics = MAX_SAMPLE_VALID
        logger = LOG

        if logger:
            self.logger = Logger(logger)

        self.iter = 0

        self.model = Seq2Seq(input_dim=INPUT_DIM,
                             output_dim=OUTPUT_DIM,
                             encoder_embbeded=ENC_EMB_DIM,
                             decoder_embedded=DEC_EMB_DIM,
                             encoder_hidden=ENC_HID_DIM,
                             decoder_hidden=DEC_HID_DIM,
                             encoder_dropout=ENC_DROPOUT,
                             decoder_dropout=DEC_DROPOUT)

        self.optimizer = AdamW(self.model.parameters(),
                               betas=(0.9, 0.98),
                               eps=1e-09)
        self.scheduler = OneCycleLR(self.optimizer,
                                    total_steps=self.num_iters,
                                    pct_start=PCT_START,
                                    max_lr=MAX_LR)

        self.criterion = LabelSmoothingLoss(len(self.vocab),
                                            padding_idx=self.vocab.pad,
                                            smoothing=0.1)

        self.train_gen = self.data_gen(self.list_ngrams_train,
                                       self.synthesizer,
                                       self.vocab,
                                       is_train=True)
        self.valid_gen = self.data_gen(self.list_ngrams_valid,
                                       self.synthesizer,
                                       self.vocab,
                                       is_train=False)

        self.train_losses = []

        # to device
        self.model.to(self.device)
        self.criterion.to(self.device)
Example #18
def train():
    reader = PWKPReader()
    train_dataset = reader.read(train_path)
    valid_dataset = reader.read(dev_path)
    if os.path.exists(vocab_dir):
        vocab = Vocabulary.from_files(vocab_dir)
    else:
        vocab = Vocabulary.from_instances(instances=train_dataset,
                                          max_vocab_size=opt.vocab_size)
        vocab.save_to_files(vocab_dir)
    iterator = BucketIterator(batch_size=opt.batch_size,
                              sorting_keys=[("src", "num_tokens"),
                                            ("tgt", "num_tokens")])
    iterator.index_with(vocab)

    model = Seq2Seq(emb_size=opt.emb_size,
                    hidden_size=opt.hidden_size,
                    enc_layers=opt.enc_layers,
                    dec_layers=opt.dec_layers,
                    dropout=opt.dropout,
                    bidirectional=opt.bidirectional,
                    beam_size=opt.beam_size,
                    label_smoothing=opt.label_smoothing,
                    vocab=vocab)

    optimizer = optim.Adam(model.parameters(), lr=opt.lr)
    #learning_rate_scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=1, gamma=opt.lr_decay)

    val_iterator = BasicIterator(batch_size=opt.batch_size)
    val_iterator.index_with(vocab)

    predictor = Predictor(iterator=val_iterator,
                          max_decoding_step=opt.max_step,
                          vocab=vocab,
                          reader=reader,
                          data_path=test_path,
                          log_dir=save_dir,
                          map_path=ner_path,
                          cuda_device=opt.gpu)

    trainer = Trainer(
        model=model,
        optimizer=optimizer,
        #learning_rate_scheduler=learning_rate_scheduler,
        learning_rate_decay=opt.lr_decay,
        ema_decay=opt.ema_decay,
        predictor=predictor,
        iterator=iterator,
        train_dataset=train_dataset,
        validation_dataset=valid_dataset,
        validation_metric='+bleu',
        cuda_device=opt.gpu,
        num_epochs=opt.epoch,
        serialization_dir=save_dir,
        num_serialized_models_to_keep=5,
        #model_save_interval=60,
        #summary_interval=500,
        should_log_parameter_statistics=False,
        grad_norm=10)

    trainer.train()
Example #19
def main():
    """Main method to run the models"""
    args = parse_args()
    dataset = []
    vocab = []
    whole_data = []
    for x in [ques, query]:
        dataset.append(VerbalDataset())
        #changed - cover_entities
        dataset[x].load_data_and_fields(cover_entities=True, query_as_input=x)
        vocab.append(dataset[x].get_vocabs())
        whole_data.append(dataset[x].get_data())

    src_vocab, trg_vocab = vocab[0]
    src_vocab_query, trg_vocab_query = vocab[1]
    train_data_question, valid_data_question, test_data_question = whole_data[0]
    print("train_data_question", len(list(train_data_question)))
    train_data_query, valid_data_query, test_data_query = whole_data[1]

    save_vocab(trg_vocab)

    print('--------------------------------')
    print(f'Model: {args.model}')
    print(f'Model input: {args.input}')
    if args.model == RNN_NAME:
        print(f'Attention: {args.attention}')
    print(f'Cover entities: {args.cover_entities}')
    print('--------------------------------')
    print(f"Training data: {len(train_data_query.examples)}")
    print(f"Evaluation data: {len(valid_data_query.examples)}")
    print(f"Testing data: {len(test_data_query.examples)}")
    print('--------------------------------')
    print(f'Question example: {train_data_query.examples[0].src}')
    print(f'Answer example: {train_data_query.examples[0].trg}')
    print('--------------------------------')
    print(f"Unique tokens in questions vocabulary: {len(src_vocab_query)}")
    print(f"Unique tokens in answers vocabulary: {len(trg_vocab_query)}")
    print('--------------------------------')
    print(f'Batch: {args.batch_size}')
    print(f'Epochs: {args.epochs_num}')
    print('--------------------------------')

    if args.model == RNN_NAME and args.attention == ATTENTION_1:
        from models.rnn1 import Encoder, Decoder
    elif args.model == RNN_NAME and args.attention == ATTENTION_2:
        from models.rnn2 import Encoder, Decoder
    elif args.model == CNN_NAME:
        from models.cnn import Encoder, Decoder
    elif args.model == TRANSFORMER_NAME:
        from models.transformer import Encoder, Decoder, NoamOpt

    # create model
    encoder = Encoder(src_vocab, DEVICE)
    encoder_query = Encoder(src_vocab_query, DEVICE)
    decoder = Decoder(trg_vocab_query, DEVICE)
    model = Seq2Seq(encoder, encoder_query, decoder, args.model).to(DEVICE)

    parameters_num = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f'The model has {parameters_num:,} trainable parameters')
    print('--------------------------------')

    # create optimizer
    if model.name == TRANSFORMER_NAME:
        # initialize model parameters with Glorot / fan_avg
        for p in model.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
        optimizer = NoamOpt(torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))
    else:
        optimizer = optim.Adam(model.parameters())

    # define criterion
    criterion = nn.CrossEntropyLoss(ignore_index=trg_vocab.stoi[PAD_TOKEN])

    # train data
    trainer = Trainer(optimizer, criterion, args.batch_size, DEVICE)
    trainer.train(model, train_data_question, train_data_query, valid_data_question, valid_data_query, num_of_epochs=args.epochs_num)

    # load model
    model = Chechpoint.load(model)

    # generate test iterator
    valid_iterator, test_iterator = BucketIterator.splits(
                                        (valid_data_question, test_data_question),
                                        repeat=False,
                                        batch_size=args.batch_size,
                                        sort_within_batch=True if args.model == RNN_NAME else False,
                                        sort_key=lambda x: len(x.src),
                                        device=DEVICE)

    valid_iterator_query, test_iterator_query = BucketIterator.splits(
                                        (valid_data_query, test_data_query),
                                        repeat=False,
                                        batch_size=args.batch_size,
                                        sort_within_batch=True if args.model == RNN_NAME else False,
                                        sort_key=lambda x: len(x.src),
                                        device=DEVICE)
    
    
    # evaluate model
    valid_loss = trainer.evaluator.evaluate(model, valid_iterator, valid_iterator_query)
    test_loss = trainer.evaluator.evaluate(model, test_iterator, test_iterator_query)


    # calculate BLEU score for valid and test data
    predictor = Predictor(model, src_vocab, src_vocab_query, trg_vocab, DEVICE)
    valid_scorer = BleuScorer()
    test_scorer = BleuScorer()
    valid_scorer.data_score(valid_data_question.examples, valid_data_query.examples, predictor)
    results, _ = test_scorer.data_score(test_data_question.examples, test_data_query.examples, predictor)

    for k in results[0:10]:
        print("reference ", k['reference'])
        print("hypothesis", k['hypothesis'])

    print(f'| Val. Loss: {valid_loss:.3f} | Val. PPL: {math.exp(valid_loss):7.3f} |')
    print(f'| Val. Data Average BLEU score {valid_scorer.average_score()} |')
    print(f'| Val. Data Average METEOR score {valid_scorer.average_meteor_score()} |')
    print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')
    print(f'| Test Data Average BLEU score {test_scorer.average_score()} |')
    print(f'| Test Data Average METEOR score {test_scorer.average_meteor_score()} |')
Example #20
            print_accuracy_measures(predictions, actuals)


if __name__ == "__main__":
    # Init models
    models = []

    seq2seq = Seq2Seq(name="seq2seq",
                      data_dict=data_dict,
                      batch_size=batch_size,
                      state_size=state_size,
                      input_feature_amount=input_feature_amount,
                      output_feature_amount=output_feature_amount,
                      seq_len_in=seq_len_in,
                      seq_len_out=seq_len_out,
                      plot_time_steps_view=plot_time_steps_view,
                      steps_per_epoch=steps_per_epoch,
                      epochs=epochs,
                      learning_rate=learning_rate,
                      intermediates=intermediates,
                      plot_loss=plot_loss,
                      load_weights_path=load_s2s_weights_path,
                      agg_level=agg_level
                      )

    seq2seq_1dconv = Seq2SeqConv(name="seq2seq_1dconv",
                                 data_dict=data_dict,
                                 batch_size=batch_size,
                                 state_size=state_size,
                                 input_feature_amount=input_feature_amount,
                                 output_feature_amount=output_feature_amount,
Example #21
def main(train_loader, valid_loader, test_loader):
    encoder = Encoder(HIDDEN_SIZE_ENC, HEIGHT, WIDTH, Bi_GRU, CON_STEP,
                      FLIP).cuda()
    decoder = Decoder(HIDDEN_SIZE_DEC, EMBEDDING_SIZE, vocab_size, Attention,
                      TRADEOFF_CONTEXT_EMBED).cuda()
    seq2seq = Seq2Seq(encoder, decoder, output_max_len, vocab_size).cuda()
    if CurriculumModelID > 0:
        model_file = 'save_weights/seq2seq-' + str(
            CurriculumModelID) + '.model'
        #model_file = 'save_weights/words/seq2seq-' + str(CurriculumModelID) +'.model'
        print('Loading ' + model_file)
        seq2seq.load_state_dict(torch.load(model_file))  #load
    opt = optim.Adam(seq2seq.parameters(), lr=learning_rate)
    #opt = optim.SGD(seq2seq.parameters(), lr=learning_rate, momentum=0.9)
    #opt = optim.RMSprop(seq2seq.parameters(), lr=learning_rate, momentum=0.9)

    #scheduler = optim.lr_scheduler.StepLR(opt, step_size=20, gamma=1)
    scheduler = optim.lr_scheduler.MultiStepLR(opt,
                                               milestones=lr_milestone,
                                               gamma=lr_gamma)
    epochs = 5000000
    if EARLY_STOP_EPOCH is not None:
        min_loss = 1e3
        min_loss_index = 0
        min_loss_count = 0

    if CurriculumModelID > 0 and WORD_LEVEL:
        start_epoch = CurriculumModelID + 1
        for i in range(start_epoch):
            scheduler.step()
    else:
        start_epoch = 0

    for epoch in range(start_epoch, epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        teacher_rate = teacher_force_func(epoch) if TEACHER_FORCING else False
        start = time.time()
        loss = train(train_loader, seq2seq, opt, teacher_rate, epoch)
        writeLoss(loss, 'train')
        print('epoch %d/%d, loss=%.3f, lr=%.8f, teacher_rate=%.3f, time=%.3f' %
              (epoch, epochs, loss, lr, teacher_rate, time.time() - start))

        if epoch % MODEL_SAVE_EPOCH == 0:
            folder_weights = 'save_weights'
            if not os.path.exists(folder_weights):
                os.makedirs(folder_weights)
            torch.save(seq2seq.state_dict(),
                       folder_weights + '/seq2seq-%d.model' % epoch)

        start_v = time.time()
        loss_v = valid(valid_loader, seq2seq, epoch)
        writeLoss(loss_v, 'valid')
        print('  Valid loss=%.3f, time=%.3f' % (loss_v, time.time() - start_v))

        if EARLY_STOP_EPOCH is not None:
            gt = 'RWTH_partition/RWTH.iam_word_gt_final.valid.thresh'
            decoded = 'pred_logs/valid_predict_seq.' + str(epoch) + '.log'
            res_cer = sub.Popen(['./tasas_cer.sh', gt, decoded],
                                stdout=sub.PIPE)
            res_cer = res_cer.stdout.read().decode('utf8')
            loss_v = float(res_cer) / 100
            if loss_v < min_loss:
                min_loss = loss_v
                min_loss_index = epoch
                min_loss_count = 0
            else:
                min_loss_count += 1
            if min_loss_count >= EARLY_STOP_EPOCH:
                print('Early Stopping at: %d. Best epoch is: %d' %
                      (epoch, min_loss_index))
                return min_loss_index
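teacher_force_func above comes from the project's training configuration and is not shown. One plausible choice, sketched here as an assumption, is an exponentially decaying teacher-forcing rate:

# Sketch only: assumed shape of teacher_force_func; constants are placeholders.
def teacher_force_func(epoch, start_rate=1.0, decay=0.99, floor=0.1):
    # decay the teacher-forcing rate with the epoch index, never below the floor
    return max(floor, start_rate * decay ** epoch)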
Example #22
    raw = torch.load('./dat/processed/raw_states.pt')

    for index, vects in d.items():
        # each is N x 300
        input_state, next_state = vects[0], vects[1]
        # raw strings corresponding to embeddings
        raw_input_state, raw_next_state = list(raw.keys())[index], raw[list(
            raw.keys())[index]]
        print(raw_input_state)
        print(input_state)
        print(raw_next_state)
        print(next_state)
        if index > 1:
            break

    model = Seq2Seq(hidden_size=2, num_layers=2)
    print(model)

    for index, vects in d.items():
        # each is N x 300
        input_state, next_state = vects[0], vects[1]
        # raw strings corresponding to embeddings
        raw_input_state, raw_next_state = list(raw.keys())[index], raw[list(
            raw.keys())[index]]

        #print(input_state.unsqueeze(0).shape)
        mu = model(input_state.unsqueeze(0)).detach()
        #print(mu.shape)

        # ACTOR FORMAT
        logstd = torch.zeros_like(mu)
Example #23
def run():

    ## Load Config from JSON file
    dir_path = os.path.dirname(os.path.realpath(__file__))
    config_path = os.path.join(dir_path, "experiment", FLAGS.config)

    if not os.path.exists(config_path):
        raise FileNotFoundError

    if not os.path.exists(FLAGS.data_path):
        raise FileNotFoundError

    with open(config_path, "r") as f:
        config = json.load(f)

    config["gpu"] = torch.cuda.is_available()

    ## Load Data
    df = dl.load_raw_text_file(FLAGS.data_path, num_examples=30000)

    # index language for Input and Output
    inp_index = LanguageIndex(phrases=df["es"].values)
    targ_index = LanguageIndex(df["eng"].values)
    vocab_inp_size = len(inp_index.word2idx)
    vocab_tar_size = len(targ_index.word2idx)

    # Convert Sentences into tokenized tensors
    input_tensor, target_tensor = dl.convert_tensor(df, inp_index, targ_index)
    # Split to training and test set
    input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(
        input_tensor, target_tensor, test_size=0.2)
    train_dataset = MyData(input_tensor_train, target_tensor_train)
    val_dataset = MyData(input_tensor_val, target_tensor_val)

    # Convert to DataLoader Object
    train_dataset = data.DataLoader(train_dataset,
                                    batch_size=config['batch_size'],
                                    drop_last=True,
                                    shuffle=True)

    eval_dataset = data.DataLoader(val_dataset,
                                   batch_size=config['batch_size'],
                                   drop_last=False,
                                   shuffle=True)
    # Models
    model = Seq2Seq(config, vocab_inp_size, vocab_tar_size)
    scorer = create_scorer(config['metrics'])

    if config['gpu']:
        model = model.cuda()

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.get("learning_rate", .001))

    for name, param in model.named_parameters():
        if 'bias' in name:
            torch.nn.init.constant_(param, 0.0)
        elif 'weight' in name:
            torch.nn.init.xavier_normal_(param)
    print("Weight Initialized")

    ## Train and Evaluate over epochs
    all_train_avg_loss = []
    all_eval_avg_loss = []
    all_eval_avg_acc = []

    for epoch in range(FLAGS.epochs):
        run_state = (epoch, FLAGS.epochs)

        # Train needs to return model and optimizer, otherwise the model keeps restarting from zero at every epoch
        model, optimizer, train_avg_loss = train(model, optimizer,
                                                 train_dataset, run_state,
                                                 config['debug'])
        all_train_avg_loss.append(train_avg_loss)

        # Return Val Set Loss and Accuracy
        eval_avg_loss, eval_acc = evaluate(model, eval_dataset, targ_index,
                                           scorer, config['debug'])
        all_eval_avg_loss.append(eval_avg_loss)
        all_eval_avg_acc.append(eval_acc)

        # Save Model Checkpoint
        checkpoint_dict = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': eval_avg_loss,
        }

        checkpoint_path = '{}/epoch_{:0.0f}_val_loss_{:0.3f}.pt'.format(
            FLAGS.model_checkpoint_dir, epoch, eval_avg_loss)
        torch.save(checkpoint_dict, checkpoint_path)

    # Export Model Learning Curve Info
    df = pd.DataFrame({
        'epoch': range(FLAGS.epochs),
        'train_loss': all_train_avg_loss,
        'eval_loss': all_eval_avg_loss,
        'eval_acc': all_eval_avg_acc
    })

    now = datetime.now()
    current_time = now.strftime("%Y%m%d%H%M%S")
    export_path = '{}/{}_{:0.0f}_bz_{}_val_loss_{:0.3f}.csv'.format(
        FLAGS.metrics_dir, current_time, FLAGS.epochs, config['batch_size'],
        eval_avg_loss)
    df.to_csv(export_path, index=False)
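The checkpoint dictionary saved each epoch above can later be restored to resume training. A minimal sketch, assuming model and optimizer have been constructed exactly as in run(); the filename is a placeholder:

# Sketch: resume from a checkpoint written by the loop above.
checkpoint = torch.load('model_checkpoints/epoch_3_val_loss_1.234.pt',
                        map_location='cpu')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch'] + 1
last_val_loss = checkpoint['loss']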
Example #24
        logger.log('Converting data entries to tensors')
        tensor_builder = TensorBuilder(input_lang, output_lang)
        train_pairs = [
            tensor_builder.tensorsFromPair(pair) for pair in train_pairs
        ]
        valid_pairs = [
            tensor_builder.tensorsFromPair(pair) for pair in valid_pairs
        ]
        test_pairs = [
            tensor_builder.tensorsFromPair(pair) for pair in test_pairs
        ]

        logger.log('Building the model')
        model = Seq2Seq(input_size=input_lang.n_words,
                        output_size=output_lang.n_words,
                        hidden_size=constants.HIDDEN_SIZE,
                        learning_rate=constants.LEARNING_RATE,
                        teacher_forcing_ratio=constants.TEACHER_FORCING_RATIO,
                        device=constants.DEVICE)

        logger.log(str(model))

        logger.log('Initializing evaluators')
        evaluator = Evaluator(valid_pairs, input_lang, output_lang)
        test_set_evaluator = Evaluator(test_pairs, input_lang, output_lang)

    except Exception as e:
        # Log the error message and re-raise it so the caller sees the full traceback
        logger.log("Error: " + str(e))
        raise e

    successful = False
Example #25
    input_lengths[-2] = Ti - 2
    padded_input[-1, -3:, ] = 0
    input_lengths[-1] = Ti - 3

    encoder = Encoder(D, H, Li, bidirectional=B, rnn_type=R)

    # Decoder
    VOC, EMB, SOS, EOS, L = 10, 3, 8, 9, 2
    H = H * 2 if B else H
    padded_target = torch.randint(10, (N, To), dtype=torch.long)  # N x To
    padded_target[-1, -3:] = IGNORE_ID

    decoder = Decoder(VOC, EMB, SOS, EOS, H, L)

    # Seq2Seq
    seq2seq = Seq2Seq(encoder, decoder)
    loss = seq2seq(padded_input, input_lengths, padded_target)
    print(loss)
    # print(decoder_outputs)
    # print("To+1 =", len(decoder_outputs))
    # print("N, V =", decoder_outputs[0].size())

    import argparse
    beam_size = 5
    nbest = 5
    defaults = dict(beam_size=beam_size, nbest=nbest, decode_max_len=0)
    args = argparse.Namespace(**defaults)
    char_list = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
    for i in range(3):
        print("\n***** Utt", i + 1)
        Ti = i + 20
def main(all_data_loader_func):
    encoder = Encoder(HIDDEN_SIZE_ENC, HEIGHT, WIDTH, Bi_GRU, CON_STEP,
                      FLIP).cuda()
    decoder = Decoder(HIDDEN_SIZE_DEC, EMBEDDING_SIZE, vocab_size, Attention,
                      TRADEOFF_CONTEXT_EMBED).cuda()
    seq2seq = Seq2Seq(encoder, decoder, output_max_len, vocab_size).cuda()
    if CurriculumModelID > 0:
        model_file = 'save_weights/seq2seq-' + str(
            CurriculumModelID) + '.model'
        print('Loading ' + model_file)
        pretrain_dict = torch.load(model_file)
        seq2seq_dict = seq2seq.state_dict()
        pretrain_dict = {
            k: v
            for k, v in pretrain_dict.items() if k in seq2seq_dict
        }
        seq2seq_dict.update(pretrain_dict)
        seq2seq.load_state_dict(seq2seq_dict)  #load
    opt = optim.Adam(seq2seq.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.MultiStepLR(opt,
                                               milestones=lr_milestone,
                                               gamma=lr_gamma)
    epochs = 5000
    if EARLY_STOP_EPOCH is not None:
        min_loss = 1e3
        min_loss_index = 0
        min_loss_count = 0

    if CurriculumModelID > 0:
        start_epoch = CurriculumModelID + 1
        for i in range(start_epoch):
            scheduler.step()
    else:
        start_epoch = 0

    for epoch in range(start_epoch, epochs):
        # each epoch, random sample training set to be balanced with unlabeled test set
        train_loader, valid_loader, test_loader = all_data_loader_func()
        scheduler.step()
        lr = scheduler.get_lr()[0]
        teacher_rate = teacher_force_func(epoch) if TEACHER_FORCING else False
        start = time.time()

        lambd = return_lambda(epoch)

        loss, loss_d = train(train_loader, seq2seq, opt, teacher_rate, epoch,
                             lambd)
        writeLoss(loss, 'train')
        writeLoss(loss_d, 'domain_train')
        print(
            'epoch %d/%d, loss=%.3f, domain_loss=%.3f, lr=%.6f, teacher_rate=%.3f, lambda_pau=%.3f, time=%.3f'
            % (epoch, epochs, loss, loss_d, lr, teacher_rate, lambd,
               time.time() - start))

        if epoch % MODEL_SAVE_EPOCH == 0:
            folder_weights = 'save_weights'
            if not os.path.exists(folder_weights):
                os.makedirs(folder_weights)
            torch.save(seq2seq.state_dict(),
                       folder_weights + '/seq2seq-%d.model' % epoch)

        start_v = time.time()
        loss_v, loss_v_d = valid(valid_loader, seq2seq, epoch)
        writeLoss(loss_v, 'valid')
        writeLoss(loss_v_d, 'domain_valid')
        print('      Valid loss=%.3f, domain_loss=%.3f, time=%.3f' %
              (loss_v, loss_v_d, time.time() - start_v))

        test(test_loader, epoch, False)  #~~~~~~

        if EARLY_STOP_EPOCH is not None:
            gt = loadData.GT_TE
            decoded = 'pred_logs/valid_predict_seq.' + str(epoch) + '.log'
            res_cer = sub.Popen(['./tasas_cer.sh', gt, decoded],
                                stdout=sub.PIPE)
            res_cer = res_cer.stdout.read().decode('utf8')
            loss_v = float(res_cer) / 100
            if loss_v < min_loss:
                min_loss = loss_v
                min_loss_index = epoch
                min_loss_count = 0
            else:
                min_loss_count += 1
            if min_loss_count >= EARLY_STOP_EPOCH:
                print('Early Stopping at: %d. Best epoch is: %d' %
                      (epoch, min_loss_index))
                return min_loss_index
Example #27
def run():
    USE_CUDA = torch.cuda.is_available()
    FLAGS.config = 'example_seq2seq.json'
    config_path = os.path.join("experiments", FLAGS.config)
    print(FLAGS.config)

    if not os.path.exists(config_path):
        raise FileNotFoundError

    with open(config_path, "r") as f:
        config = json.load(f)

    config["gpu"] = torch.cuda.is_available()

    writer = SummaryWriter('experiments/finally')

    #     dataset = ToyDataset(5, 15)
    #     eval_dataset = ToyDataset(5, 15, type='eval')
    dataset = Toy_Numbers(10)
    eval_dataset = Toy_Numbers(10, train=False)
    BATCHSIZE = 32
    train_loader = data.DataLoader(dataset,
                                   batch_size=BATCHSIZE,
                                   shuffle=False,
                                   collate_fn=pad_collate,
                                   drop_last=True)
    eval_loader = data.DataLoader(eval_dataset,
                                  batch_size=BATCHSIZE,
                                  shuffle=False,
                                  collate_fn=pad_collate,
                                  drop_last=True)
    config["batch_size"] = BATCHSIZE

    # Models
    model = Seq2Seq(config)
    model = model.float()

    # dataiter = iter(train_loader)
    # sample_input= dataiter.next()

    # writer.add_graph(model, sample_input)
    # writer.close()

    if USE_CUDA:
        model = model.cuda()

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.get("learning_rate", .001))

    print("=" * 60)
    print(model)
    print("=" * 60)
    for k, v in sorted(config.items(), key=lambda i: i[0]):
        print(" (" + k + ") : " + str(v))
    print()
    print("=" * 60)

    print("\nInitializing weights...")
    for name, param in model.named_parameters():
        if 'bias' in name:
            torch.nn.init.constant_(param, 0.0)
        elif 'weight' in name:
            torch.nn.init.xavier_normal_(param)

    for epoch in range(FLAGS.epochs):
        run_state = (epoch, FLAGS.epochs, FLAGS.train_size)

        # Train needs to return model and optimizer, otherwise the model keeps restarting from zero at every epoch
        model, optimizer = train(model, optimizer, train_loader, run_state,
                                 writer)
        # print("losses", l_list)
        # for i in l_list:
        #     # print(i)
        #     writer.add_scalar('Loss/train',i)
        evaluate(model, eval_loader, writer)
def train():
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        tf.gfile.Copy(FLAGS.input_previous_model_path + "/" +
                      FLAGS.decoder_vocab_file,
                      FLAGS.output_model_path + "/" + FLAGS.decoder_vocab_file,
                      overwrite=True)
        global_step = tf.train.get_or_create_global_step()
        inc_step = tf.assign_add(global_step, 1)
        #Training setting
        if FLAGS.use_mstf_ops:
            train_input_pipe = InputPipe([
                FLAGS.input_training_data_path + "/" + i
                for i in tf.gfile.ListDirectory(FLAGS.input_training_data_path)
            ], FLAGS.batch_size, FLAGS.num_epochs, 2, "", False)
            auc_eval_pipe = InputPipe(
                FLAGS.input_validation_data_path +
                "/label_data.txt", FLAGS.eval_batch_size, 1, 3, "",
                False) if FLAGS.auc_evaluation else None
            bleu_eval_pipe = InputPipe(
                FLAGS.input_validation_data_path +
                "/bleu_data.txt", FLAGS.eval_batch_size, 1, 2, "",
                False) if FLAGS.bleu_evaluation else None
        else:
            train_input_pipe = InputPipe([
                FLAGS.input_training_data_path + "/" + i
                for i in tf.gfile.ListDirectory(FLAGS.input_training_data_path)
            ], FLAGS.batch_size, FLAGS.num_epochs, 2, "0", False)
            auc_eval_pipe = InputPipe(
                FLAGS.input_validation_data_path +
                "/label_data.txt", FLAGS.eval_batch_size, 1, 3, "0",
                False) if FLAGS.auc_evaluation else None
            bleu_eval_pipe = InputPipe(
                FLAGS.input_validation_data_path +
                "/bleu_data.txt", FLAGS.eval_batch_size, 1, 2, "0",
                False) if FLAGS.bleu_evaluation else None
        model = Seq2Seq()
        trainer = SingleboxTrainer(model, inc_step, train_input_pipe,
                                   auc_eval_pipe, bleu_eval_pipe)
        summary_op = tf.summary.merge_all()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        saver = tf.train.Saver(max_to_keep=FLAGS.max_model_to_keep,
                               name='model_saver')
        with tf.Session(config=config) as session:
            summ_writer = tf.summary.FileWriter(FLAGS.log_dir, session.graph)
            # Load pretrained checkpoint if one exists
            session.run(tf.local_variables_initializer())
            session.run(tf.global_variables_initializer())
            session.run(tf.tables_initializer())
            session.run(train_input_pipe.iterator.initializer)
            ckpt = tf.train.get_checkpoint_state(
                FLAGS.input_previous_model_path)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(session, ckpt.model_checkpoint_path)
                print("Load Model From ", ckpt.model_checkpoint_path)
            else:
                print("No Initial Model Found.")
            trainer.start_time = time.time()
            while True:
                try:
                    _, avg_loss, total_weight, step, summary = session.run(
                        trainer.train_ops() + [summary_op])
                    #print(step)
                    if step % FLAGS.log_frequency == 1:
                        summ_writer.add_summary(summary, step)
                        trainer.print_log(total_weight, step, avg_loss)
                    if step % FLAGS.checkpoint_frequency == 1:
                        if FLAGS.auc_evaluation:
                            trainer.eval(step, session, 'auc')
                        if FLAGS.bleu_evaluation:
                            trainer.eval(step, session, 'bleu')
                        if trainer.improved():
                            saver.save(session,
                                       FLAGS.output_model_path +
                                       "/seq2seq_model",
                                       global_step=step)
                        elif trainer.early_stop():
                            print("\nEarly stop")
                            break
                except tf.errors.OutOfRangeError:
                    print("End of training.")
                    break
            if not trainer.early_stop():
                saver.save(session,
                           FLAGS.output_model_path + "/" +
                           "seq2seq_model_final",
                           global_step=step)
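The session block above initializes variables and then restores the newest checkpoint when one exists under FLAGS.input_previous_model_path. A compact sketch of that restore-or-initialize step using the standard TF1 checkpoint API follows; the helper name is made up for illustration.

import tensorflow as tf  # TF1-style API, as in the example above

def restore_or_init(session, saver, model_dir):
    # Hypothetical helper: initialize all variables, then overwrite them from
    # the latest checkpoint under model_dir if one is available.
    session.run(tf.global_variables_initializer())
    session.run(tf.local_variables_initializer())
    session.run(tf.tables_initializer())
    ckpt = tf.train.get_checkpoint_state(model_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(session, ckpt.model_checkpoint_path)
        print("Loaded model from", ckpt.model_checkpoint_path)
        return True
    print("No initial model found; training from scratch.")
    return False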
Example #29
0
def main():

    # config for training
    config = Config()
    print("Normal train config:")
    pp(config)

    valid_config = Config()
    valid_config.dropout = 0
    valid_config.batch_size = 20

    # config for test
    test_config = Config()
    test_config.dropout = 0
    test_config.batch_size = 1

    with_sentiment = config.with_sentiment

    ###############################################################################
    # Load data
    ###############################################################################
    # sentiment data path: ../final_data/poem_with_sentiment.txt
    # This path must be passed explicitly on the command line to LoadPoem, because its default is None
    # Handle the pretrain data and the full poem data

    # api = LoadPoem(args.train_data_dir, args.test_data_dir, args.max_vocab_size)
    api = LoadPoem(corpus_path=args.train_data_dir,
                   test_path=args.test_data_dir,
                   max_vocab_cnt=config.max_vocab_cnt,
                   with_sentiment=with_sentiment)

    # Alternate training: prepare the large dataset
    poem_corpus = api.get_tokenized_poem_corpus(
        type=1 + int(with_sentiment))  # corpus for training and validation
    test_data = api.get_tokenized_test_corpus()  # test data
    # three lists; every element of each list is [topic, last_sentence, current_sentence]
    train_poem, valid_poem, test_poem = poem_corpus["train"], poem_corpus[
        "valid"], test_data["test"]

    train_loader = SWDADataLoader("Train", train_poem, config)
    valid_loader = SWDADataLoader("Valid", valid_poem, config)
    test_loader = SWDADataLoader("Test", test_poem, config)

    print("Finish Poem data loading, not pretraining or alignment test")

    if not args.forward_only:
        # LOG #
        log_start_time = str(datetime.now().strftime('%Y%m%d%H%M'))
        if not os.path.isdir('./output'):
            os.makedirs('./output')
        if not os.path.isdir('./output/{}'.format(args.expname)):
            os.makedirs('./output/{}'.format(args.expname))
        if not os.path.isdir('./output/{}/{}'.format(args.expname,
                                                     log_start_time)):
            os.makedirs('./output/{}/{}'.format(args.expname, log_start_time))

        # save arguments
        json.dump(
            vars(args),
            open(
                './output/{}/{}/args.json'.format(args.expname,
                                                  log_start_time), 'w'))

        logger = logging.getLogger(__name__)
        logging.basicConfig(level=logging.DEBUG, format="%(message)s")
        fh = logging.FileHandler("./output/{}/{}/logs.txt".format(
            args.expname, log_start_time))
        # add the handlers to the logger
        logger.addHandler(fh)
        logger.info(vars(args))

        tb_writer = SummaryWriter("./output/{}/{}/tb_logs".format(
            args.expname, log_start_time)) if args.visual else None

        if config.reload_model:
            model = load_model(config.model_name)
        else:
            if args.model == "mCVAE":
                model = CVAE_GMP(config=config, api=api)
            elif args.model == 'CVAE':
                model = CVAE(config=config, api=api)
            else:
                model = Seq2Seq(config=config, api=api)
            if use_cuda:
                model = model.cuda()

        # if corpus.word2vec is not None and args.reload_from<0:
        #     print("Loaded word2vec")
        #     model.embedder.weight.data.copy_(torch.from_numpy(corpus.word2vec))
        #     model.embedder.weight.data[0].fill_(0)

        ###############################################################################
        # Start training
        ###############################################################################
        # The model is still PoemWAE_GMP, unchanged; this portion of the data is only used to force-train one of its Gaussian priors
        # pretrain = True

        cur_best_score = {
            'min_valid_loss': 100,
            'min_global_itr': 0,
            'min_epoch': 0,
            'min_itr': 0
        }

        train_loader.epoch_init(config.batch_size, shuffle=True)

        # model = load_model(3, 3)
        epoch_id = 0
        global_t = 0
        while epoch_id < config.epochs:

            while True:  # loop through all batches in training data
                # train one batch
                model, finish_train, loss_records, global_t = \
                    train_process(global_t=global_t, model=model, train_loader=train_loader, config=config, sentiment_data=with_sentiment)
                if finish_train:
                    test_process(model=model,
                                 test_loader=test_loader,
                                 test_config=test_config,
                                 logger=logger)
                    # evaluate_process(model=model, valid_loader=valid_loader, log_start_time=log_start_time, global_t=global_t, epoch=epoch_id, logger=logger, tb_writer=tb_writer, api=api)
                    # save model after each epoch
                    save_model(model=model,
                               epoch=epoch_id,
                               global_t=global_t,
                               log_start_time=log_start_time)
                    logger.info(
                        'Finish epoch %d, current min valid loss: %.4f, '
                        'corresponding global itr: %d  epoch: %d  itr: %d\n\n' %
                        (epoch_id,
                         cur_best_score['min_valid_loss'],
                         cur_best_score['min_global_itr'],
                         cur_best_score['min_epoch'],
                         cur_best_score['min_itr']))
                    # initialize training on the next unlabeled-data epoch
                    # unlabeled_epoch += 1
                    epoch_id += 1
                    train_loader.epoch_init(config.batch_size, shuffle=True)
                    break
                # elif batch_idx >= start_batch + config.n_batch_every_iter:
                #     print("Finish unlabel epoch %d batch %d to %d" %
                #           (unlabeled_epoch, start_batch, start_batch + config.n_batch_every_iter))
                #     start_batch += config.n_batch_every_iter
                #     break

                # write the log
                if global_t % config.log_every == 0:
                    log = 'Epoch id %d: step: %d/%d: ' \
                          % (epoch_id, global_t % train_loader.num_batch, train_loader.num_batch)
                    for loss_name, loss_value in loss_records:
                        if loss_name == 'avg_lead_loss':
                            continue
                        log = log + loss_name + ':%.4f ' % loss_value
                        if args.visual:
                            tb_writer.add_scalar(loss_name, loss_value,
                                                 global_t)
                    logger.info(log)

                # valid
                if global_t % config.valid_every == 0:
                    # test_process(model=model, test_loader=test_loader, test_config=test_config, logger=logger)
                    valid_process(
                        global_t=global_t,
                        model=model,
                        valid_loader=valid_loader,
                        valid_config=valid_config,
                        unlabeled_epoch=epoch_id,  # if sample_rate_unlabeled is not 1, add 1 at the end here
                        tb_writer=tb_writer,
                        logger=logger,
                        cur_best_score=cur_best_score)
                # if batch_idx % (train_loader.num_batch // 3) == 0:
                #     test_process(model=model, test_loader=test_loader, test_config=test_config, logger=logger)
                if global_t % config.test_every == 0:
                    test_process(model=model,
                                 test_loader=test_loader,
                                 test_config=test_config,
                                 logger=logger)

    # forward_only: evaluation / test mode
    else:
        expname = 'sentInput'
        time = '202101191105'

        model = load_model(
            './output/{}/{}/model_global_t_13596_epoch3.pckl'.format(
                expname, time))
        test_loader.epoch_init(test_config.batch_size, shuffle=False)
        if not os.path.exists('./output/{}/{}/test/'.format(expname, time)):
            os.mkdir('./output/{}/{}/test/'.format(expname, time))
        output_file = [
            open('./output/{}/{}/test/output_0.txt'.format(expname, time),
                 'w'),
            open('./output/{}/{}/test/output_1.txt'.format(expname, time),
                 'w'),
            open('./output/{}/{}/test/output_2.txt'.format(expname, time), 'w')
        ]

        poem_count = 0
        predict_results = {0: [], 1: [], 2: []}
        titles = {0: [], 1: [], 2: []}
        sentiment_result = {0: [], 1: [], 2: []}
        # Get all poem predictions
        while True:
            model.eval()
            batch = test_loader.next_batch_test()  # test data uses a dedicated batch method
            poem_count += 1
            if poem_count % 10 == 0:
                print("Predicted {} poems".format(poem_count))
            if batch is None:
                break
            title_list = batch  # batch size is 1: each batch writes one poem
            title_tensor = to_tensor(title_list)
            # model.test decodes the poem for the current batch; each decode's input context is the previous step's output
            for i in range(3):
                sentiment_label = np.zeros(1, dtype=np.int64)
                sentiment_label[0] = int(i)
                sentiment_label = to_tensor(sentiment_label)
                output_poem, output_tokens = model.test(
                    title_tensor, title_list, sentiment_label=sentiment_label)

                titles[i].append(output_poem.strip().split('\n')[0])
                predict_results[i] += (np.array(output_tokens)[:, :7].tolist())

        # Predict sentiment with the sort net
        from collections import defaultdict
        neg = defaultdict(int)
        neu = defaultdict(int)
        pos = defaultdict(int)
        total = defaultdict(int)
        for i in range(3):
            _, neg[i], neu[i], pos[i] = test_sentiment(predict_results[i])
            total[i] = neg[i] + neu[i] + pos[i]

        for i in range(3):
            print("%d%%\t%d%%\t%d%%" %
                  (neg[i] * 100 / total[i], neu[i] * 100 / total[i],
                   pos[i] * 100 / total[i]))

        for i in range(3):
            write_predict_result_to_file(titles[i], predict_results[i],
                                         sentiment_result[i], output_file[i])
            output_file[i].close()

        print("Done testing")
Example #30
0
def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_loss = float('inf')
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        # model
        encoder = Encoder(args.einput, args.ehidden, args.elayer,
                          dropout=args.edropout, bidirectional=args.ebidirectional,
                          rnn_type=args.etype)
        decoder = Decoder(vocab_size, args.dembed, sos_id,
                          eos_id, args.dhidden, args.dlayer,
                          bidirectional_encoder=args.ebidirectional)
        model = Seq2Seq(encoder, decoder)
        print(model)
        model.cuda()

        optimizer = torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09)

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Custom dataloaders
    train_dataset = AiShellDataset(args, 'train')
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, collate_fn=pad_collate,
                                               pin_memory=True, shuffle=True, num_workers=num_workers)
    valid_dataset = AiShellDataset(args, 'dev')
    valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=args.batch_size, collate_fn=pad_collate,
                                               pin_memory=True, shuffle=False, num_workers=num_workers)

    # Epochs
    for epoch in range(start_epoch, args.epochs):
        # Halve the learning rate when improvement has stalled
        if args.half_lr and epochs_since_improvement > 0:
            adjust_learning_rate(optimizer, 0.5)

        # One epoch's training
        train_loss = train(train_loader=train_loader,
                           model=model,
                           optimizer=optimizer,
                           epoch=epoch,
                           logger=logger)
        writer.add_scalar('Train_Loss', train_loss, epoch)

        lr = get_learning_rate(optimizer)
        print('Learning rate: {}\n'.format(lr))
        writer.add_scalar('Learning_Rate', lr, epoch)

        # One epoch's validation
        valid_loss = valid(valid_loader=valid_loader,
                           model=model,
                           logger=logger)
        writer.add_scalar('Valid_Loss', valid_loss, epoch)

        # Check if there was an improvement
        is_best = valid_loss < best_loss
        best_loss = min(valid_loss, best_loss)
        if not is_best:
            epochs_since_improvement += 1
            print("\nEpochs since last improvement: %d\n" % (epochs_since_improvement,))
        else:
            epochs_since_improvement = 0

        # Save checkpoint
        save_checkpoint(epoch, epochs_since_improvement, model, optimizer, best_loss, is_best)
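Example #30 calls adjust_learning_rate(optimizer, 0.5) whenever validation loss has stalled and logs the current rate via get_learning_rate(optimizer). A minimal sketch of what such helpers can look like on a standard PyTorch optimizer follows; these bodies are assumptions, not the project's actual utilities.

import torch

def adjust_learning_rate(optimizer, factor):
    # Scale every parameter group's learning rate by `factor` (0.5 halves it).
    for param_group in optimizer.param_groups:
        param_group['lr'] *= factor

def get_learning_rate(optimizer):
    # Report the learning rate of the first parameter group.
    return optimizer.param_groups[0]['lr']

# Usage sketch with a throwaway parameter and SGD:
params = [torch.nn.Parameter(torch.zeros(1))]
opt = torch.optim.SGD(params, lr=1e-3)
adjust_learning_rate(opt, 0.5)
print(get_learning_rate(opt))  # 0.0005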