Example no. 1
 def _make_model(self):
     # embedding
     embedding = nn.Embedding(num_embeddings=self._config.vocab_size,
                              embedding_dim=self._config.embed_size)
     embedding.weight.data.copy_(
         torch.from_numpy(np.load(self._config.embedding_file_name)))
     embedding.weight.requires_grad = False
     # encoder
     encoder = Encoder(rnn_type=self._config.rnn_type,
                       embed_size=self._config.embed_size,
                       hidden_size=self._config.hidden_size,
                       num_layers=self._config.num_layers,
                       bidirectional=self._config.bidirectional,
                       dropout=self._config.dropout)
     # bridge
     bridge = Bridge(rnn_type=self._config.rnn_type,
                     hidden_size=self._config.hidden_size,
                     bidirectional=self._config.bidirectional)
     # decoder rnn cell
     if self._config.rnn_type == 'LSTM':
         rnn_cell = MultiLayerLSTMCells(
             input_size=2 * self._config.embed_size,
             hidden_size=self._config.hidden_size,
             num_layers=self._config.num_layers,
             dropout=self._config.dropout)
     else:
          rnn_cell = MultiLayerGRUCells(
              input_size=2 * self._config.embed_size,
              hidden_size=self._config.hidden_size,
              num_layers=self._config.num_layers,
              dropout=self._config.dropout)
     # attention
     if self._config.attention_type == 'Dot':
         attention = DotAttention()
     elif self._config.attention_type == 'ScaledDot':
         attention = ScaledDotAttention()
     elif self._config.attention_type == 'Additive':
         attention = AdditiveAttention(query_size=self._config.hidden_size,
                                       key_size=self._config.hidden_size)
     elif self._config.attention_type == 'Multiplicative':
         attention = MultiplicativeAttention(
             query_size=self._config.hidden_size,
             key_size=self._config.hidden_size)
     elif self._config.attention_type == 'MLP':
         attention = MultiLayerPerceptronAttention(
             query_size=self._config.hidden_size,
             key_size=self._config.hidden_size,
             out_size=1)
     else:
          raise ValueError('Unsupported attention type: %s' %
                           self._config.attention_type)
     # decoder
     decoder = Decoder(embedding, rnn_cell, attention,
                       self._config.hidden_size)
     # model
     model = Seq2Seq(embedding, encoder, bridge, decoder)
     return model
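
For reference, a minimal sketch of the config object this factory expects. The field names are exactly the self._config attributes read above; the default values are illustrative assumptions, not values from the source.

from dataclasses import dataclass

@dataclass
class Seq2SeqConfig:  # hypothetical name; only the fields are implied by the code above
    vocab_size: int = 30000
    embed_size: int = 300
    hidden_size: int = 512
    num_layers: int = 2
    bidirectional: bool = True
    dropout: float = 0.3
    rnn_type: str = 'LSTM'            # 'LSTM' or 'GRU'
    attention_type: str = 'Additive'  # 'Dot', 'ScaledDot', 'Multiplicative', 'MLP'
    embedding_file_name: str = 'embeddings.npy'  # .npy matrix of shape (vocab_size, embed_size)
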
Example no. 2
 def make_model(self):
     model = Seq2Seq(vocab_size=self._config.vocab_size,
                     embed_size=self._config.embed_size,
                     hidden_size=self._config.hidden_size,
                     rnn_type=self._config.rnn_type,
                     num_layers=self._config.num_layers,
                     bidirectional=self._config.bidirectional,
                     attention_type=self._config.attention_type,
                     dropout=self._config.dropout)
     model.load_pretrained_embeddings(self._config.embedding_file_name)
     return model
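
Here the wiring is hidden inside Seq2Seq, and load_pretrained_embeddings is not shown. Judging from Examples 1 and 9, which copy a NumPy matrix into a frozen nn.Embedding, it plausibly looks like this sketch (an assumption, not the actual method):

import numpy as np
import torch

def load_pretrained_embeddings(self, embedding_file_name):
    # Copy the pretrained matrix into the embedding table and freeze it,
    # mirroring the pattern used in Examples 1 and 9.
    weights = torch.from_numpy(np.load(embedding_file_name))
    self.embedding.weight.data.copy_(weights)    # assumes self.embedding is an nn.Embedding
    self.embedding.weight.requires_grad = False
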
Example no. 3
    def __init__(self, vocab_size, config):
        super(OCR, self).__init__()
        self.cnn = CNN()
        self.config = config

        self.transformer = Seq2Seq(
            vocab_size,
            encoder_hidden=config['seq_parameters']['encoder_hidden'],
            decoder_hidden=config['seq_parameters']['decoder_hidden'],
            img_channel=config['seq_parameters']['img_channel'],
            decoder_embedded=config['seq_parameters']['decoder_embedded'],
            dropout=config['seq_parameters']['dropout'])
Example no. 4
 def load_model(self, weights, device):
     INPUT_DIM = len(self.SRC.vocab)
     OUTPUT_DIM = len(self.TRG.vocab)
     enc = Encoder(INPUT_DIM, HID_DIM, ENC_LAYERS, ENC_HEADS, ENC_PF_DIM,
                   ENC_DROPOUT, device)
     dec = Decoder(OUTPUT_DIM, HID_DIM, DEC_LAYERS, DEC_HEADS, DEC_PF_DIM,
                   DEC_DROPOUT, device)
     SRC_PAD_IDX = self.SRC.vocab.stoi[self.SRC.pad_token]
     TRG_PAD_IDX = self.TRG.vocab.stoi[self.TRG.pad_token]
     model = Seq2Seq(enc, dec, SRC_PAD_IDX, TRG_PAD_IDX, device).to(device)
     model.load_state_dict(torch.load(weights))
     return model
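
One caveat: torch.load(weights) restores tensors onto the device they were saved from, so a GPU-trained checkpoint fails to load on a CPU-only machine. A defensive variant of the load call (a sketch, not the original code) passes map_location:

state_dict = torch.load(weights, map_location=device)  # remap saved tensors onto the target device
model.load_state_dict(state_dict)
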
Example no. 5
def main():
    args = parse_arguments()
    hidden_size = 512
    embed_size = 256
    # assert torch.cuda.is_available()

    print("[!] preparing dataset...")
    train_iter, val_iter, test_iter, DE, EN = load_dataset(args.batch_size)
    de_size, en_size = len(DE.vocab), len(EN.vocab)
    print("[TRAIN]:%d (dataset:%d)\t[TEST]:%d (dataset:%d)" %
          (len(train_iter), len(
              train_iter.dataset), len(test_iter), len(test_iter.dataset)))
    print("[DE_vocab]:%d [en_vocab]:%d" % (de_size, en_size))

    print("[!] Instantiating models...")
    encoder = Encoder(de_size,
                      embed_size,
                      hidden_size,
                      n_layers=2,
                      dropout=0.5)
    decoder = Decoder(embed_size,
                      hidden_size,
                      en_size,
                      n_layers=1,
                      dropout=0.5)
    seq2seq = Seq2Seq(encoder, decoder)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(seq2seq.parameters(), lr=args.lr)
    print(seq2seq)

    best_val_loss = None
    for e in range(1, args.epochs + 1):
        train(e, seq2seq, criterion, optimizer, train_iter, en_size,
              args.grad_clip, DE, EN)
        val_loss = evaluate(seq2seq, criterion, val_iter, en_size, DE, EN)
        print("[Epoch:%d] val_loss:%5.3f | val_pp:%5.2fS" %
              (e, val_loss, math.exp(val_loss)))

        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            print("[!] saving model...")
            if not os.path.isdir(".save"):
                os.makedirs(".save")
            torch.save(seq2seq.state_dict(), './.save/seq2seq_%d.pt' % (e))
            best_val_loss = val_loss
    test_loss = evaluate(seq2seq, criterion, test_iter, en_size, DE, EN)
    print("[TEST] loss:%5.2f" % test_loss)
Example no. 6
def main(args):
    checkpoint_path = os.path.join("saved/", args.name, args.checkpoint)
    checkpoint = torch.load(checkpoint_path)
    config = checkpoint['config']

    #if args.task.lower() == 'caption':
    embedder = eval(config['embedder']['type'])
    embedder_path = os.path.join("saved/", args.name, "embedder.pkl")
    data_loader = CaptionDataLoader(config,
                                    embedder,
                                    mode='test',
                                    path=args.data_dir,
                                    embedder_path=embedder_path)

    model = Seq2Seq(config, embedder=data_loader.embedder)
    model.load_state_dict(checkpoint['state_dict'])
    if not args.no_cuda:
        model.cuda()
    model.eval()
    model.summary()

    result = []
    for batch_idx, (in_seq, id) in enumerate(data_loader):
        in_seq = torch.FloatTensor(in_seq)
        in_seq = Variable(in_seq)
        if not args.no_cuda:
            in_seq = in_seq.cuda()
        if args.beam_size == 1:
            out_seq = model(in_seq, 24)
            out_seq = np.array([seq.data.cpu().numpy() for seq in out_seq])
            out_seq = np.transpose(out_seq, (1, 0, 2))
            out_seq = data_loader.embedder.decode_lines(out_seq)
        else:
            out_seq = beam_search(model,
                                  data_loader.embedder,
                                  in_seq,
                                  seq_len=24,
                                  beam_size=args.beam_size)
            out_seq = data_loader.embedder.decode_lines(out_seq)

        out_seq = [(str(id[0]), out_seq)]
        result.extend(out_seq)

    with open(args.output, 'w') as f:
        for video_id, caption in result:
            caption = postprocess(caption)
            f.write(video_id + ',' + caption + '\n')
Example no. 7
def init_model_from_ckpt():
    _, _, _, train_data, valid_data, test_data = get_dataloaders_and_data()
    SRC_PAD_IDX = DOC.vocab.stoi[DOC.pad_token]
    TRG_PAD_IDX = DOC.vocab.stoi[DOC.pad_token]
    INPUT_DIM = len(DOC.vocab)
    OUTPUT_DIM = len(DOC.vocab)

    attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
    enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM,
                  ENC_DROPOUT)
    dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM,
                  DEC_DROPOUT, attn)

    model = Seq2Seq(enc, dec, SRC_PAD_IDX, device).to(device)
    most_recent_ckpt = get_most_recent_ckpt('ckpts')
    model.load_state_dict(torch.load(most_recent_ckpt))
    return model, train_data, valid_data, test_data
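
get_most_recent_ckpt is referenced but not defined here. A reasonable sketch, assuming checkpoints are saved as .pt files, picks the newest file in the directory by modification time:

import glob
import os

def get_most_recent_ckpt(ckpt_dir):
    paths = glob.glob(os.path.join(ckpt_dir, '*.pt'))  # file extension is an assumption
    return max(paths, key=os.path.getmtime)            # newest checkpoint wins
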
Example no. 8
def create_model(vocab_size):
    embedding = (nn.Embedding(vocab_size, config.hidden_size)
                 if config.single_embedding else None)

    encoder = Encoder(vocab_size, config.hidden_size,
                      n_layers=config.n_encoder_layers, dropout=config.dropout)

    decoder = Decoder(config.hidden_size, vocab_size,
                      n_layers=config.n_decoder_layers, dropout=config.dropout)

    model = Seq2Seq(encoder=encoder,
                    decoder=decoder,
                    max_length=config.max_length)

    if torch.cuda.is_available() and config.use_cuda:
        model.cuda()

    return model
Example no. 9
 def _make_model(self):
     embedding = nn.Embedding(self._config.vocab_size,
                              self._config.embed_size)
     embedding.weight.data.copy_(
         torch.from_numpy(np.load(self._config.embedding_file_name)))
     embedding.weight.requires_grad = False
     encoder = Encoder(self._config.embed_size, self._config.hidden_size,
                       self._config.num_layers, self._config.bidirectional,
                       self._config.dropout)
     bridge = Bridge(self._config.hidden_size, self._config.bidirectional)
     lstm_cell = MultiLayerLSTMCells(2 * self._config.embed_size,
                                     self._config.hidden_size,
                                     self._config.num_layers,
                                     dropout=self._config.dropout)
     # attention = MultiplicativeAttention(self._config.hidden_size, self._config.hidden_size)
     attention = AdditiveAttention(self._config.hidden_size,
                                   self._config.hidden_size)
     decoder = Decoder(embedding, lstm_cell, attention,
                       self._config.hidden_size)
     model = Seq2Seq(embedding, encoder, bridge, decoder)
     return model
Example no. 10
# parser = argparse.ArgumentParser()
# parser.add_argument('--batch-size', type=int, default=32)
# parser.add_argument('--enc-emb-dim', type=int, default=64)
# parser.add_argument('--dec-emb-dim', type=int, default=64)
# parser.add_argument('--hid-dim', type=int, default=216)
# parser.add_argument('--n-layers', type=int, default=2)
# parser.add_argument('--enc-dropout', type=float, default=0.5)
# parser.add_argument('--dec-dropout', type=float, default=0.5)

# args = parser.parse_args()

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)

model = Seq2Seq(enc, dec, None)
optimizer = Adam(model.parameters())
criterion = CrossEntropyLoss()

train_loader = loader(train_data, BATCH_SIZE)
valid_loader = loader(valid_data, BATCH_SIZE)

N_EPOCHS = 10
CLIP = 1

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
Example no. 11
        }
        json.dump(data, f)


##start main
args = get_args()
train_data, test_data = data_loader(args,
                                    "data/processed_data.json",
                                    first=True)
test_data = train_data if args.use_train_data else test_data

device_kind = "cuda:{}".format(
    args.cuda_number) if torch.cuda.is_available() else "cpu"
args.device = torch.device(device_kind)

model = (Seq2Seq(args) if args.model_version == 1 else
         Seq2Seq2(args) if args.model_version == 2 else
         Transformer(args))

model.to(args.device)

if args.model_name != "":
    param = torch.load("model_data/{}".format(args.model_name))
    model.load_state_dict(param)
# If start_epoch is 0 we train from scratch; if it is set, load the already-trained model
elif args.start_epoch >= 1:
    param = torch.load(
        "model_data/epoch_{}_model.pth".format(args.start_epoch - 1))
    model.load_state_dict(param)
else:
    args.start_epoch = 0
Example no. 12
DEC_HEADS = config['DEC_HEADS']
ENC_PF_DIM = config['ENC_PF_DIM']
DEC_PF_DIM = config['DEC_PF_DIM']
ENC_DROPOUT = config['ENC_DROPOUT']
DEC_DROPOUT = config['DEC_DROPOUT']

enc = Encoder(INPUT_DIM, HID_DIM, ENC_LAYERS, ENC_HEADS, ENC_PF_DIM,
              ENC_DROPOUT, device)

dec = Decoder(OUTPUT_DIM, HID_DIM, DEC_LAYERS, DEC_HEADS, DEC_PF_DIM,
              DEC_DROPOUT, device)

SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

model = Seq2Seq(enc, dec, SRC_PAD_IDX, TRG_PAD_IDX, device).to(device)

print(f'The model has {count_parameters(model):,} trainable parameters')

model.apply(initialize_weights)

if config['train_embeddings']:
    model.decoder.tok1_embedding.load_state_dict(glovemodel.wi.state_dict())
    model.decoder.tok2_embedding.load_state_dict(glovemodel.wj.state_dict())

LEARNING_RATE = config['LEARNING_RATE']
N_EPOCHS = config['N_EPOCHS']
CLIP = config['CLIP']

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)
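
initialize_weights is applied to the model above but not shown. In Transformer training scripts of this shape it is commonly a Xavier initializer over the multi-dimensional parameters; a sketch under that assumption:

def initialize_weights(m):
    # Xavier-initialize weight matrices; leave biases and 1-D parameters alone.
    if hasattr(m, 'weight') and m.weight is not None and m.weight.dim() > 1:
        nn.init.xavier_uniform_(m.weight.data)
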
Example no. 13
from model.seq2seq import Seq2Seq
import tensorflow as tf

sess = tf.Session()
# python3 -u  train_s2s.py -e 10 -i 100 -u 512 -g 5.0 -n 2 -em 500 -l 1.0 -d 0.3 -b 32 -o output/ -s 2000
s2s = Seq2Seq(sess,
              512, [10], [10],
              2,
              tensorboard_id=1,
              cell_type='GRU',
              mode='train',
              learning_rate=0.1,
              dropout=0.3,
              gradient_clip=5.0)
vars = tf.trainable_variables()
for v in vars:
    print(v)

print(sess.run(s2s.emb_test))
#s2s.restore('/Users/cem/Desktop/output/seq2seq')
Example no. 14
            else:
                yid.append(_yid[k])
                scores.append(_socres[k])
        yid = np.array(yid)
    return data.id2str(yid[np.argmax(scores)])

s1 = u'夏天来临,皮肤在强烈紫外线的照射下,晒伤不可避免,因此,晒后及时修复显得尤为重要,否则可能会造成长期伤害。专家表示,选择晒后护肤品要慎重,芦荟凝胶是最安全,有效的一种选择,晒伤严重者,还请及时就医。'
s2 = u'8月28日,网络爆料称,华住集团旗下连锁酒店用户数据疑似发生泄露。从卖家发布的内容看,数据包含华住旗下汉庭、禧玥、桔子、宜必思等10余个品牌酒店的住客信息。泄露的信息包括华住官网注册资料、酒店入住登记的身份信息及酒店开房记录,住客姓名、手机号、邮箱、身份证号、登录账号密码等。卖家对这个约5亿条数据打包出售。第三方安全平台威胁猎人对信息出售者提供的三万条数据进行验证,认为数据真实性非常高。当天下午,华住集团发声明称,已在内部迅速开展核查,并第一时间报警。当晚,上海警方消息称,接到华住集团报案,警方已经介入调查。'

class Evaluate(Callback):
    def __init__(self):
        self.lowest = 1e10

    def on_epoch_end(self, epoch, logs=None):
        print(gen_titles(s1))
        print(gen_titles(s2))
        if logs['loss'] <= self.lowest:
            self.lowest = logs['loss']
            model.save_weights('./model/best_model.weights')


evaluator = Evaluate()
model = Seq2Seq(config, chars).run()
model.compile(optimizer=Adam(1e-3)) # lr
model.fit_generator(data.get_data(), steps_per_epoch=1000, epochs=config.epochs, callbacks=[evaluator])





Example no. 15
                                          shuffle=False,
                                          collate_fn=collate_fn,
                                          drop_last=True)

INPUT_DIM = len(src_vocab)
OUTPUT_DIM = len(tag_vocab)
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)
model = Seq2Seq(enc, dec)


# init weights
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)


model.apply(init_weights)


# calculate the number of trainable parameters in the model
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
Example no. 16
train_loader = torchdata.DataLoader(dataset=data_loaded,
                                    collate_fn=data_loaded.custom_collate_fn,
                                    batch_size=batch_size)

trg_max_seq_len = next(
    iter(train_loader))[1].size(1) - 1  # <s> is not included

epochs = 1
interval = 1
learning_rate = 5e-2

model = Seq2Seq(hidden_size=hidden_size,
                vocab_len=vocab_len,
                embedding_size=embedding_size,
                batch_size=batch_size,
                pad_idx=pad_idx,
                trg_max_seq_len=trg_max_seq_len,
                device=device)

model.to(device)

optimizer = optim.Adadelta(model.parameters(), lr=learning_rate)
criterion = nn.NLLLoss(ignore_index=pad_idx)


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


print(f'Model parameters : {count_parameters(model):,}')
Example no. 17
    def decode_answer(self, answers):

        decode_answers = []

        for answer in answers:
            decode_answer = []
            for token_id in answer:
                if token_id == 0:
                    token = "<pad>"
                else:
                    token = self.data_provider.decode_vocab[token_id]
                if token == "<end>" or token == "<pad>":
                    break
                decode_answer.append(token)
            decode_answers.append(decode_answer)

        return decode_answers


if __name__ == "__main__":
    data_processor = DataProcessor("./data/QA_data/varicocele/",
                                   "./data/QA_data/varicocele/varicocele.json",
                                   word2vec="./data/word2vec/varicocele")
    model = Seq2Seq(data_processor.start_token, data_processor.vocab_embedding)
    trainer = Trainer(model, data_processor, learning_rate=5e-3, batch_size=8)
    trainer.train(train_epoch=100,
                  save_epoch=10,
                  display_step=100,
                  restore=True)
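
The stopping rule in decode_answer (truncate at the first <end> or <pad>, with id 0 reserved for <pad>) is easy to sanity-check in isolation. A toy sketch with a made-up vocabulary:

decode_vocab = {1: 'what', 2: 'is', 3: 'varicocele', 4: '<end>'}  # hypothetical ids

def decode_one(answer):
    tokens = []
    for token_id in answer:
        token = '<pad>' if token_id == 0 else decode_vocab[token_id]
        if token in ('<end>', '<pad>'):
            break
        tokens.append(token)
    return tokens

assert decode_one([1, 2, 3, 4, 1]) == ['what', 'is', 'varicocele']  # stops at <end>
assert decode_one([1, 0, 2]) == ['what']                            # stops at <pad>
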
Example no. 18
def main(_):
    """
    Main function, loads and vectorizes data, builds model, then proceeds to start the training
    process.
    """
    # Load data (paths to the vocab, and tokens)
    fr_vocab, en_vocab, fr_train, en_train, _ = load_data()

    # Bucket train data
    train_set = bucket(fr_train, en_train)
    train_bucket_sizes = [len(train_set[b]) for b in xrange(len(buckets))]
    train_total_size = float(sum(train_bucket_sizes))
    print "Total Number of Training Examples", train_total_size

    # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use
    # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to
    # the size of the i-th training bucket, as used later.
    train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                           for i in xrange(len(train_bucket_sizes))]

    # Get size of vocabularies
    french_vocab, _ = init_vocab(fr_vocab)
    english_vocab, _ = init_vocab(en_vocab)

    # Start Tensorflow Session
    with tf.Session() as sess:
        model = Seq2Seq(len(french_vocab), len(english_vocab), buckets, FLAGS.size,
                        FLAGS.num_layers, forward_only=False)
        ckpt = tf.train.get_checkpoint_state(FLAGS.log_dir)
        if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
            print "Reading model parameters from %s" % ckpt.model_checkpoint_path
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print "Created model with fresh parameters."
            sess.run(tf.initialize_all_variables())

        # Start Training Loop
        step_time, loss, current_step = 0.0, 0.0, 0
        previous_losses = []
        while True:
            # Choose a bucket according to data distribution. We pick a random number in [0, 1] and
            # use the corresponding interval in train_buckets_scale.
            random_number_01 = np.random.random_sample()
            bucket_id = min([i for i in xrange(len(train_buckets_scale))
                             if train_buckets_scale[i] > random_number_01])

            # Get a batch and make a step.
            start_time = time.time()
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(train_set, bucket_id)
            step_loss, embeddings, _ = model.step(sess, encoder_inputs, decoder_inputs,
                                                  target_weights, bucket_id, False)
            step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint
            loss += step_loss / FLAGS.steps_per_checkpoint
            current_step += 1
            progress(current_step % FLAGS.steps_per_checkpoint, FLAGS.steps_per_checkpoint,
                     "Step %s" % (current_step / FLAGS.steps_per_checkpoint))

            # Once in a while, we save checkpoint, and print statistics.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous epoch.
                print ""
                perplexity = math.exp(loss) if loss < 300 else float('inf')
                print ("Global step %d, Learning rate %.4f, Step-time %.2f, Perplexity %.2f" %
                      (model.global_step.eval(), model.learning_rate.eval(), step_time, perplexity))

                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)

                # Save checkpoint and zero timer and loss.
                checkpoint_path = os.path.join(FLAGS.log_dir, "translate.ckpt")
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
                step_time, loss = 0.0, 0.0
                sys.stdout.flush()
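
The bucket-scale sampling above is easier to see with concrete numbers. In this standalone sketch (illustrative sizes), a uniform draw in [0, 1] selects bucket i with probability proportional to its size:

import numpy as np

train_bucket_sizes = [100, 300, 600]  # illustrative
train_total_size = float(sum(train_bucket_sizes))
train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size
                       for i in range(len(train_bucket_sizes))]
# -> [0.1, 0.4, 1.0]: bucket 0 drawn with p=0.1, bucket 1 with p=0.3, bucket 2 with p=0.6
random_number_01 = np.random.random_sample()
bucket_id = min(i for i, scale in enumerate(train_buckets_scale)
                if scale > random_number_01)
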
Example no. 19
def main(_):
    """
    Main function, instantiates model, loads and vectorizes source data, translates and outputs
    English translations.
    """
    # Load vocabularies
    fr_vocab_path = "data/vocabulary/fr.vocab"
    en_vocab_path = "data/vocabulary/en.vocab"

    fr2idx, idx2fr = init_vocab(fr_vocab_path)
    en2idx, idx2en = init_vocab(en_vocab_path)

    with tf.Session() as sess:
        # Create Model by Loading Parameters
        model = Seq2Seq(len(fr2idx),
                        len(en2idx),
                        buckets,
                        FLAGS.size,
                        FLAGS.num_layers,
                        forward_only=True)
        ckpt = tf.train.get_checkpoint_state(FLAGS.log_dir)
        if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
            print "Reading model parameters from %s" % ckpt.model_checkpoint_path
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print "No model checkpoints found!"
            sys.exit(0)

        # Reset batch_size to 1
        model.batch_size = 1
        translations = []
        with tf.gfile.GFile(SOURCE_PATH, 'rb') as f:
            sentence = f.readline()
            while sentence:
                # Source file is already tokenized, just need to split at spaces
                token_ids = sentence.split()
                if len(token_ids) >= 50:
                    translations.append("")
                    sentence = f.readline()
                    continue

                # Pick which bucket it belongs to.
                bucket_id = min([
                    b for b in xrange(len(buckets))
                    if buckets[b][0] > len(token_ids)
                ])

                # Get a 1-element batch to feed the sentence to the model.
                encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                    {bucket_id: [(token_ids, [])]}, bucket_id)

                # Get output logits for the sentence.
                _, embedding, output_logits = model.step(
                    sess, encoder_inputs, decoder_inputs, target_weights,
                    bucket_id, True)

                # This is a greedy decoder - outputs are just argmaxes of output_logits.
                outputs = [
                    int(np.argmax(logit, axis=1)) for logit in output_logits
                ]

                # If there is an EOS symbol in outputs, cut them at that point.
                if EOS_ID in outputs:
                    outputs = outputs[:outputs.index(EOS_ID)]

                # Print out English sentence corresponding to outputs.
                translation = " ".join(
                    [tf.compat.as_str(idx2en[output]) for output in outputs])
                print translation
                translations.append(translation)
                sentence = f.readline()

        with tf.gfile.GFile(TARGET_PATH, 'wb') as f:
            for t in translations:
                f.write(t + "\n")
Example no. 20
            torch.save(model.state_dict(), "model_data/model.pth"\
                        .format(args.start_time,round(predict_rate,3),epoch))
            logger(args, "save model")


##start main
args = get_args()
train_data, test_data = data_loader(args,
                                    "data/processed_data.json",
                                    first=True)

device_kind = "cuda:{}".format(
    args.cuda_number) if torch.cuda.is_available() else "cpu"
args.device = torch.device(device_kind)

model = Seq2Seq(args)
model.to(args.device)

# If start_epoch is 0 we train from scratch; if it is set, load the already-trained model
if args.start_epoch >= 1:
    param = torch.load(
        "model_data/epoch_{}_model.pth".format(args.start_epoch - 1))
    model.load_state_dict(param)
else:
    args.start_epoch = 0

optimizer = optim.Adam(model.parameters(), lr=args.lr)

logger(args, "use {}".format(device_kind))

for epoch in range(args.start_epoch, args.epoch_num):