Example #1
def main(_):
    model_path = os.path.join('model', FLAGS.name)  # join into the path model/<name>
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    with codecs.open(FLAGS.input_file, encoding='utf-8') as f:
        text = f.read()  # read the text
    converter = TextConverter(text,
                              FLAGS.max_vocab)  # build the vocabulary, keeping at most FLAGS.max_vocab tokens
    converter.save_to_file(os.path.join(model_path,
                                        'converter.pkl'))  # pickle the vocabulary

    data = converter.text_to_data(text)  # convert the text to integer ids (word_to_int)
    g = batch_generator(data, FLAGS.n_seqs, FLAGS.n_steps)  # get the batch generator
    print(converter.vocab_size)
    # initialize the model parameters
    model = CharRNN(converter.vocab_size,
                    n_seqs=FLAGS.n_seqs,
                    n_steps=FLAGS.n_steps,
                    state_size=FLAGS.state_size,
                    n_layers=FLAGS.n_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.train(g, FLAGS.max_steps, model_path, FLAGS.save_every_n,
                FLAGS.log_every_n)
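None of the char-RNN examples here show batch_generator itself. Below is a minimal sketch of the contract they rely on, assuming the generator slices the encoded text into n_seqs rows, walks over them n_steps columns at a time, and yields (x, y) pairs where y is x shifted one position ahead; the details are an illustration, not the original helper.

import numpy as np

def batch_generator(arr, n_seqs, n_steps):
    # Yield (x, y) batches of shape (n_seqs, n_steps); y is x shifted left by one.
    arr = np.array(arr)
    batch_size = n_seqs * n_steps
    n_batches = len(arr) // batch_size
    arr = arr[:batch_size * n_batches].reshape((n_seqs, -1))
    while True:  # the training loop stops after FLAGS.max_steps
        np.random.shuffle(arr)  # shuffle the sequence rows between passes
        for n in range(0, arr.shape[1], n_steps):
            x = arr[:, n:n + n_steps]
            y = np.zeros_like(x)
            y[:, :-1], y[:, -1] = x[:, 1:], x[:, 0]
            yield x, y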
Example #2
def beamsearchdecode():
    modelname = ''
    if config['attn']:
        modelname = 'att' + '_' + config['attn_model']

    data_loader = data_utils.batch_generator(testX,
                                             testY,
                                             batch_size=config['batch_size'],
                                             shuffle=False)
    data_len, labels = next(data_loader)
    data, lengths = data_len
    data, lengths, labels = torch.tensor(data, dtype=torch.long), torch.tensor(
        lengths, dtype=torch.long), torch.tensor(labels, dtype=torch.long)
    data, labels = data.to(computing_device), labels.to(computing_device)
    encoder_outputs, encoder_hidden = encoder(data, lengths)
    decoder_hidden = encoder_hidden
    max_target_len = config['max_len']
    loss = 0
    decoder_charid = torch.zeros_like(labels)
    batch_size = labels.shape[0]
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]
                                      ]).to(computing_device).transpose(0, 1)
    decoder_charid[:, 0] = decoder_input.reshape(-1)

    for t in range(max_target_len - 1):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden,
                                                 encoder_outputs)

        output_id = torch.argmax(decoder_output.detach(), dim=2)
        decoder_charid[:, t + 1] = output_id.squeeze()
        decoder_input = output_id

evaluate_test(encoder, decoder)
Example #3
def main(_):
    start_time = time.time()
    model_path = os.path.join('model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    with open(FLAGS.input_file, 'r') as f:
        text = f.read()
    converter = TextConverter(text, FLAGS.max_vocab)
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))

    arr = converter.text_to_arr(text)
    g = batch_generator(arr, FLAGS.num_seqs, FLAGS.num_steps)
    print(converter.vocab_size)
    model = CharRNN(converter.vocab_size,
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.train(
        g,
        FLAGS.max_steps,
        model_path,
        FLAGS.save_every_n,
        FLAGS.log_every_n,
    )
    print("Timing cost is --- %s ---second(s)" % (time.time() - start_time))
Example #4
def train(self):
    logger.info("start train")
    self.sess = tf.Session()
    with self.sess as sess:
        sess.run(tf.global_variables_initializer())
        step = 0
        new_state = sess.run(self.initial_state)
        avg_loss = 0
        for x, y in batch_generator(self.dt.data, self.dt.labels,
                                    self.batch_size, self.seq_lengths):
            feed = {
                self.inputs: x,
                self.targets: y,
                self.initial_state: new_state
            }
            batch_loss, new_state, _ = sess.run(
                [self.loss, self.final_state, self.optimizer],
                feed_dict=feed)
            step += 1
            avg_loss += batch_loss
            if step % 100 == 0:
                print("steps: %d, batch_loss: %f" % (step, avg_loss / 100))
                avg_loss = 0
                correct_prediction = tf.equal(self.prob1, self.targets)
                accuracy = tf.reduce_mean(
                    tf.cast(correct_prediction, "float"))
                print("Accuracy:",
                      accuracy.eval({
                          self.inputs: x,
                          self.targets: y
                      }))
                print("targets",
                      self.targets.eval({
                          self.inputs: x,
                          self.targets: y
                      }))
                print("logits",
                      self.logits.eval({
                          self.inputs: x,
                          self.targets: y
                      }))
                print("prob",
                      self.prob.eval({
                          self.inputs: x,
                          self.targets: y
                      }))
                print("prob1",
                      self.prob1.eval({
                          self.inputs: x,
                          self.targets: y
                      }))
            if step == 7810:
                break
Example #5
def train(word_vocab_size,
          tag_vocab_size,
          char_vocab_size,
          train_data,
          valid_data,
          epochs=20,
          word_embeddings=None):
    model = create_crf_on_lstm_model(word_vocab_size, tag_vocab_size, char_vocab_size, word_embeddings)
    optim = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    print(model)
    print("training model...")
    for epoch in range(1, epochs + 1):
        loss_sum = 0
        timer = time.time()
        batch_count = 0
        model.train()
        for word_x, char_x, y in batch_generator(*train_data):
            model.zero_grad()
            loss = torch.mean(model(word_x, char_x, y))
            loss.backward()
            optim.step()
            loss = scalar(loss)
            loss_sum += loss
            batch_count += 1
        timer = time.time() - timer
        loss_sum /= batch_count
        save_checkpoint('model', model, epoch, loss_sum, timer)

        loss_sum = 0
        batch_count = 0
        model.eval()
        with torch.no_grad():
            for word_x, char_x, y in batch_generator(*valid_data):
                loss_sum += scalar(torch.mean(model(word_x, char_x, y)))
                batch_count += 1

            print('validation loss: {}'.format(loss_sum / batch_count))
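Example #5 depends on helpers that the snippet does not define, notably scalar and save_checkpoint. A minimal sketch of scalar, assuming it only pulls a Python float out of a 0-dim loss tensor for logging:

def scalar(tensor):
    # detach from the autograd graph and return the underlying Python number
    return tensor.detach().item()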
Example #6
def main(_):
    model_path = os.path.join('model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    data = read_corpus(FLAGS.input_file)
    converter = TextConverter(data, FLAGS.max_vocab)
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))

    g = batch_generator(data, FLAGS.batch_size)
    print(converter.vocab_size)
    model = BilstmNer(converter.vocab_size,
                      converter.num_classes,
                      lstm_size=FLAGS.lstm_size,
                      learning_rate=FLAGS.learning_rate,
                      train_keep_prob=FLAGS.train_keep_prob,
                      embedding_size=FLAGS.embedding_size)
    model.train(
        g,
        FLAGS.max_steps,
        model_path,
        FLAGS.save_every_n,
        FLAGS.log_every_n,
    )
Example #7
def main(_):
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)

    # read the training text
    with open(datafile, 'r', encoding='utf-8') as f:
        train_data = f.read()

    # load or build the vocabulary
    vocabulary = Vocabulary()
    if FLAGS.vocab_file:
        vocabulary.load_vocab(FLAGS.vocab_file)
    else:
        vocabulary.build_vocab(train_data)
    vocabulary.save(FLAGS.vocab_file)

    input_ids = vocabulary.encode(train_data)

    g = batch_generator(input_ids, FLAGS.batch_size, FLAGS.num_steps)

    model = LSTMModel(vocabulary.vocab_size,
                      batch_size=FLAGS.batch_size,
                      num_steps=FLAGS.num_steps,
                      lstm_size=FLAGS.lstm_size,
                      num_layers=FLAGS.num_layers,
                      learning_rate=FLAGS.learning_rate,
                      train_keep_prob=FLAGS.train_keep_prob,
                      use_embedding=FLAGS.use_embedding,
                      embedding_size=FLAGS.embedding_size)
    model.train(
        g,
        FLAGS.max_steps,
        checkpoint_path,
        FLAGS.save_every_n,
        FLAGS.log_every_n,
    )
Example #8
metrics = {
    'rsenses': ['accuracy', 'loss'],
}
model = Model(input=inputs, output=outputs)

model.summary()
model.compile(optimizer=c('optimizer', "adam"), loss=losses, metrics=metrics)

# load weights
log.info("previous weights ({})".format(args.model_dir))
#model.load_weights(weights_hdf5)  # weights of best training loss
model.load_weights(weights_val_hdf5)  # weights of best validation loss

# convert from dataset to numeric format
log.info("convert from dataset ({})".format(args.dataset_dir))
x, _ = next(batch_generator(dataset, indexes, indexes_size, arg1_len, arg2_len, conn_len, punc_len, len(dataset['rel_ids']), random_per_sample=0))

# make predictions
log.info("make predictions")
y = model.predict(x, batch_size=batch_size)

# valid outputs
TYPES = ['Explicit', 'Implicit', 'AltLex', 'EntRel', 'NoRel']
if args.lang == "en":
    SENSES = [
        'Expansion.Conjunction',  # most common
        'Temporal.Asynchronous.Precedence',
        'Temporal.Asynchronous.Succession',
        'Temporal.Synchrony',
        'Contingency.Cause.Reason',
        'Contingency.Cause.Result',
Example #9
def evaluate_test(encoder, decoder):
    modelname = config['decoder']
    if config['attn']:
        modelname = 'att' + '_' + config['attn_model']

    data_loader = data_utils.batch_generator(testX,
                                             testY,
                                             batch_size=config['batch_size'],
                                             shuffle=False)
    data_len, labels = next(data_loader)
    data, lengths = data_len
    data, lengths, labels = torch.tensor(data, dtype=torch.long), torch.tensor(
        lengths, dtype=torch.long), torch.tensor(labels, dtype=torch.long)
    data, labels = data.to(computing_device), labels.to(computing_device)
    encoder_outputs, encoder_hidden = encoder(data, lengths)
    decoder_hidden = encoder_hidden
    max_target_len = config['max_len']
    loss = 0
    decoder_charid = torch.zeros_like(labels)
    batch_size = labels.shape[0]
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]
                                      ]).to(computing_device).transpose(0, 1)
    decoder_charid[:, 0] = decoder_input.reshape(-1)
    target = labels[:, 1:]

    decode_batch = beam_decode(target, decoder_hidden, decoder,
                               encoder_outputs)

    decoder_charid = torch.tensor(decode_batch).squeeze()

    data, decoder_charid, labels = data.cpu().numpy().tolist(
    ), decoder_charid.cpu().numpy().tolist(), labels.cpu().numpy().tolist()
    ori_input = []
    model_output = []
    target_output = []
    for i, sentence_id in enumerate(decoder_charid):
        condition = lambda t: t not in (PAD_token, EOS_token, SOS_token)
        input_sentence_id = list(filter(condition, data[i]))
        input_sentence = ' '.join(
            [metadata['idx2w'][idx] for idx in input_sentence_id])
        ori_input.append(input_sentence)
        sentence_id = list(filter(condition, sentence_id))
        sentence = ' '.join([metadata['idx2w'][idx] for idx in sentence_id])
        target_sentence_id = list(filter(condition, labels[i]))
        model_output.append(sentence)
        target_sentence = ' '.join(
            [metadata['idx2w'][idx] for idx in target_sentence_id])
        target_output.append(target_sentence)
    filename = 'log/' + modelname + 'result.txt'
    with open(filename, 'a') as f:
        for i, sentence in enumerate(model_output):
            f.write("Input: " + ori_input[i] + '\n' + 'Chatbot: ' + sentence +
                    '\n\n')
Example #10
def run_epoch(encoder,
              decoder,
              feature,
              labels,
              training=False,
              encoder_optimizer=None,
              decoder_optimizer=None):

    batch_size = config['batch_size']
    epoch_loss = 0
    epoch_bleu = 0
    N = 1000
    N_minibatch_loss = 0.0
    beam_width = config['beam_width']

    data_loader = data_utils.batch_generator(feature,
                                             labels,
                                             batch_size=config['batch_size'])
    for minibatch_count, (data_len, labels) in enumerate(data_loader):

        if training:
            encoder_optimizer.zero_grad()
            decoder_optimizer.zero_grad()
        data, lengths = data_len
        data, lengths, labels = torch.tensor(
            data, dtype=torch.long), torch.tensor(
                lengths, dtype=torch.long), torch.tensor(labels,
                                                         dtype=torch.long)
        data, labels = data.to(computing_device), labels.to(computing_device)

        encoder_outputs, encoder_hidden = encoder(data, lengths)


        decoder_hidden = encoder_hidden
        max_target_len = config['max_len']
        loss = 0
        if training:
            use_teacher_forcing = random.random() < config['teacher_forcing_ratio']
        else:
            # when testing or validating, don't use teacher forcing
            use_teacher_forcing = False

        if config['teaching']:
            use_teacher_forcing = True

        decoder_charid = torch.zeros_like(labels)
        if use_teacher_forcing:
            decoder_charid[:, 0] = torch.LongTensor([[
                SOS_token for _ in range(batch_size)
            ]]).to(computing_device).reshape(-1)

            batch_size = labels.shape[0]
            target = labels[:, 1:]

            decoder_input = labels[:, :-1]
            decoder_output, decoder_hidden = decoder(decoder_input,
                                                     decoder_hidden,
                                                     encoder_outputs)

            decoder_charid[:, 1:] = torch.argmax(decoder_output, dim=2)
            decoder_output = decoder_output.view(-1, config['vocab_size'])
            loss = criterion(decoder_output, target.reshape(-1))
        else:
            decoder_input = torch.LongTensor([[
                SOS_token for _ in range(batch_size)
            ]]).to(computing_device).transpose(0, 1)
            decoder_charid[:, 0] = decoder_input.reshape(-1)
            batch_size = labels.shape[0]
            target = labels[:, 1:]
            for t in range(max_target_len - 1):
                decoder_output, decoder_hidden = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                output_id = torch.argmax(decoder_output.detach(), dim=2)
                decoder_charid[:, t + 1] = output_id.squeeze()
                decoder_input = output_id
                loss += criterion(decoder_output.squeeze(), target[:, t])
            loss /= lengths.float().mean()

        if training:
            loss.backward()
            nn.utils.clip_grad_norm_(encoder.parameters(), 50)
            nn.utils.clip_grad_norm_(decoder.parameters(), 50)
            encoder_optimizer.step()
            decoder_optimizer.step()


        epoch_bleu += batchBLEU(decoder_charid, labels)
        epoch_loss += loss.detach()
        N_minibatch_loss += loss.detach()
        if (minibatch_count % N == 0) and (minibatch_count != 0):
            train_flag = "Training" if training else "Validating/Testing"
            print(train_flag + ' Average minibatch %d loss: %.3f' %
                  (minibatch_count, N_minibatch_loss / N))
            N_minibatch_loss = 0

    return epoch_bleu / minibatch_count, epoch_loss / minibatch_count
Example #11
# initialize weights
if not os.path.isfile(weights_hdf5):
    log.info("initialize weights")
else:
    log.info("previous weights ({})".format(args.experiment_dir))
    model.load_weights(weights_hdf5)

# prepare for training
log.info("prepare snapshots")
#if not os.path.isdir(train_snapshot_dir):
#train_snapshot = next(batch_generator(train, indexes, indexes_size, arg1_len, arg2_len, conn_len, punc_len, min(len(train['rel_ids']), snapshot_size), random_per_sample=0))
#    save_dict_of_np(train_snapshot_dir, train_snapshot)
#train_snapshot = load_dict_of_np(train_snapshot_dir)
#if not os.path.isdir(valid_snapshot_dir):
valid_snapshot = next(batch_generator(valid, indexes, indexes_size, arg1_len, arg2_len, conn_len, punc_len, min(len(valid['rel_ids']), snapshot_size), random_per_sample=0))
#    save_dict_of_np(valid_snapshot_dir, valid_snapshot)
#valid_snapshot = load_dict_of_np(valid_snapshot_dir)
train_iter = batch_generator(train, indexes, indexes_size, arg1_len, arg2_len, conn_len, punc_len, batch_size, random_per_sample=random_per_sample)

# train model
log.info("train model")
callbacks = [
    ModelCheckpoint(monitor='loss', mode='min', filepath=weights_hdf5, save_best_only=True),
    ModelCheckpoint(monitor='val_loss', mode='min', filepath=weights_val_hdf5, save_best_only=True),
    EarlyStopping(monitor='val_loss', mode='min', patience=epochs_patience),
]
history = model.fit_generator(train_iter, nb_epoch=epochs, samples_per_epoch=epochs_len, validation_data=valid_snapshot, callbacks=callbacks, verbose=2)
log.info("training finished")

# return best result for hyperopt
Example #12
    with sess.as_default():

        # Create model
        cnn = mnistCNN(dense=FLAGS.dense_size)

        # Trainer
        train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
            cnn.loss)

        # Saver
        saver = tf.train.Saver(max_to_keep=1)

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        # Training process
        for epoch in range(FLAGS.num_epochs):
            for n_batch in range(int(55000 / FLAGS.batch_size)):
                batch = batch_generator(mnist_data,
                                        batch_size=FLAGS.batch_size,
                                        type='train')
                _, ce = sess.run([train_op, cnn.loss],
                                 feed_dict={
                                     cnn.input_x: batch[0],
                                     cnn.input_y: batch[1]
                                 })

            print(epoch, ce)
        model_file = saver.save(sess, '/tmp/mnist_model')
        print('Model saved in', model_file)
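Example #12 treats batch_generator as a function that returns a single (images, labels) mini-batch per call. A minimal sketch under that assumption; the mnist_data layout and the type argument semantics are guesses for illustration, not the original helper:

import numpy as np

def batch_generator(mnist_data, batch_size=64, type='train'):
    # mnist_data is assumed to map split names ('train', 'test') to (images, labels) arrays
    images, labels = mnist_data[type]
    idx = np.random.choice(len(images), size=batch_size, replace=False)
    return images[idx], labels[idx]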