Code Example #1
File: train.py Project: lopuhin/char-rnn-models
def validate(args, model: CharRNN, criterion, char_to_id, pbar=False):
    model.eval()
    valid_corpus = Path(args.valid_corpus).read_text(encoding='utf8')
    batch_size = 1
    window_size = 4096
    hidden = model.init_hidden(batch_size)
    total_loss = n_chars = 0
    total_word_loss = n_words = 0
    r = tqdm.trange if pbar else range
    for idx in r(
            0, min(args.valid_chars or len(valid_corpus),
                   len(valid_corpus) - 1), window_size):
        chunk = valid_corpus[idx:idx + window_size + 1]
        inputs = variable(char_tensor(chunk[:-1], char_to_id).unsqueeze(0),
                          volatile=True)
        targets = variable(char_tensor(chunk[1:], char_to_id).unsqueeze(0))
        losses = []
        for c in range(inputs.size(1)):
            output, hidden = model(inputs[:, c], hidden)
            loss = criterion(output.view(batch_size, -1), targets[:, c])
            losses.append(loss.data[0])
            n_chars += 1
        total_loss += np.sum(losses)
        word_losses = word_loss(chunk, losses)
        total_word_loss += np.sum(word_losses)
        n_words += len(word_losses)
    mean_loss = total_loss / n_chars
    mean_word_perplexity = np.exp(total_word_loss / n_words)
    print('Validation loss: {:.3}, word perplexity: {:.1f}'.format(
        mean_loss, mean_word_perplexity))
    return {
        'valid_loss': mean_loss,
        'valid_word_perplexity': mean_word_perplexity
    }
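The word_loss helper used above is not shown in this example. Presumably it groups the per-character losses into per-word sums so that a word-level perplexity can be reported. A minimal sketch of that assumed behaviour (the whitespace-based splitting rule written here is an illustrative assumption, not the project's code):

def word_loss(chunk, char_losses):
    """Sum the per-character losses over whitespace-delimited words (assumed behaviour)."""
    # char_losses[i] is the loss for predicting chunk[i + 1]
    word_losses, current = [], 0.0
    for ch, loss in zip(chunk[1:], char_losses):
        if ch.isspace():
            if current:
                word_losses.append(current)
            current = 0.0
        else:
            current += loss
    if current:
        word_losses.append(current)
    return word_losses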
Code Example #2
File: helper.py Project: Enkhai/lstm-example
def sample(model: CharRNN,
           char2int: dict,
           prime='The',
           num_chars=1000,
           top_k=5):
    """
    Given a network and a char2int map, generate the next num_chars characters (1000 by default) starting from prime
    """

    device = next(model.parameters()).device.type

    int2char = {ii: ch for ch, ii in char2int.items()}

    # set our model to evaluation mode, since it uses dropout
    model.eval()

    # First off, run through the prime characters
    chars = [char2int[ch] for ch in prime]
    h = model.init_hidden(1, device)
    for ch in chars:
        char, h = predict(model, ch, h, top_k, device)

    chars.append(char)

    # Now pass in the previous character and get a new one
    for ii in range(num_chars):
        char, h = predict(model, chars[-1], h, top_k, device)
        chars.append(char)

    return ''.join(int2char[c] for c in chars)
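The predict helper is not included in this snippet. A plausible self-contained sketch of what it does is below: feed one character id through the model, softmax the output, and sample the next id from the top_k most likely characters. The actual implementation in the project may encode the input differently (e.g. one-hot), so treat this as an assumption:

import numpy as np
import torch
import torch.nn.functional as F

def predict(model, char_id, h, top_k, device):
    """One generation step: feed a single character id, sample the next id from the top-k outputs."""
    x = torch.tensor([[char_id]], device=device)   # shape (1, 1)
    h = tuple(t.detach() for t in h)               # detach the (LSTM) hidden state from the graph
    with torch.no_grad():
        out, h = model(x, h)
        p = F.softmax(out, dim=-1).squeeze()
    p, top_ids = p.topk(top_k)
    p, top_ids = p.cpu().numpy(), top_ids.cpu().numpy()
    next_id = np.random.choice(top_ids, p=p / p.sum())  # sample proportionally to probability
    return int(next_id), h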
Code Example #3
File: train.py Project: lopuhin/char-rnn-models
def train_model(model: CharRNN, criterion, optimizer, inputs: Variable,
                targets: Variable) -> float:
    batch_size = inputs.size(0)
    window_size = inputs.size(1)
    hidden = cuda(model.init_hidden(batch_size))
    model.zero_grad()
    loss = 0
    for c in range(window_size):
        output, hidden = model(inputs[:, c], hidden)
        loss += criterion(output.view(batch_size, -1), targets[:, c])
    loss.backward()
    optimizer.step()
    return loss.data[0] / window_size
Code Example #4
File: train.py Project: hacksman/char_rnn
def main(_):
    # Set up the model save path
    model_path = os.path.join('model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    # Load the text file to train on
    with codecs.open(FLAGS.input_file, encoding='utf-8') as f:
        text = f.read()

    # Build the text converter instance
    converter = TextCoverter(text, FLAGS.max_vocab)
    # Save the serialized converter so the later (sampling) code can reuse it
    converter.save_to_file(os.path.join(model_path, 'converter.pkl'))

    # Convert each character to its index in the vocabulary. For “寒随穷律变,春逐鸟声开。初风飘带柳,晚雪间花梅。”,
    # the comma ',' and the full stop '。' rank first and second in the vocabulary, so their indices are 0 and 1,
    # and the resulting arr is [15 17 12 22 6 0 5 8 18 19 16 1 4 7 2 21 3 9 0 10 11 20 13 14 1]
    arr = converter.text_to_arr(text)
    g = batch_generator(arr, FLAGS.num_seqs, FLAGS.num_steps)
    for x, y in g:
        print(x)
        print(y)
        break
    print("This is vocabulary size length: {}".format(converter.vocab_size))

    # Build the model
    model = CharRNN(converter.vocab_size,
                    num_seqs=FLAGS.num_seqs,
                    num_steps=FLAGS.num_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
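batch_generator is defined elsewhere in this project. A common implementation for this kind of char-RNN pipeline reshapes the index array into num_seqs rows and yields (x, y) windows of num_steps columns, with y being x shifted left by one character; the sketch below follows that pattern and may differ in detail from the project's version:

import numpy as np

def batch_generator(arr, n_seqs, n_steps):
    """Yield (x, y) batches of shape (n_seqs, n_steps); y is x shifted left by one character."""
    arr = np.asarray(arr)
    batch_chars = n_seqs * n_steps
    n_batches = len(arr) // batch_chars
    arr = arr[:batch_chars * n_batches].reshape((n_seqs, -1))
    while True:  # cycle over the corpus indefinitely
        for n in range(0, arr.shape[1], n_steps):
            x = arr[:, n:n + n_steps]
            y = np.zeros_like(x)
            y[:, :-1], y[:, -1] = x[:, 1:], x[:, 0]
            yield x, y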
Code Example #5
File: train.py Project: lopuhin/char-rnn-models
def train(args, model: CharRNN, step, epoch, corpus, char_to_id, criterion,
          model_file):
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    batch_chars = args.window_size * args.batch_size
    save = lambda ep: torch.save(
        {
            'state': model.state_dict(),
            'epoch': ep,
            'step': step,
        }, str(model_file))
    log = Path(args.root).joinpath('train.log').open('at', encoding='utf8')
    for epoch in range(epoch, args.n_epochs + 1):
        try:
            losses = []
            n_iter = args.epoch_batches or (len(corpus) // batch_chars)
            report_each = min(10, n_iter - 1)
            tr = tqdm.tqdm(total=n_iter * batch_chars)
            tr.set_description('Epoch {}'.format(epoch))
            model.train()
            for i in range(n_iter):
                inputs, targets = random_batch(
                    corpus,
                    batch_size=args.batch_size,
                    window_size=args.window_size,
                    char_to_id=char_to_id,
                )
                loss = train_model(model, criterion, optimizer, inputs,
                                   targets)
                step += 1
                losses.append(loss)
                tr.update(batch_chars)
                mean_loss = np.mean(losses[-report_each:])
                tr.set_postfix(loss=mean_loss)
                if i and i % report_each == 0:
                    write_event(log, step, loss=mean_loss)
            tr.close()
            save(ep=epoch + 1)
        except KeyboardInterrupt:
            print('\nGot Ctrl+C, saving checkpoint...')
            save(ep=epoch)
            print('done.')
            return
        if args.valid_corpus:
            valid_result = validate(args, model, criterion, char_to_id)
            write_event(log, step, **valid_result)
    print('Done training for {} epochs'.format(args.n_epochs))
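random_batch (along with the cuda and variable helpers) comes from the same project and is not shown. Below is a self-contained sketch of such a batching function, which samples batch_size random windows of window_size characters and returns input/target index tensors shifted by one character. The signature and return types here are assumptions; the project itself wraps the tensors in the old Variable API:

import random
import torch

def random_batch(corpus, batch_size, window_size, char_to_id):
    """Sample batch_size random windows; targets are the inputs shifted by one character."""
    inputs = torch.empty(batch_size, window_size, dtype=torch.long)
    targets = torch.empty(batch_size, window_size, dtype=torch.long)
    for b in range(batch_size):
        start = random.randint(0, len(corpus) - window_size - 2)
        chunk = corpus[start:start + window_size + 1]
        ids = [char_to_id[ch] for ch in chunk]
        inputs[b] = torch.tensor(ids[:-1])
        targets[b] = torch.tensor(ids[1:])
    return inputs, targets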
Code Example #6
File: sample.py Project: liusiqi43/char-rnn-tf
def main(unused_args):
  with open(os.path.join(FLAGS.session_dir, 'labels.pkl')) as f:
    char_to_id = pickle.load(f)
  with open(os.path.join(FLAGS.session_dir, 'config.pkl')) as f:
    config = pickle.load(f)
  with tf.variable_scope('model'):
    m = CharRNN('infer', config)
  with tf.Session() as sess:
    tf.initialize_all_variables().run()
    saver = tf.train.Saver(tf.all_variables())
    ckpt = tf.train.get_checkpoint_state(FLAGS.session_dir)
    if ckpt and ckpt.model_checkpoint_path:
      saver.restore(sess, ckpt.model_checkpoint_path)
      print(ckpt.model_checkpoint_path, 'restored')

      while True:
        seed = raw_input('seed:')
        start_time = time.time()
        print(m.sample(sess, char_to_id, FLAGS.num_steps, seed))
        print(FLAGS.num_steps / (time.time() - start_time), 'cps')
Code Example #7
File: sample.py Project: hacksman/char_rnn
def main(_):
    FLAGS.start_string = FLAGS.start_string
    converter = TextCoverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(
            FLAGS.checkpoint_path)

    model = CharRNN(
        converter.vocab_size,
        sampling=True,
        lstm_size=FLAGS.lstm_size,
        num_layers=FLAGS.num_layers,
        use_embedding=FLAGS.use_embedding,
        embedding_size=FLAGS.embedding_size,
    )

    model.load(FLAGS.checkpoint_path)

    start = converter.text_to_arr(FLAGS.start_string)
    arr = model.sample(FLAGS.max_length, start, converter.vocab_size)
    print(converter.arr_to_text(arr))
Code Example #8
File: helper.py Project: Enkhai/lstm-example
def count_parameters(model: CharRNN):
    """
    counts the total number of parameters in a model
    """
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
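As a quick sanity check, the helper works on any torch.nn.Module, not only CharRNN; for example:

import torch.nn as nn

toy = nn.Linear(10, 5)        # any module with trainable parameters
print(count_parameters(toy))  # 10 * 5 weights + 5 biases = 55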
Code Example #9
                                  seq_length=seq_length,
                                  device=device)
    validation_data = CharacterDataset(validation_text,
                                       vocabulary,
                                       batch_size=batch_size,
                                       seq_length=seq_length,
                                       device=device)

    # and make our data loaders
    # the default DataLoader batch size of 1 is exactly what we need here
    train_loader = DataLoader(train_data)
    validation_loader = DataLoader(validation_data)

    # Part 3: modelling
    # we create our model
    model = CharRNN(num_chars).to(device)
    # and the initial hidden state (a tensor of zeros)
    initial_state = model.init_hidden(batch_size, device)

    # we evaluate the capability of our model
    # a character to parameter ratio approaching 1 is optimal
    # too many parameters and the model may overfit
    # too few and the model may underfit
    char_param_ratio = len(text) / count_parameters(model)
    print("Character to model parameter ratio: %f\n" % char_param_ratio)

    # Part 4: training
    train(model,
          initial_state,
          train_loader=train_loader,
          validation_loader=validation_loader,
Code Example #10
File: model_1_testing.py Project: avenxu/dlproject
def sample(checkpoint,
           n_samples,
           lstm_size,
           vocab_size,
           prime="We",
           subject=[0, 0, 0, 0, 0, 0, 0]):
    """
    Sampling new text

    checkpoint: path of the checkpoint to restore
    n_samples: length of the generated sample, in characters
    lstm_size: number of hidden nodes
    vocab_size: size of the character vocabulary
    prime: start text
    """
    # convert input word to chars

    samples = [c for c in prime]
    # sampling=True means a batch of size 1 x 1
    model = CharRNN.CharRNN(len(CharRNN.vocab),
                            lstm_size=lstm_size,
                            sampling=True,
                            feature_size=8)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Restore session
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)
        for c in prime:
            x = np.zeros((1, 1))
            # input single char
            x[0, 0] = CharRNN.vocab_to_int[c]
            subject_reshape = np.reshape(subject, (1, 1, -1)).astype(dtype=int)

            feed = {
                model.inputs: x,
                model.keep_prob: 1.,
                model.subject: subject_reshape,
                model.initial_state: new_state
            }
            preds, new_state = sess.run([model.prediction, model.final_state],
                                        feed_dict=feed)

        c = pick_top_n(preds, len(CharRNN.vocab))
        # add the new prediction to the sample
        samples.append(CharRNN.int_to_vocab[c])

        # generate new chars till the limit
        for i in range(n_samples):
            x[0, 0] = c
            subject_reshape = np.reshape(subject, (1, 1, -1)).astype(dtype=int)
            feed = {
                model.inputs: x,
                model.keep_prob: 1.,
                model.subject: subject_reshape,
                model.initial_state: new_state
            }
            preds, new_state = sess.run([model.prediction, model.final_state],
                                        feed_dict=feed)

            c = pick_top_n(preds, len(CharRNN.vocab))
            samples.append(CharRNN.int_to_vocab[c])

    return ''.join(samples)
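pick_top_n is not shown here. In char-RNN sampling code of this style it usually zeroes out everything except the top_n most probable characters and samples from the renormalized distribution; a sketch of that assumed helper:

import numpy as np

def pick_top_n(preds, vocab_size, top_n=5):
    """Sample a character id from the top_n most probable entries of the prediction."""
    p = np.squeeze(preds)
    p[np.argsort(p)[:-top_n]] = 0  # zero out everything outside the top_n
    p = p / np.sum(p)              # renormalize to a probability distribution
    return np.random.choice(vocab_size, 1, p=p)[0]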
Code Example #11
File: model_1_training.py Project: avenxu/dlproject
batch_size = 10
num_steps = 100
lstm_size = 512
num_layers = 2
learning_rate = 0.001
keep_prob = 0.5
feature_size = 8

epochs = 100

save_every_n = 500

model = CharRNN.CharRNN(len(CharRNN.vocab),
                        batch_size=batch_size,
                        num_steps=num_steps,
                        lstm_size=lstm_size,
                        num_layers=num_layers,
                        learning_rate=learning_rate,
                        feature_size=feature_size)

saver = tf.train.Saver(max_to_keep=100)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    counter = 0
    for e in range(epochs):
        new_state = sess.run(model.initial_state)
        loss = 0
        for x, y in CharRNN.get_batches(CharRNN.merged_data_matrix, batch_size,
                                        num_steps, feature_size):
Code Example #12
def main(
    representation,
    train=None,
    generate=None,
    temperature=DEFAULT_TEMPERATURE,
    max_generate_len=DEFAULT_MAX_GEN_LEN,
    generator_prime_str=FILE_START,
    window_size=DEFAULT_WINDOW_SIZE,
    batch_size=DEFAULT_BATCH_SIZE,
    disable_cuda=DEFAULT_DISABLE_CUDA,
    learning_rate=DEFAULT_LEARNING_RATE,
    num_epochs=DEFAULT_NUM_EPOCHS,
    patience=DEFAULT_PATIENCE,
    recurrent_type=DEFAULT_RECURRENT_TYPE,
    hidden_size=DEFAULT_RECURRENT_HIDDEN_SIZE,
    recurrent_layers=DEFAULT_RECURRENT_LAYERS,
    recurrent_dropout=DEFAULT_RECURRENT_DROPOUT,
    print_every_iter=DEFAULT_PRINT_EVERY_ITER,
    log_level=DEFAULT_LOG_LEVEL,
):
    # https://github.com/pytorch/pytorch/issues/13775
    torch.multiprocessing.set_start_method("spawn")

    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(log_level)

    use_cuda = torch.cuda.is_available()
    if disable_cuda:
        use_cuda = False

    if representation == "char":
        # Create the neural network structure
        logger.info("Constructing the neural network architecture...")
        n_chars = len(CHARACTERS)
        nn = CharRNN(n_chars,
                     n_chars,
                     hidden_size=hidden_size,
                     recurrent_type=recurrent_type,
                     recurrent_layers=recurrent_layers,
                     recurrent_dropout=recurrent_dropout,
                     use_cuda=use_cuda)
        if use_cuda:
            nn.cuda()

        if train:
            # Warn if window_size is None while batch_size > 1
            if window_size is None and batch_size != 1:
                logger.warning("~" * 40)
                logger.warning(
                    "WARN: Undefined window_size with batch_size: {}".format(
                        batch_size))
                logger.warning(
                    "\tBatches may not have equal sequence lengths!")
                logger.warning(
                    "\tWindow size should be defined when batch_size > 1.")
                logger.warning("~" * 40)

            # Train our model
            train_full(nn,
                       max_window_size=window_size,
                       learning_rate=learning_rate,
                       n_epochs=num_epochs,
                       patience_threshold=patience,
                       batch_size=batch_size,
                       print_every=print_every_iter,
                       use_cuda=use_cuda)

        elif generate:
            progress_path = nn.get_progress_path()
            # Load our model
            logger.info("Loading the model weights...")
            path = nn.get_state_dict_path()
            if not os.path.isfile(path):
                raise FileNotFoundError(
                    ("Model does not exist at {}. " +
                     "Manual model renaming required.").format(path))
            nn.load_state_dict(torch.load(path))
            nn = nn.eval()
            generate_charseq(nn,
                             prime_str=generator_prime_str,
                             max_window_size=window_size,
                             max_generate_len=max_generate_len,
                             temperature=temperature)