Example #1
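# Context assumed but not shown in this snippet: `import os`, `import
# numpy as np`, `import tensorflow as tf`, an HParams class (e.g. from
# tf.contrib.training), an argparse-style `args` namespace, and the
# project's own newsgroups, tfrecords, model_fn, and input_fn helpers.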
def main(_):

    # tf.estimator will load/reuse anything found in its model_dir, so
    # we make sure to clear its contents before every training run.
    # For predictions, however, we of course want to load the previously
    # trained model from disk.
    if tf.gfile.Exists(args.model_dir) and not args.predict_only:
        tf.gfile.DeleteRecursively(args.model_dir)
    tf.gfile.MakeDirs(args.model_dir)

    hparams = HParams(**vars(args))

    # We will use the 20 newsgroups dataset to train our model.
    # Note that we won't be using the labels, since our model is simply
    # learning to reconstruct its inputs as its output.
    train_file_path = os.path.join(hparams.data_dir,
                                   '20ng-train-all-terms.txt')

    # Define the path to the file that we'll store our vocabulary in.
    # This file will have the same number of lines as our vocab_size.
    # Each line will contain a single word in our vocabulary, listed in
    # order of decreasing frequency seen in our training data.
    vocab_path = os.path.join(hparams.processed_data_dir, 'vocab.txt')

    # Data preparation: getting vocabulary and saving tfrecords format.
    if not tf.gfile.Exists(vocab_path):
        print('Extracting vocab, labels, and tokenized texts from data.')
        vocab, labels, texts = newsgroups.fit_and_extract(
            train_file_path, hparams.vocab_size)
        print('Saving vocabulary to {}.'.format(vocab_path))
        with open(vocab_path, 'w+') as f:
            f.write('\n'.join(vocab))

        tfrecords_path = os.path.join(hparams.processed_data_dir,
                                      'embed.tfrecords')
        print('Saving tfrecords to {}.'.format(tfrecords_path))
        tfrecords.save_tfrecords(out_path=tfrecords_path,
                                 labels=labels,
                                 texts=texts,
                                 vocab=vocab)
    else:
        print('Reading existing vocabulary from {}.'.format(vocab_path))
        with open(vocab_path) as f:
            vocab = [l.strip() for l in f.readlines()]

    hparams.vocab = vocab
    print('Creating autoencoder.')
    autoencoder = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=hparams.model_dir,
        config=tf.estimator.RunConfig(log_step_count_steps=10000),
        params=hparams)

    if not args.predict_only:
        print('Training autoencoder.')
        autoencoder.train(
            input_fn=lambda: input_fn(hparams.processed_data_dir, hparams),
            steps=1000)

    sample_sentences = [
        'i like dogs', 'i am a test sentence',
        'TensorFlow is a fun library to use'
    ]
    pred_inputs = []
    for sent in sample_sentences:
        token_ids = [
            vocab.index(w) for w in sent.split()[:args.max_seq_len]
            if w in vocab
        ]
        # Pad if necessary.
        if len(token_ids) < args.max_seq_len:
            token_ids.extend([0] * (args.max_seq_len - len(token_ids)))
        pred_inputs.append(token_ids)

    pred_inp_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': np.asarray(pred_inputs)}, shuffle=False)
    predictions = autoencoder.predict(input_fn=pred_inp_fn)

    print('Sample predictions:')
    for i, prediction in enumerate(predictions):
        clean_prediction = ' '.join(
            [tok.decode() for tok in prediction if tok != b'_UNK'])
        print('\nExpected:', sample_sentences[i], sep='\t')
        print('Actual:  ', clean_prediction, sep='\t')
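One note on the prediction loop above: `vocab.index(w)` scans the whole vocabulary list for every token, which is O(vocab_size) per lookup. Below is a minimal sketch of the same encoding step using a dict for O(1) lookups; the `word_to_id`, `encode`, and `pad_id` names are introduced here, and the use of 0 as the padding id simply mirrors the original loop.

word_to_id = {w: i for i, w in enumerate(vocab)}

def encode(sentence, max_seq_len, pad_id=0):
    # Keep at most max_seq_len in-vocabulary tokens, then pad to a fixed
    # length so every prediction input shares the same shape.
    token_ids = [word_to_id[w] for w in sentence.split()[:max_seq_len]
                 if w in word_to_id]
    token_ids.extend([pad_id] * (max_seq_len - len(token_ids)))
    return token_ids

pred_inputs = [encode(s, args.max_seq_len) for s in sample_sentences]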
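Both examples also call a project-local `tfrecords.save_tfrecords` whose body is not shown. For orientation, here is a minimal sketch of what such a helper could look like in TF 1.x; the feature names ('label', 'tokens'), the integer-label assumption, and the assumption that each text is already tokenized are guesses, not the project's actual schema.

def save_tfrecords(out_path, labels, texts, vocab):
    word_to_id = {w: i for i, w in enumerate(vocab)}
    with tf.python_io.TFRecordWriter(out_path) as writer:
        for label, text in zip(labels, texts):
            # Assumes each text is already a list of tokens and each
            # label has already been mapped to an integer id.
            token_ids = [word_to_id[w] for w in text if w in word_to_id]
            example = tf.train.Example(features=tf.train.Features(feature={
                'label': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[label])),
                'tokens': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=token_ids)),
            }))
            writer.write(example.SerializeToString())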
Example #2
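# As in Example #1, this snippet assumes `import os` and `import
# tensorflow as tf`, an `args` namespace, an HParams class, and the
# project's newsgroups, tfrecords, model_fn, and input_fn helpers.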
def main():
    # tf.estimator will load/reuse anything found in its model_dir, so
    # we make sure to clear its contents before every training run.
    # (Unlike Example #1, this script has no predict-only path, so the
    # directory is always reset.)
    if tf.gfile.Exists(args.model_dir):
        tf.gfile.DeleteRecursively(args.model_dir)
    tf.gfile.MakeDirs(args.model_dir)
    tf.gfile.MakeDirs(args.processed_data_dir)
    tf.gfile.Copy(os.path.join(args.data_dir, 'labels.txt'),
                  os.path.join(args.processed_data_dir, 'labels.txt'),
                  overwrite=True)

    hparams = HParams(**vars(args))

    # Define the path to the file that we'll store our vocabulary in.
    # This file will have the same number of lines as our vocab_size.
    # Each line will contain a single word in our vocabulary, listed in
    # order of decreasing frequency seen in our training data.
    vocab_path = os.path.join(hparams.processed_data_dir,
                              'vocab_{}.txt'.format(hparams.vocab_size))

    # Data preparation: getting vocabulary and saving tfrecords format.
    if not tf.gfile.Exists(vocab_path):
        for mode in ['train', 'test']:
            data_file_path = os.path.join(hparams.data_dir,
                                          '20ng-{}-all-terms.txt'.format(mode))

            print('Extracting vocab, labels, and tokenized texts from data.')
            if mode == 'train':
                vocab, labels, texts = newsgroups.fit_and_extract(
                    data_file_path, hparams.vocab_size)
                print('Saving vocabulary to {}.'.format(vocab_path))
                with open(vocab_path, 'w+') as f:
                    f.write('\n'.join(vocab))
            else:
                _, labels, texts = newsgroups.fit_and_extract(
                    data_file_path, hparams.vocab_size)

            tfrecords_path = os.path.join(
                hparams.processed_data_dir,
                '20ng_advanced_{}_{}.tfrecords'.format(mode,
                                                       hparams.vocab_size))
            print('Saving tfrecords to {}.'.format(tfrecords_path))
            tfrecords.save_tfrecords(out_path=tfrecords_path,
                                     labels=labels,
                                     texts=texts,
                                     vocab=vocab)
    else:
        print('Reading existing vocabulary from {}.'.format(vocab_path))
        with open(vocab_path) as f:
            vocab = [l.strip() for l in f.readlines()]

    hparams.vocab = vocab
    print('Creating classifier.')
    classifier = tf.estimator.Estimator(model_fn=model_fn,
                                        model_dir=hparams.model_dir,
                                        config=tf.estimator.RunConfig(
                                            save_summary_steps=100,
                                            save_checkpoints_steps=500,
                                            log_step_count_steps=10000,
                                        ),
                                        params=hparams)

    for _ in range(hparams.num_iter):
        classifier.train(input_fn=lambda: input_fn(hparams, 'train'),
                         hooks=[
                             tf.train.ProfilerHook(
                                 save_steps=100, output_dir=hparams.model_dir)
                         ],
                         steps=hparams.train_steps)
        classifier.evaluate(input_fn=lambda: input_fn(hparams, 'test'),
                            steps=hparams.eval_steps)
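The manual train/evaluate loop above works, but TF 1.x also provides tf.estimator.train_and_evaluate, which interleaves training and evaluation from a single call. A sketch of the equivalent setup, reusing the input_fn, hparams, and classifier from this example:

    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: input_fn(hparams, 'train'),
        max_steps=hparams.num_iter * hparams.train_steps)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: input_fn(hparams, 'test'),
        steps=hparams.eval_steps,
        throttle_secs=0)  # evaluate whenever a new checkpoint appears
    tf.estimator.train_and_evaluate(classifier, train_spec, eval_spec)

With save_checkpoints_steps=500 in the RunConfig above, this evaluates roughly every 500 training steps rather than once per num_iter chunk.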