Code Example #1
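    # NOTE: fragment of a larger training script (TensorFlow 1.x). It assumes
    # os, logging, and tensorflow (as tf) are already imported, and that args,
    # params, path_words, path_train_sentences, and the helpers
    # load_dataset_from_text, input_fn, model_fn, and train_and_evaluate are
    # defined earlier in the file.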
    path_eval_sentences = os.path.join(args.data_dir, 'dev.pkl')

    # Load Vocabularies
    words = tf.contrib.lookup.index_table_from_file(path_words,
                                                    num_oov_buckets=1)

    # Create the input data pipeline
    logging.info("Creating the datasets...")
    train_sentences = load_dataset_from_text(path_train_sentences)

    eval_sentences = load_dataset_from_text(path_eval_sentences)

    # Specify other parameters for the dataset and the model
    params.eval_size = params.dev_size
    params.buffer_size = params.train_size  # buffer size for shuffling
    params.id_pad_word = words.lookup(tf.constant(params.pad_word))

    # Create the two iterators over the two datasets
    train_inputs = input_fn('train', train_sentences, words, params)
    eval_inputs = input_fn('eval', eval_sentences, words, params)
    logging.info("- done.")

    # Define the models (two different sets of nodes that share weights for train and eval)
    logging.info("Creating the model...")
    train_model_spec = model_fn('train', train_inputs, params)
    eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)
    logging.info("- done.")

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir,
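The fragment breaks off mid-call. Mirroring the evaluate(...) call in Example #2 below (and the CS230-style template this script resembles), the call plausibly continues with the hyperparameter object and an optional checkpoint to restore from; a hedged reconstruction, not the project's confirmed code:

    train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir,
                       params, args.restore_from)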
Code Example #2
File: evaluate.py  Project: aniksh/dual_encoder
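    # NOTE: fragment of an evaluation script (TensorFlow 1.x); assumes os,
    # logging, and tensorflow (as tf) are imported, and that args, params,
    # num_oov_buckets, and the helpers load_dataset_from_text, input_fn,
    # model_fn, and evaluate come from the surrounding project.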
    # Get paths for vocabularies and dataset
    path_vocab = os.path.join(args.data_dir, 'vocab{}'.format(params.min_freq))
    params.vocab_path = path_vocab
    path_test_queries = os.path.join(args.data_dir, 'dev/queries.txt')
    path_test_articles = os.path.join(args.data_dir, 'dev/articles.txt')
    # Load Vocabularies
    vocab = tf.contrib.lookup.index_table_from_file(
        path_vocab, num_oov_buckets=num_oov_buckets, key_column_index=0)

    # Create the input data pipeline
    logging.info("Creating the dataset...")
    test_queries = load_dataset_from_text(path_test_queries, vocab, params)
    test_articles = load_dataset_from_text(path_test_articles, vocab, params)

    # Specify other parameters for the dataset and the model
    params.eval_size = params.test_size
    params.id_pad_word = vocab.lookup(tf.constant(params.pad_word))

    # Create iterator over the test set
    inputs = input_fn('eval', test_queries, test_articles, params)
    logging.info("- done.")

    # Define the model
    logging.info("Creating the model...")
    model_spec = model_fn('eval', inputs, params, reuse=False)
    logging.info("- done.")

    logging.info("Starting evaluation")
    evaluate(model_spec, args.model_dir, params, args.restore_from)
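Both fragments above map tokens to integer ids with tf.contrib.lookup.index_table_from_file. A minimal, self-contained sketch of that mechanic (TensorFlow 1.x; the file name vocab.txt and its contents are made up for illustration):

    import tensorflow as tf

    # vocab.txt holds one token per line: "the", "cat", "sat"
    vocab = tf.contrib.lookup.index_table_from_file('vocab.txt', num_oov_buckets=1)
    ids = vocab.lookup(tf.constant(['the', 'sat', 'xyzzy']))

    with tf.Session() as sess:
        sess.run(tf.tables_initializer())  # lookup tables need explicit initialization
        print(sess.run(ids))  # -> [0 2 3]: line indices for known tokens, the OOV bucket id for 'xyzzy'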
Code Example #3
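# NOTE: fragment (TensorFlow 1.x); assumes os and tensorflow (as tf) are
# imported, and that args, toy, num_oov_buckets, path_words, params_sentiment,
# and the helpers load_dataset_from_text, input_fn, and model_fn are defined
# earlier in the file.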
path_sentiment_tags = os.path.join(args.data_dir, 'sentiment_tags.txt')
# path_reviews = os.path.join(args.data_dir, 'reviews_small.txt')
path_reviews = os.path.join(args.data_dir, 'reviews{}.txt'.format(toy))
path_sentiments = os.path.join(args.data_dir, 'sentiments{}.txt'.format(toy))
# path_sentiments = os.path.join(args.data_dir, 'sentiments.txt')

# Load vocabularies
words = tf.contrib.lookup.index_table_from_file(path_words, num_oov_buckets=num_oov_buckets)
sentiments = tf.contrib.lookup.index_table_from_file(path_sentiment_tags)

# Create the input data pipeline
reviews = load_dataset_from_text(path_reviews, words)
review_sentiments = load_dataset_from_text(path_sentiments, sentiments, isLabels=True)

# Specify other parameters for the dataset and the model
params_sentiment.id_pad_word = words.lookup(tf.constant(params_sentiment.pad_word))
params_sentiment.id_pad_tag = sentiments.lookup(tf.constant(params_sentiment.pad_tag))  # pad tag id comes from the tag vocabulary


# Create the iterator over the test set
inputs_sentiment = input_fn('eval', reviews, review_sentiments, params_sentiment)

# Define the model
print('Creating sentiment and era models...')
model_spec_sentiment = model_fn('eval', inputs_sentiment, params_sentiment, reuse=False)
print('Done')

# Evaluate the model...
# evaluate(model_spec_sentiment, args.model_dir, params_sentiment, args.restore_from)

# initialize saver to restore model
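The fragment stops at the comment above. A minimal sketch of the TensorFlow 1.x restore pattern it announces (the checkpoint location and session handling here are assumptions, not the project's actual code):

    saver = tf.train.Saver()  # by default, restores every saved variable
    with tf.Session() as sess:
        sess.run(tf.tables_initializer())  # initialize the vocabulary lookup tables
        saver.restore(sess, tf.train.latest_checkpoint(args.model_dir))
        # ... run the eval ops held in model_spec_sentiment here ...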