path_eval_sentences = os.path.join(args.data_dir, 'dev.pkl')

# Load vocabularies
words = tf.contrib.lookup.index_table_from_file(path_words, num_oov_buckets=1)

# Create the input data pipeline
logging.info("Creating the datasets...")
train_sentences = load_dataset_from_text(path_train_sentences)
eval_sentences = load_dataset_from_text(path_eval_sentences)

# Specify other parameters for the dataset and the model
params.eval_size = params.dev_size
params.buffer_size = params.train_size  # buffer size for shuffling
params.id_pad_word = words.lookup(tf.constant(params.pad_word))

# Create the two iterators over the two datasets
train_inputs = input_fn('train', train_sentences, words, params)
eval_inputs = input_fn('eval', eval_sentences, words, params)
logging.info("- done.")

# Define the models (two different sets of nodes that share weights for train and eval)
logging.info("Creating the model...")
train_model_spec = model_fn('train', train_inputs, params)
eval_model_spec = model_fn('eval', eval_inputs, params, reuse=True)
logging.info("- done.")

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(train_model_spec, eval_model_spec, args.model_dir, params,
                   args.restore_from)
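
# A minimal sketch of what `input_fn` might look like for the pipeline above,
# assuming `sentences` yields (token_ids, length) pairs and that `params`
# carries a `batch_size` alongside the `buffer_size` and `id_pad_word` fields
# set above; the dict keys returned here are illustrative, not necessarily the
# project's actual interface.
def input_fn(mode, sentences, words, params):
    is_training = (mode == 'train')
    # Shuffling only matters during training; a buffer of 1 is a no-op for eval
    buffer_size = params.buffer_size if is_training else 1

    dataset = (sentences
               .shuffle(buffer_size=buffer_size)
               .padded_batch(params.batch_size,
                             padded_shapes=(tf.TensorShape([None]),  # sentences of unknown length
                                            tf.TensorShape([])),     # scalar lengths, no padding
                             padding_values=(params.id_pad_word,     # pad on the right with the <pad> id
                                             0))                     # unused for scalar lengths
               .prefetch(1))  # overlap the input pipeline with model compute

    # Initializable iterator: re-run `iterator_init_op` at the start of each epoch
    iterator = dataset.make_initializable_iterator()
    sentence, sentence_length = iterator.get_next()
    return {'sentence': sentence,
            'sentence_length': sentence_length,
            'iterator_init_op': iterator.initializer}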
# Get paths for vocabularies and dataset
path_vocab = os.path.join(args.data_dir, 'vocab{}'.format(params.min_freq))
params.vocab_path = path_vocab
path_test_queries = os.path.join(args.data_dir, 'dev/queries.txt')
path_test_articles = os.path.join(args.data_dir, 'dev/articles.txt')

# Load vocabularies
vocab = tf.contrib.lookup.index_table_from_file(
    path_vocab, num_oov_buckets=num_oov_buckets, key_column_index=0)

# Create the input data pipeline
logging.info("Creating the dataset...")
test_queries = load_dataset_from_text(path_test_queries, vocab, params)
test_articles = load_dataset_from_text(path_test_articles, vocab, params)

# Specify other parameters for the dataset and the model
params.eval_size = params.test_size
params.id_pad_word = vocab.lookup(tf.constant(params.pad_word))

# Create the iterator over the test set
inputs = input_fn('eval', test_queries, test_articles, params)
logging.info("- done.")

# Define the model
logging.info("Creating the model...")
model_spec = model_fn('eval', inputs, params, reuse=False)
logging.info("- done.")

logging.info("Starting evaluation")
evaluate(model_spec, args.model_dir, params, args.restore_from)
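
# A minimal sketch of the `load_dataset_from_text` helper as it is called in
# this script (path, lookup table, params); the whitespace tokenization and
# the (ids, length) element structure are assumptions, not confirmed by the
# source.
def load_dataset_from_text(path_txt, vocab, params=None):
    # One example per line
    dataset = tf.data.TextLineDataset(path_txt)
    # Split each line into tokens; tf.string_split returns a SparseTensor
    dataset = dataset.map(lambda line: tf.string_split([line]).values)
    # Convert tokens to ids through the lookup table and keep the true length
    dataset = dataset.map(lambda tokens: (vocab.lookup(tokens), tf.size(tokens)))
    return dataset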
path_sentiment_tags = os.path.join(args.data_dir, 'sentiment_tags.txt')
path_reviews = os.path.join(args.data_dir, 'reviews{}.txt'.format(toy))
# path_reviews = os.path.join(args.data_dir, 'reviews_small.txt')
path_sentiments = os.path.join(args.data_dir, 'sentiments{}.txt'.format(toy))
# path_sentiments = os.path.join(args.data_dir, 'sentiments.txt')

# Load vocabularies
words = tf.contrib.lookup.index_table_from_file(path_words, num_oov_buckets=num_oov_buckets)
sentiments = tf.contrib.lookup.index_table_from_file(path_sentiment_tags)

# Create the input data pipeline
reviews = load_dataset_from_text(path_reviews, words)
review_sentiments = load_dataset_from_text(path_sentiments, sentiments, isLabels=True)

# Specify other parameters for the dataset and the model
params_sentiment.id_pad_word = words.lookup(tf.constant(params_sentiment.pad_word))
# Pad tags must be looked up in the sentiment-tag table, not the word table
params_sentiment.id_pad_tag = sentiments.lookup(tf.constant(params_sentiment.pad_tag))

# Create the iterator over the test set
inputs_sentiment = input_fn('eval', reviews, review_sentiments, params_sentiment)

# Define the model
print('Creating sentiment and era models...')
model_spec_sentiment = model_fn('eval', inputs_sentiment, params_sentiment, reuse=False)
print('Done')

# Evaluate the model
# evaluate(model_spec_sentiment, args.model_dir, params_sentiment, args.restore_from)

# Initialize saver to restore the model
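
# A minimal sketch of the restore step the comment above refers to, assuming
# the standard TF 1.x checkpoint layout under `args.model_dir` and that
# `inputs_sentiment` exposes an `iterator_init_op` as in the `input_fn` sketch
# earlier; the evaluation loop itself would mirror `evaluate` in the other
# scripts.
saver = tf.train.Saver()
with tf.Session() as sess:
    # Lookup tables (the vocabularies above) need explicit initialization
    sess.run(tf.tables_initializer())
    # Restore the trained weights from the latest checkpoint
    save_path = tf.train.latest_checkpoint(
        os.path.join(args.model_dir, args.restore_from))
    saver.restore(sess, save_path)
    # Re-initialize the iterator before consuming the test set
    sess.run(inputs_sentiment['iterator_init_op'])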