def init(path):
    """Initialize the module-level stats store backed by the JSON file at *path*.

    Sets the globals ``stats_path`` and ``STATS``. Loading is best-effort:
    any failure to read/parse the file (missing file, bad JSON, ...) leaves
    ``STATS`` as an empty dict rather than raising.
    """
    global stats_path, STATS
    stats_path = path
    try:
        loaded = read_json(stats_path)
    except Exception:
        # Deliberate broad catch: a fresh/absent stats file starts empty.
        loaded = {}
    STATS = loaded
def __init__(self, schema, lexicon, model_path, fact_check, decoding, timed_session=False, consecutive_entity=True, realizer=None):
    """Load a trained neural dialogue model from *model_path* for interactive use.

    Reads ``config.json`` and ``vocab.pkl`` from *model_path*, rebuilds the
    model graph, restores the best checkpoint from ``<model_path>-best`` into
    a fresh TF session, and bundles everything the session-level code needs
    into ``self.env`` (an ``Env`` namedtuple).

    NOTE(review): the TF session created here is never closed in this method;
    the original comment says the caller is responsible for closing it.
    """
    super(NeuralSystem, self).__init__()
    self.schema = schema
    self.lexicon = lexicon
    self.timed_session = timed_session
    self.consecutive_entity = consecutive_entity
    # Load training-time arguments saved alongside the model, then override
    # the ones that must differ at inference time.
    args_path = os.path.join(model_path, 'config.json')
    config = read_json(args_path)
    config['batch_size'] = 1
    config['gpu'] = 0  # Don't need GPU for batch_size=1
    config['decoding'] = decoding
    args = argparse.Namespace(**config)
    # Vocab / entity mappings pickled at training time.
    mappings_path = os.path.join(model_path, 'vocab.pkl')
    mappings = read_pickle(mappings_path)
    vocab = mappings['vocab']  # TODO: different models have the same key now
    # No dropout at inference time.
    args.dropout = 0
    logstats.add_args('model_args', args)
    model = build_model(schema, mappings, args)
    # Tensorflow session config: CPU-only when args.gpu == 0 (always, per the
    # override above), otherwise one GPU capped at 50% memory.
    if args.gpu == 0:
        print 'GPU is disabled'
        config = tf.ConfigProto(device_count = {'GPU': 0})
    else:
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = 0.5, allow_growth=True)
        config = tf.ConfigProto(device_count = {'GPU': 1}, gpu_options=gpu_options)
    # NOTE: need to close the session when done
    tf_session = tf.Session(config=config)
    tf.initialize_all_variables().run(session=tf_session)
    # Restore trained parameters from the "best" checkpoint directory.
    ckpt = tf.train.get_checkpoint_state(model_path+'-best')
    assert ckpt, 'No checkpoint found'
    assert ckpt.model_checkpoint_path, 'No model path found in checkpoint'
    saver = tf.train.Saver()
    saver.restore(tf_session, ckpt.model_checkpoint_path)
    self.model_name = args.model
    # The copy-attention model predicts graph-form entity targets.
    if self.model_name == 'attn-copy-encdec':
        args.entity_target_form = 'graph'
        copy = True
    else:
        copy = False
    preprocessor = Preprocessor(schema, lexicon, args.entity_encoding_form, args.entity_decoding_form, args.entity_target_form, args.prepend)
    textint_map = TextIntMap(vocab, mappings['entity'], preprocessor)
    # Immutable bundle of everything the generation loop needs.
    Env = namedtuple('Env', ['model', 'tf_session', 'preprocessor', 'vocab', 'copy', 'textint_map', 'stop_symbol', 'remove_symbols', 'max_len', 'evaluator', 'prepend',
        'consecutive_entity', 'realizer'])
    # stop_symbol ends decoding; remove_symbols are stripped from output;
    # max_len caps generated utterance length at 20 tokens.
    self.env = Env(model, tf_session, preprocessor, mappings['vocab'], copy, textint_map, stop_symbol=vocab.to_ind(markers.EOS), remove_symbols=map(vocab.to_ind, (markers.EOS, markers.PAD)), max_len=20, evaluator=FactEvaluator() if fact_check else None, prepend=args.prepend, consecutive_entity=self.consecutive_entity, realizer=realizer)
             help= 'Check if the utterance is true given the KB. Only work for simulated data.' )
# NOTE(review): fragment — the add_argument call above and the get_system
# definition below are cut off outside this view.
add_scenario_arguments(parser)
add_lexicon_arguments(parser)
add_dataset_arguments(parser)
add_neural_system_arguments(parser)
add_heuristic_system_arguments(parser)
args = parser.parse_args()
logstats.init(args.stats_file)
# Seed both stdlib and numpy RNGs for reproducibility when a seed is given.
if args.random_seed:
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)
schema = Schema(args.schema_path)
scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path))
lexicon = Lexicon(schema, args.learned_lex, stop_words=args.stop_words)
# Optional inverse lexicon (entity -> surface form) for realization.
if args.inverse_lexicon:
    realizer = InverseLexicon(schema, args.inverse_lexicon)
else:
    realizer = None
# Default the example caps to "all scenarios" when unspecified.
if args.train_max_examples is None:
    args.train_max_examples = scenario_db.size
if args.test_max_examples is None:
    args.test_max_examples = scenario_db.size
def get_system(name):
    # Factory: map a system name to a constructed dialogue system.
    if name == 'simple':
        return SimpleSystem(lexicon, realizer=realizer)
'''
Extract the scenario of every chat in a transcript file and write them
out as a scenario DB JSON file.
'''
import sys
import argparse
from src.basic.util import read_json, write_json
from src.basic.scenario_db import Scenario, ScenarioDB
from src.basic.schema import Schema

parser = argparse.ArgumentParser()
parser.add_argument('--chats')
parser.add_argument('--scenarios')
parser.add_argument('--schema-path')
args = parser.parse_args()

chats = read_json(args.chats)
schema = Schema(args.schema_path)
# One scenario per chat, parsed against the schema.
scenarios = [Scenario.from_dict(schema, chat['scenario']) for chat in chats]
scenario_db = ScenarioDB(scenarios)
write_json(scenario_db.to_dict(), args.scenarios)
                    action='store_true', help='Output html files')
# NOTE(review): fragment — the add_argument call above is cut off outside
# this view.
parser.add_argument('--outdir', default='.', help='Output dir')
parser.add_argument('--stats', default='stats.json', help='Path to stats file')
parser.add_argument('--partner', default=False, action='store_true', help='Whether this is from partner survey')
add_scenario_arguments(parser)
add_lexicon_arguments(parser)
add_visualization_arguments(parser)
args = parser.parse_args()
# One JSON blob per evaluation transcript file.
raw_eval = [read_json(trans) for trans in args.eval_transcripts]
# question -> rater/key -> list of scores.
question_scores = defaultdict(lambda: defaultdict(list))
raw_chats = read_json(args.dialogue_transcripts)
uuid_to_chat = {chat['uuid']: chat for chat in raw_chats}
schema = Schema(args.schema_path)
scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path))
# NOTE(review): argument order does not match the builtin
# filter(function, iterable) — presumably `filter` here is a module-local
# helper that selects dialogue ids; confirm it is not the builtin.
dialogue_ids = filter(raw_eval, uuid_to_chat)
for eval_ in raw_eval:
    read_eval(eval_, question_scores, mask=dialogue_ids)
if args.hist:
    hist(question_scores, args.outdir, partner=args.partner)
if args.summary:
    summary = summarize(question_scores)
# Build CLI, load schema/scenarios/transcripts, then compute speech-act
# statistics over the chats.
parser = ArgumentParser()
add_scenario_arguments(parser)
add_lexicon_arguments(parser)
parser.add_argument('--transcripts', type=str, default='transcripts.json', help='Path to directory containing transcripts')
parser.add_argument('--eval-transcripts', type=str, default='transcripts.json', help='Path to directory containing transcripts')
parsed_args = parser.parse_args()
schema = Schema(parsed_args.schema_path)
scenario_db = ScenarioDB.from_dict(schema, read_json(parsed_args.scenarios_path))
transcripts = read_json(parsed_args.transcripts)
eval_transcripts = read_json(parsed_args.eval_transcripts)
# Lexicon in non-learned mode (False), seeded from the scenarios file.
lexicon = Lexicon(schema, False, scenarios_json=parsed_args.scenarios_path, stop_words=parsed_args.stop_words)
# All entity forms 'canonical'; no prepending.
preprocessor = Preprocessor(schema, lexicon, 'canonical', 'canonical', 'canonical', False)
def compute_statistics(chats):
    # Tally speech acts across chats; `chats` is (agent, raw_example) pairs.
    # NOTE(review): definition is cut off below this view.
    speech_act_summary_map = defaultdict(int)
    total = 0.
    for agent, raw in chats:
        ex = Example.from_dict(scenario_db, raw)
if __name__ == "__main__":
    # Visualize chat transcripts as HTML, optionally annotated with survey
    # responses.
    parser = ArgumentParser()
    add_scenario_arguments(parser)
    add_visualization_arguments(parser)
    parser.add_argument('--transcripts', type=str, default='transcripts.json', help='Path to json file containing chats')
    parser.add_argument('--survey_file', type=str, default=None, help='Path to json file containing survey')
    args = parser.parse_args()
    schema = Schema(args.schema_path)
    transcripts = read_json(args.transcripts)
    # Fix: --survey_file defaults to None, so guard before reading it;
    # previously read_json(None)[1] crashed whenever the flag was omitted.
    # Index [1] selects the responses section of the survey file — presumably
    # index 0 holds other metadata; confirm against the survey writer.
    survey = read_json(args.survey_file)[1] if args.survey_file else None
    html_output = args.html_output
    if args.viewer_mode:
        # External js and css
        write_viewer_data(html_output, transcripts, responses=survey)
    else:
        # Inline style
        visualize_transcripts(html_output, transcripts, responses=survey, css_file=args.css_file)
help='Transciprts paths', nargs='*', default=[]) parser.add_argument('--train-frac', help='Fraction of training examples', type=float, default=0.6) parser.add_argument('--test-frac', help='Fraction of test examples', type=float, default=0.2) parser.add_argument('--dev-frac', help='Fraction of dev examples', type=float, default=0.2) parser.add_argument('--output-path', help='Output path for splits') args = parser.parse_args() np.random.seed(0) json_data = ([], [], []) for path in args.example_paths: examples = read_json(path) folds = np.random.choice( 3, len(examples), p=[args.train_frac, args.dev_frac, args.test_frac]) for ex, fold in izip(examples, folds): json_data[fold].append(ex) for fold, dataset in izip(('train', 'dev', 'test'), json_data): if len(dataset) > 0: write_json(dataset, '%s%s.json' % (args.output_path, fold))
add_graph_arguments(parser)
add_graph_embed_arguments(parser)
add_learner_arguments(parser)
args = parser.parse_args()
random.seed(args.random_seed)
logstats.init(args.stats_file)
logstats.add_args('config', args)
# Save or load models
if args.init_from:
    # Resume from a previously saved model directory: restore its config,
    # vocab, and checkpoint. NOTE(review): the else branch (training from
    # scratch) is cut off below this view.
    start = time.time()
    print 'Load model (config, vocab, checkpoint) from', args.init_from
    config_path = os.path.join(args.init_from, 'config.json')
    vocab_path = os.path.join(args.init_from, 'vocab.pkl')
    saved_config = read_json(config_path)
    # Decoding strategy and batch size come from the current run, not the
    # saved training config.
    saved_config['decoding'] = args.decoding
    saved_config['batch_size'] = args.batch_size
    model_args = argparse.Namespace(**saved_config)
    # Checkpoint: at test time with --best, prefer the "-best" directory.
    if args.test and args.best:
        ckpt = tf.train.get_checkpoint_state(args.init_from + '-best')
    else:
        ckpt = tf.train.get_checkpoint_state(args.init_from)
    assert ckpt, 'No checkpoint found'
    assert ckpt.model_checkpoint_path, 'No model path found in checkpoint'
    # Load vocab
    mappings = read_pickle(vocab_path)
    print 'Done [%fs]' % (time.time() - start)
    # NOTE(review): fragment — these two lines close a per-n plotting loop
    # that starts outside this view.
    plt.tight_layout()
    plt.savefig(os.path.join(args.output, '%d-utterance.pdf' % n))
if args.attr:
    # Plot first-mention attribute/entity statistics against the background
    # distribution, one subplot row per stats file.
    #stats_files = ['%s_stats.json' % x for x in args.stats]
    stats_files = args.stats
    ncol = 1
    nrow = len(stats_files)
    #stats = ['max_count', 'max_min_ratio', 'max_count_normalize', 'max_min_ratio_normalize']
    stats = ['max_min_ratio_normalize', 'entity_count']
    stat_names = ['Skewness of the first mentioned attribute', 'Relative count of the first mentioned entity']
    for stat, stat_name in izip(stats, stat_names):
        plt.cla()
        fig, axes = plt.subplots(nrows=nrow, ncols=ncol, sharex=True, sharey=True)
        for i, (ax, stat_file, name) in enumerate(izip(axes, stats_files, args.names)):
            all_stats = read_json(stat_file)
            # 'first' = stat over first-mentioned entities; 'all' = background.
            data = all_stats['entity_mention']['first'][stat]
            background = all_stats['entity_mention']['all'][stat]
            print name, stat_name, np.mean(data)
            ax.hist(background, 30, edgecolor='g', normed=True, alpha=0.7, label='BG', fill=False, linewidth=3, histtype='step')
            ax.hist(data, 30, edgecolor='r', normed=True, alpha=0.7, label='First', fill=False, linewidth=3, histtype='step')
            # Single shared legend above the first subplot only.
            if i == 0:
                ax.legend(ncol=2, bbox_to_anchor=(1,1.5))
            ax.set_yscale('log')
            #ax.locator_params(nbins=4, axis='y')
            ax.set_title(name, fontsize='x-large')
            ax.set_xlabel(stat_name, fontsize='x-large')
        # NOTE(review): axbox is never used afterwards in this view.
        axbox = axes[0].get_position()
        plt.tight_layout()
        plt.savefig('%s/first_attr_%s.pdf' % (args.output, stat))
        # NOTE(review): fragment — these three lines finish a metadata-writing
        # function whose definition starts outside this view.
        row[question] = np.mean(scores)
        metadata['data'].append(row)
    write_json(metadata, os.path.join(outdir, 'metadata.json'))

def write_viewer_data(html_output, transcripts, responses=None):
    """Write metadata and per-chat HTML files for the external viewer into
    the html_output directory (created if missing)."""
    if not os.path.exists(html_output):
        os.makedirs(html_output)
    write_metadata(transcripts, html_output, responses)
    write_chat_htmls(transcripts, html_output, responses)

if __name__ == "__main__":
    # Visualize transcripts, either as viewer data (external js/css) or as
    # self-contained inline-styled HTML.
    parser = ArgumentParser()
    add_scenario_arguments(parser)
    add_visualization_arguments(parser)
    parser.add_argument('--transcripts', type=str, default='transcripts.json', help='Path to directory containing transcripts')
    args = parser.parse_args()
    schema = Schema(args.schema_path)
    #scenario_db = ScenarioDB.from_dict(schema, read_json(args.scenarios_path))
    transcripts = read_json(args.transcripts)
    html_output = args.html_output
    if args.viewer_mode:
        # External js and css
        write_viewer_data(html_output, transcripts)
    else:
        # Inline style
        visualize_transcripts(html_output, transcripts, css_file=args.css_file)
        # NOTE(review): fragment — this tail belongs to a statistics function
        # whose definition (and the `statsfile` handle) starts outside this
        # view.
        plot_alpha_stats(strategy_stats["alpha_stats"], args.plot_alpha_stats)
    if args.plot_item_stats:
        plot_num_item_stats(strategy_stats["num_items_stats"], args.plot_item_stats)
    json.dump(stats, statsfile)
    statsfile.close()

if __name__ == "__main__":
    # Load schema, scenarios, and transcripts, then run the statistics
    # computation over the full transcript set.
    parser = ArgumentParser()
    add_scenario_arguments(parser)
    add_lexicon_arguments(parser)
    parser.add_argument('--transcripts', type=str, default='transcripts.json', help='Path to directory containing transcripts')
    add_statistics_arguments(parser)
    parsed_args = parser.parse_args()
    schema = Schema(parsed_args.schema_path)
    scenario_db = ScenarioDB.from_dict(schema, read_json(parsed_args.scenarios_path))
    # NOTE(review): file handle from open() is never closed here; a `with`
    # block would be safer.
    transcripts = json.load(open(parsed_args.transcripts, 'r'))
    # transcripts = transcripts[:100]
    # Lexicon in non-learned mode (False), seeded from the scenarios file.
    lexicon = Lexicon(schema, False, scenarios_json=parsed_args.scenarios_path, stop_words=parsed_args.stop_words)
    compute_statistics(parsed_args, lexicon, schema, scenario_db, transcripts)