def count_unks():
    options = config.options()

    print('Data source: {}'.format(options.data_source))
    print('Unk threshold: {}'.format(options.unk_threshold))
    print('Tokenizer: {}'.format(options.tokenizer))
    print('')

    print('Loading data')
    train_insts = color_instances.SOURCES[options.data_source].train_data(
        listener=True)
    eval_insts = color_instances.SOURCES[options.data_source].test_data(
        listener=True)
    tokenize = TOKENIZERS[options.tokenizer]
    vec = SequenceVectorizer(unk_threshold=options.unk_threshold)

    print('Tokenizing training data')
    train_tokenized = [['<s>'] + tokenize(inst.input) + ['</s>']
                       for inst in train_insts]
    print('Tokenizing eval data')
    eval_tokenized = [['<s>'] + tokenize(inst.input) + ['</s>']
                      for inst in eval_insts]

    print('Initializing vectorizer')
    vec.add_all(train_tokenized)

    print_unk_ratio(train_tokenized, vec, 'Train')
    print_unk_ratio(eval_tokenized, vec, 'Eval')

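# The print_unk_ratio helper used above isn't shown here. A minimal sketch of
# what it could look like, assuming only the unk_threshold semantics visible
# in count_unks (tokens seen fewer than unk_threshold times in training map
# to <unk>). The Counter-based signature is hypothetical, not the actual
# implementation, which presumably queries the vectorizer directly.
def print_unk_ratio_sketch(tokenized, train_counts, unk_threshold, split_name):
    total = sum(len(seq) for seq in tokenized)
    unks = sum(1 for seq in tokenized for token in seq
               if train_counts[token] < unk_threshold)
    print('%s unk ratio: %d / %d (%.2f%%)' %
          (split_name, unks, total, 100.0 * unks / max(total, 1)))
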
def evaluate_l1_eval():
    options = config.options(read=True)

    grids_path = os.path.join(options.run_dir, 's0_grids.0.jsons.gz')
    with gzip.open(grids_path, 'rb') as infile:
        grids = [json.loads(line.strip()) for line in infile]

    data_path = os.path.join(options.run_dir, 'data.eval.jsons')
    with open(data_path, 'r') as infile:
        insts = [instance.Instance(**json.loads(line.strip()))
                 for line in infile]

    assert len(grids) == len(insts), '{} != {}'.format(len(grids), len(insts))

    gold_outputs = np.array([inst.output for inst in insts])
    s0 = np.array([[np.array(ss['S0']).T for ss in grid['sets']]
                   for grid in grids])
    l1 = compute_l1(s0, alpha=options.alpha)

    l1_scores = l1[np.arange(l1.shape[0]), gold_outputs].tolist()
    l1_preds = np.argmax(l1, axis=1).tolist()

    m = [metrics.log_likelihood,
         metrics.log_likelihood_bits,
         metrics.perplexity,
         metrics.accuracy]

    learner = DummyLearner(l1_preds, l1_scores)

    results = evaluate.evaluate(learner, insts, metrics=m, split_id='l1_eval',
                                write_data=False)
    output.output_results(results, 'l1_eval')

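# compute_l1 is defined elsewhere; this is a hedged sketch of the standard
# RSA pragmatic listener it presumably implements: L1(ref | utt) is
# proportional to S0(utt | ref)**alpha under a uniform referent prior.
# The assumption that the referent axis of s0 is the last one is mine,
# not the repo's; the real grid layout in s0_grids.0.jsons.gz may differ.
import numpy as np


def compute_l1_sketch(s0_log_probs, alpha=1.0):
    scores = alpha * s0_log_probs
    # log-normalize over referents to get log L1(ref | utt)
    return scores - np.logaddexp.reduce(scores, axis=-1, keepdims=True)
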
def __init__(self):
    options = config.options()
    self.game_config = cards_config.new(options.game_config)
    self.viewer = None
    self.verbosity = 4

    # One action for each player
    player = spaces.Discrete(len(ACTIONS))
    # should this be spaces.Tuple((player, player)) for 2 players?
    self.action_space = spaces.Tuple([player for _ in range(MAX_BATCH_SIZE)])

    # One board for walls, one for card observations, one for player location
    board = spaces.Box(np.zeros(MAX_BOARD_SIZE), np.ones(MAX_BOARD_SIZE))
    language_player = spaces.Box(np.array(0.), np.array(1.))
    language = spaces.Tuple([language_player
                             for _ in range(self.game_config.num_players - 1)])
    hand = spaces.Box(np.zeros((3, len(RANKS), len(SUITS))),
                      np.ones((3, len(RANKS), len(SUITS))))
    floor = spaces.Box(np.zeros((len(RANKS), len(SUITS))),
                       np.ones((len(RANKS), len(SUITS))))
    all_obs = (board, board, board, hand, floor, language)
    self.observation_space = spaces.Tuple([e
                                           for _ in range(MAX_BATCH_SIZE)
                                           for e in all_obs])

    self.clear_boards()

    import world
    self.default_world = world.CardsWorld(all_transcripts()[0])

    self._seed()

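# A usage sketch for the spaces defined above, assuming the standard gym
# Tuple/Box/Discrete API imported as `spaces` (the env class name here is
# hypothetical): sampling from a Tuple space returns one value per sub-space.
# env = CardsEnv()
# actions = env.action_space.sample()   # one Discrete action per batch slot
# obs = env.observation_space.sample()  # MAX_BATCH_SIZE x 6 observation parts
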
def main():
    options = config.options(read=True)
    app = wx.App()  # NOQA: wx needs an App even if we're only showing a few modal dialogs

    this_output = html_report.get_output(options.run_dir, options.split)
    this_insts = get_trial_data(this_output, options.test_size, options.run_dir)
    if options.compare_dir:
        compare_output = html_report.get_output(options.compare_dir, options.split)
        compare_insts = get_trial_data(compare_output, options.test_size,
                                       options.run_dir)
    else:
        compare_insts = []

    all_insts = this_insts + compare_insts
    random.shuffle(all_insts)

    human = HumanListener()
    human.train(all_insts)

    m = [metrics.squared_error]
    test_results = evaluate.evaluate(human, this_insts, split_id='human_eval',
                                     metrics=m)
    output.output_results(test_results, options.run_dir)
    if compare_insts:
        test_results = evaluate.evaluate(human, compare_insts,
                                         split_id='human_eval_compare',
                                         metrics=m)
        output.output_results(test_results, options.compare_dir)

def write_metrics():
    options = config.options(read=True)
    for split in options.splits:
        output = html_report.get_output(options.run_dir, split)
        for m in options.metrics:
            write_metric_for_split(output, options.run_dir, split, m)

def run_speaker(self, speaker_class, cell='LSTM', color_repr='buckets',
                tensorboard=True, images=False):
    sys.argv = []
    options = config.options()
    options.train_iters = 2
    options.train_epochs = 3
    options.speaker_cell = cell
    options.speaker_color_repr = color_repr
    options.listener = False

    mo = MockOpen(TEST_DIR)
    mgfp = mock_get_file_path(TEST_DIR)
    with mock.patch('stanza.monitoring.summary.open', mo), \
            mock.patch('stanza.monitoring.summary.SummaryWriter', MockSummaryWriter), \
            mock.patch('stanza.research.config.open', mo), \
            mock.patch('stanza.research.config.get_file_path', mgfp):
        speaker = speaker_class()
        train_data = [instance.Instance((0, 255, 0), 'green')]
        speaker.train(train_data)
        predictions, scores = speaker.predict_and_score(train_data)

        # predictions = ['somestring']
        self.assertIsInstance(predictions, list)
        self.assertEqual(len(predictions), 1)
        self.assertIsInstance(predictions[0], basestring)

        # scores = [123.456]
        self.assertIsInstance(scores, list)
        self.assertEqual(len(scores), 1)
        self.assertIsInstance(scores[0], float)

        if tensorboard:
            self.check_tensorboard(mo, mgfp, images=images)

def tuna_test_cv(listener=False):
    options = config.options()
    files_glob = 'tuna/corpus/%s/*.xml' % (options.tuna_section,)
    trials, splits = get_tuna_insts(files_glob, options.tuna_cv_folds)
    train_indices, test_indices = splits[options.tuna_cv_test_fold]
    return trials_to_insts([trials[i] for i in test_indices], listener=listener)

def run_listener(self, listener_class=ListenerLearner, cell='LSTM',
                 tensorboard=True):
    sys.argv = []
    options = config.options()
    options.train_iters = 2
    options.train_epochs = 3
    options.listener_cell = cell
    options.listener = True

    mo = MockOpen(TEST_DIR)
    mgfp = mock_get_file_path(TEST_DIR)
    with mock.patch('stanza.monitoring.summary.open', mo), \
            mock.patch('stanza.monitoring.summary.SummaryWriter', MockSummaryWriter), \
            mock.patch('stanza.research.config.open', mo), \
            mock.patch('stanza.research.config.get_file_path', mgfp):
        listener = listener_class()
        train_data = [instance.Instance('green', (0, 255, 0))]
        listener.train(train_data)
        predictions, scores = listener.predict_and_score(train_data)

        # predictions = [(123, 45, 67)]
        self.assertIsInstance(predictions, list)
        self.assertEqual(len(predictions), 1)
        self.assertEqual(len(predictions[0]), 3)
        self.assertIsInstance(predictions[0][0], Number)

        # scores = [123.456]
        self.assertIsInstance(scores, list)
        self.assertEqual(len(scores), 1)
        self.assertIsInstance(scores[0], float)

        if tensorboard:
            self.check_tensorboard(mo, mgfp, images=True)

def __init__(self):
    options = config.options()
    self.tokenizer = options.speaker_tokenizer
    self.token_counts = Counter()
    self.seq_vec = SequenceVectorizer(
        unk_threshold=options.speaker_unk_threshold)
    self.num_tokens = 0

def output_replaced_data(run_dir=None):
    options = config.options(read=True)
    run_dir = run_dir or options.run_dir
    for output, preds, out_filename in get_all_outputs(run_dir,
                                                       options.speaker_dir,
                                                       options.model_name):
        config.dump(replaced_data(output, preds), out_filename, lines=True)

def tuna_all(listener=False, corpus='tuna/corpus'):
    options = config.options()
    files_glob = '%s/%s/*.xml' % (corpus, options.tuna_section)
    trials, _ = get_tuna_insts(files_glob, options.tuna_cv_folds)
    return trials_to_insts(trials, listener=listener)

def generate_html_reports(run_dir=None, compare_dir=None):
    options = config.options(read=True)
    run_dir = run_dir or options.run_dir
    compare_dir = compare_dir or options.compare_dir
    # use the resolved compare_dir; the original passed options.compare_dir,
    # leaving the compare_dir argument dead
    for output, compare, out_path in get_all_outputs(run_dir, compare_dir):
        with open(out_path, 'w') as outfile:
            outfile.write(html_report(output, compare))

def evaluate_ak_blending():
    options = config.options(read=True)

    grids_path = os.path.join(options.run_dir, 's0_grids.0.jsons.gz')
    with gzip.open(grids_path, 'rb') as infile:
        grids = [json.loads(line.strip()) for line in infile]

    data_path = os.path.join(options.run_dir, 'data.eval.jsons')
    with open(data_path, 'r') as infile:
        insts = [instance.Instance(**json.loads(line.strip()))
                 for line in infile]

    assert len(grids) == len(insts), '{} != {}'.format(len(grids), len(insts))

    gold_outputs = np.array([inst.output for inst in insts])
    l0 = np.array([[np.array(ss['L0']).T for ss in grid['sets']]
                   for grid in grids])
    s0 = np.array([[np.array(ss['S0']).T for ss in grid['sets']]
                   for grid in grids])
    if options.additive:
        ak = compute_additive(l0, s0,
                              bw=options.base_weight,
                              sw=options.speaker_weight,
                              alpha_s1=options.alpha,
                              alpha_l1=options.alpha_l1)
    else:
        ak = compute_ak(l0, s0,
                        bw=options.base_weight,
                        sw=options.speaker_weight,
                        alpha=options.alpha,
                        gamma=options.gamma)

    ak_scores = ak[np.arange(ak.shape[0]), gold_outputs].tolist()
    ak_preds = np.argmax(ak, axis=1).tolist()

    m = [metrics.log_likelihood,
         metrics.log_likelihood_bits,
         metrics.perplexity,
         metrics.accuracy]

    learner = DummyLearner(ak_preds, ak_scores, params={
        'base_weight': options.base_weight,
        'speaker_weight': options.speaker_weight,
        'alpha': options.alpha,
        'alpha_l1': options.alpha_l1,
        'gamma': options.gamma,
        'additive': options.additive,
    })

    split_id = '{}_eval'.format(options.blend_name)
    results = evaluate.evaluate(learner, insts, metrics=m, split_id=split_id,
                                write_data=False)
    output.output_results(results, split_id)

    options_dump = vars(options)
    del options_dump['overwrite']
    del options_dump['config']
    config.dump_pretty(options_dump, split_id + '_config.json')

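# compute_ak and compute_additive are defined elsewhere; this hedged sketch
# shows only the general pattern their names and keyword arguments suggest:
# blend a base listener (from L0) with a pragmatic listener derived from S0,
# either log-linearly (weights as exponents) or additively (weights on
# probabilities). Shapes and weight semantics here are assumptions, not the
# repo's actual definitions.
import numpy as np


def blend_listeners_sketch(log_l0, log_l1, bw=1.0, sw=1.0, additive=False):
    if additive:
        scores = np.log(bw * np.exp(log_l0) + sw * np.exp(log_l1))
    else:
        scores = bw * log_l0 + sw * log_l1
    # renormalize over the referent axis (assumed to be the last)
    return scores - np.logaddexp.reduce(scores, axis=-1, keepdims=True)
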
def main():
    options = config.options()

    progress.set_resolution(datetime.timedelta(seconds=options.progress_tick))

    train_data = color_instances.SOURCES[options.data_source].train_data(
        listener=options.listener)[:options.train_size]
    if options.validation_size:
        assert options.validation_size < len(train_data), \
            ('No training data after validation split! (%d <= %d)' %
             (len(train_data), options.validation_size))
        validation_data = train_data[-options.validation_size:]
        train_data = train_data[:-options.validation_size]
    else:
        validation_data = None
    test_data = color_instances.SOURCES[options.data_source].test_data(
        options.listener)[:options.test_size]

    learner = learners.new(options.learner)

    m = [metrics.log_likelihood,
         metrics.log_likelihood_bits,
         metrics.perplexity,
         metrics.aic]
    if options.listener and not isinstance(test_data[0].output, numbers.Integral):
        m.append(metrics.squared_error)
    elif isinstance(test_data[0].output, (tuple, list)):
        m.append(metrics.prec1)
        # check the elements, not the sequence itself, for string outputs
        # (the sequence can never be a basestring in this branch)
        if test_data[0].output and isinstance(test_data[0].output[0], basestring):
            m.append(metrics.bleu)
    else:
        m.append(metrics.accuracy)
        if test_data[0].output and isinstance(test_data[0].output, basestring):
            m.append(metrics.bleu)

    if options.load:
        with open(options.load, 'rb') as infile:
            learner.load(infile)
    else:
        learner.train(train_data, validation_data, metrics=m)
        with open(config.get_file_path('model.p'), 'wb') as outfile:
            learner.dump(outfile)

        train_results = evaluate.evaluate(learner, train_data, metrics=m,
                                          split_id='train',
                                          write_data=options.output_train_data)
        output.output_results(train_results, 'train')

    test_results = evaluate.evaluate(learner, test_data, metrics=m,
                                     split_id='dev',
                                     write_data=options.output_test_data)
    output.output_results(test_results, 'dev')

def train(self, training_instances, validation_instances='ignored',
          metrics='ignored'):
    options = config.options()
    for inst in training_instances:
        inp, out = inst.input, inst.output
        if options.listener:
            out = self.vectorize(out)
        else:
            inp = self.vectorize(inp)
        self.counters[inp][out] += 1

def output_sample(model):
    options = config.options()
    insts = model.sample_joint_smooth(num_samples=options.num_samples)
    if not options.listener:
        insts = [inst.inverted() for inst in insts]
    html = rsa_fit_data.get_html(insts, title='Agent samples (smoothed prior)')
    config.dump([inst.__dict__ for inst in insts], 'data.sample.jsons',
                lines=True)
    with config.open('report.sample.html', 'w') as outfile:
        outfile.write(html)

def tune_queue(main_fn):
    config.redirect_output()
    options = config.options()
    if any('tune' not in s for s in options.data_source):
        warnings.warn(
            'expected all --data_source\'s to contain "tune", instead got "{}". '
            'Are you polluting your dev/test set?'.format(options.data_source))
    if 'gpu' in options.device or 'cuda' in options.device:
        warnings.warn(
            'device is "{}". Have you checked that all processes will fit '
            'on one GPU? (Random GPU assignment has not been implemented '
            'yet.)'.format(options.device))

    with open(options.tune_config, 'r') as infile:
        tune_options = config.HoconConfigFileParser().parse(infile)

    reg = ProcessRegistry(main_fn, tune_options, options.tune_maximize)

    remaining_random = options.tune_random
    remaining_local = options.tune_local
    if options.tune_local <= 0:
        remaining_local = None

    try:
        reg.start_default()
        while remaining_random > 0 and \
                reg.running_processes < options.tune_max_processes:
            reg.start_random()
            remaining_random -= 1
        while remaining_local > 0 and \
                reg.running_processes < options.tune_max_processes:
            reg.start_local()
            remaining_local -= 1  # was remaining_random, which never drained the local budget

        while reg.running_processes > 0:
            name, objective = reg.get()
            print('\nTUNE: {:10.3f} {}\n'.format(objective, name[:70]))
            while remaining_random > 0 and \
                    reg.running_processes < options.tune_max_processes:
                reg.start_random()
                remaining_random -= 1
            while (remaining_local is None or remaining_local > 0) and \
                    reg.running_processes < options.tune_max_processes:
                try:
                    reg.start_local()
                    if remaining_local is not None:
                        remaining_local -= 1
                except StopIteration:
                    print('no new local search candidates')
                    break
    except KeyboardInterrupt:
        reg.terminate()

    print('')
    print('best result:')
    print('{:10.3f} {}'.format(reg.best_objective, str(reg.best_name)[:70]))

def __init__(self):
    import learners
    import cards_env
    options = config.options()
    if options.verbosity >= 4:
        print('Loading speaker')
    self.speaker = learners.new(options.p2_learner)
    self.speaker.load(options.p2_load)
    self.utterances = [None for _ in range(cards_env.MAX_BATCH_SIZE)]
    self.ace_locs = [None for _ in range(cards_env.MAX_BATCH_SIZE)]

def test_main():
    options = config.options()
    import sys
    print('stdout')
    sys.stderr.write('stderr\n')
    return {}, {
        'eval.perplexity.gmean': (options.speaker_learning_rate +
                                  options.speaker_cell_size +
                                  len(options.speaker_optimizer))
    }

def generate_html_reports(run_dir=None, compare_dir=None):
    options = config.options(read=True)
    run_dir = run_dir or options.run_dir
    compare_dir = compare_dir or options.compare_dir
    # use the resolved compare_dir; the original passed options.compare_dir,
    # leaving the compare_dir argument dead
    for output, compare, out_path in get_all_outputs(run_dir, compare_dir):
        with open(out_path, 'w') as outfile:
            outfile.write(html_report(output, compare,
                                      per_token=options.per_token_prob,
                                      only_differing=options.only_differing_preds,
                                      show_all=options.show_all,
                                      show_tokens=options.show_tokens))

def __init__(self):
    options = config.options()
    self.counters = defaultdict(Counter)
    if options.listener:
        res = options.listener_color_resolution
        hsv = options.listener_hsv
    else:
        res = options.speaker_color_resolution
        hsv = options.speaker_hsv
    self.res = res
    self.hsv = hsv
    self.init_vectorizer()

def reference_game(insts, gen_func, listener=False):
    options = config.options()
    for i in range(len(insts)):
        color = insts[i].output if listener else insts[i].input
        distractors = [gen_func(color) for _ in range(options.num_distractors)]
        answer = rng.randint(0, len(distractors) + 1)
        context = distractors[:answer] + [color] + distractors[answer:]
        ref_inst = (Instance(insts[i].input, answer, alt_outputs=context)
                    if listener else
                    Instance(answer, insts[i].output, alt_inputs=context))
        insts[i] = ref_inst
    return insts

def reference_game(insts, gen_func, listener=False):
    options = config.options()
    result = []
    for inst in insts:
        color = inst.output if listener else inst.input
        distractors = [gen_func(color) for _ in range(options.num_distractors)]
        answer = rng.randint(0, len(distractors) + 1)
        context = distractors[:answer] + [color] + distractors[answer:]
        ref_inst = (Instance(inst.input, answer, alt_outputs=context)
                    if listener else
                    Instance(answer, inst.output, alt_inputs=context))
        result.append(ref_inst)
    return result

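# Illustrative use of reference_game: wrap plain (description, color)
# instances in reference-game contexts with sampled distractors. The
# perturb_color generator below is hypothetical (and assumes the
# module-level rng is a numpy RandomState, so randint excludes the upper
# bound); anything mapping a color to a distractor color satisfies the
# gen_func contract.
def perturb_color(color):
    return tuple(min(255, max(0, c + rng.randint(-40, 41))) for c in color)

# listener_insts = reference_game(listener_insts, perturb_color, listener=True)
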
def output_sample(model):
    options = config.options()
    assert len(options.data_source) == 1, \
        'Only one data source at a time for sampling (got %s)' % options.data_source
    source = options.data_source[0]
    train_insts = color_instances.SOURCES[source].train_data(listener=options.listener)
    test_insts = color_instances.SOURCES[source].test_data(
        options.listener)[:options.test_size[0]]
    for output in model.predict(test_insts, random=True):
        print(json.dumps(output))

def bilingual_unbalanced_train(listener=False, suffix='Chinese_filtered'):
    options = config.options()
    num_en_insts = none_if_negative(options.num_en_insts)
    num_zh_insts = none_if_negative(options.num_zh_insts)
    result = []
    en_insts = filtered_train(listener=listener)[:num_en_insts]
    zh_insts = chinese_train(listener=listener, suffix=suffix)[:num_zh_insts]
    for inst in en_insts:
        result.append(bilingual_tag_instance(inst, 'en', listener=listener,
                                             unicodify=True))
    for inst in zh_insts:
        result.append(bilingual_tag_instance(inst, 'zh', listener=listener))
    rng.shuffle(result)
    return result

def convert_gpu():
    options = config.options()

    with open(options.load, 'rb') as infile:
        with thutils.device_context(options.from_device):
            learner = pickle.load(infile)

    model = learner.model
    with thutils.device_context(options.to_device):
        model.module = thutils.maybe_cuda(model.module)
        model.loss = thutils.maybe_cuda(model.loss)
        model.build_optimizer()
        learner.options.device = options.to_device

        with open(options.save, 'wb') as outfile:
            learner.dump(outfile)

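# Typical invocation, with option names taken from the code above (the
# script name and device strings are placeholders; check the config module
# for the accepted device syntax):
#   python convert_gpu.py --load runs/a/model.p --save runs/a/model.cpu.p \
#       --from_device gpu0 --to_device cpu
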
def bilingual_train(listener=False, suffix='Chinese'):
    options = config.options()
    num_en_insts = none_if_negative(options.num_en_insts)
    num_zh_insts = none_if_negative(options.num_zh_insts)
    result = []
    en_insts = filtered_train(listener=listener)[:num_en_insts]
    zh_insts = chinese_train(listener=listener, suffix=suffix)[:num_zh_insts]
    if len(en_insts) >= len(zh_insts):
        zh_insts = cycle_shuffled(zh_insts)
    else:
        en_insts = cycle_shuffled(en_insts)
    for e, z in zip(en_insts, zh_insts):
        result.append(bilingual_tag_instance(e, 'en', listener=listener,
                                             unicodify=True))
        result.append(bilingual_tag_instance(z, 'zh', listener=listener))
    return result

def get_dataset(self, model):
    if hasattr(model, 'options'):
        options = model.options
    else:
        options = config.options()
    data_sources = options.data_source
    if not isinstance(data_sources, list):
        data_sources = [data_sources]
    train_sizes = options.train_size
    if not isinstance(train_sizes, list):
        train_sizes = [train_sizes]
    return [
        inst
        for data_source, train_size in zip(data_sources, train_sizes)
        for inst in color_instances.SOURCES[data_source].train_data(
            listener=True)[:train_size]
    ]

def main():
    options = config.options()

    progress.set_resolution(datetime.timedelta(seconds=options.progress_tick))

    train_size = options.train_size if options.train_size >= 0 else None
    test_size = options.test_size if options.test_size >= 0 else None

    train_data = datasets.SOURCES[options.data_source].train_data()[:train_size]
    if options.validation_size:
        assert options.validation_size < len(train_data), \
            ('No training data after validation split! (%d <= %d)' %
             (len(train_data), options.validation_size))
        validation_data = train_data[-options.validation_size:]
        train_data = train_data[:-options.validation_size]
    else:
        validation_data = None
    test_data = datasets.SOURCES[options.data_source].test_data()[:test_size]

    learner = learners.new(options.learner)

    m = [metrics.METRICS[m] for m in options.metrics]

    if options.load:
        learner.load(options.load)
    else:
        learner.train(train_data, validation_data, metrics=m)
        model_path = config.get_file_path('model')
        if model_path:
            learner.dump(model_path)

        train_results = evaluate.evaluate(learner, train_data, metrics=m,
                                          split_id='train',
                                          write_data=options.output_train_data)
        output.output_results(train_results, 'train')
        if options.output_train_samples:
            samples = learner.predict(train_data, random=True)
            config.dump(samples, 'samples.train.jsons', lines=True)

    test_results = evaluate.evaluate(learner, test_data, metrics=m,
                                     split_id='eval',
                                     write_data=options.output_test_data)
    output.output_results(test_results, 'eval')
    if options.output_test_samples:
        samples = learner.predict(test_data, random=True)
        config.dump(samples, 'samples.eval.jsons', lines=True)

def main():
    options = config.options()

    progress.set_resolution(datetime.timedelta(seconds=options.progress_tick))

    train_data = datasets.SOURCES[
        options.data_source].train_data()[:options.train_size]
    if options.validation_size:
        assert options.validation_size < len(train_data), \
            ('No training data after validation split! (%d <= %d)' %
             (len(train_data), options.validation_size))
        validation_data = train_data[-options.validation_size:]
        train_data = train_data[:-options.validation_size]
    else:
        validation_data = None
    test_data = datasets.SOURCES[
        options.data_source].test_data()[:options.test_size]

    learner = learners.new(options.learner)

    m = [metrics.METRICS[m] for m in options.metrics]

    if options.load:
        with open(options.load, 'rb') as infile:
            learner.load(infile)
    else:
        learner.train(train_data, validation_data, metrics=m)
        model_path = config.get_file_path('model.pkl')
        if model_path:
            with open(model_path, 'wb') as outfile:
                learner.dump(outfile)

        train_results = evaluate.evaluate(learner, train_data, metrics=m,
                                          split_id='train',
                                          write_data=options.output_train_data)
        output.output_results(train_results, 'train')

    test_results = evaluate.evaluate(learner, test_data, metrics=m,
                                     split_id='eval',
                                     write_data=options.output_test_data)
    output.output_results(test_results, 'eval')

def predict_and_score(self, eval_instances, random='ignored', verbosity=0):
    options = config.options()
    if options.verbosity + verbosity >= 2:
        print('Testing')
    predictions = []
    scores = []
    for inst in eval_instances:
        inp, out = inst.input, inst.output
        if options.listener:
            out = self.vectorize(out)
        else:
            inp = self.vectorize(inp)

        counter = self.counters[inp]
        highest = counter.most_common(1)
        if highest:
            if options.listener:
                prediction = self.unvectorize(highest[0][0])
            else:
                prediction = highest[0][0]
        elif options.listener:
            prediction = (0, 0, 0)
        else:
            prediction = '<unk>'

        total = sum(counter.values())
        if total:
            if options.verbosity + verbosity >= 9:
                print('%s -> %s: %s of %s [%s]' %
                      (repr(inp), repr(out), counter[out], total, inst.input))
            prob = counter[out] * 1.0 / total
        else:
            if options.verbosity + verbosity >= 9:
                print('%s -> %s: no data [%s]' %
                      (repr(inp), repr(out), inst.input))
            prob = 1.0 * (inst.output == prediction)
        score = np.log(prob)
        if options.listener:
            score += self.score_adjustment

        predictions.append(prediction)
        scores.append(score)
    return predictions, scores

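# End-to-end sketch of the counter-based learner assembled from the
# __init__, train, and predict_and_score methods above: it memorizes a
# histogram of outputs per (vectorized) input and scores by empirical
# relative frequency. The class name and instances are illustrative only.
# learner = HistogramLearner()
# learner.train([instance.Instance('green', (0, 255, 0))])
# preds, scores = learner.predict_and_score(
#     [instance.Instance('green', (0, 255, 0))])
# # all observed mass is on one output, so prob == 1.0 and log prob == 0.0
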
def output_csv():
    options = config.options(read=True)
    output = html_report.get_output(options.run_dir, options.split)
    insts = get_trial_data(output, options.test_size, options.run_dir)
    print(','.join('ex%d%s' % (ex, part)
                   for ex in range(BATCH_SIZE)
                   for part in ['cid', 'system', 'desc', 'target',
                                'c1', 'c2', 'c3']))
    for i, batch in enumerate(iterators.iter_batches(insts, BATCH_SIZE)):
        batch = list(batch)
        if len(batch) != BATCH_SIZE:
            continue
        print(','.join('"%d:%d","%s","%s","%s","%s","%s","%s"' %
                       ((i, j, inst.source, inst.input, inst.output) +
                        tuple(html_report.web_color(c)
                              for c in inst.alt_outputs[:3]))
                       for j, inst in enumerate(batch)))

def generate_html(run_dir=None):
    options = config.options(read=True)
    run_dir = run_dir or options.run_dir
    out_path = os.path.join(run_dir, 'grids.html')
    # probe for the s0 grids file; fall back to the plain grids file if absent
    try:
        in_path = os.path.join(run_dir, 's0_grids.0.jsons.gz')
        with open(in_path, 'r'):
            pass
    except IOError:
        in_path = os.path.join(run_dir, 'grids.0.jsons.gz')
    output = get_output(run_dir, 'eval')
    if 'error' in output.data[0]:
        output = get_output(run_dir, 'hawkins_dev')
    if 'error' in output.data[0]:
        output = get_output(run_dir, 'dev')
    with open(out_path, 'w') as outfile, gzip.open(in_path, 'r') as infile:
        write_files(infile, outfile, output, options)

def predict_and_score(self, eval_instances, random='ignored', verbosity='ignored'):
    options = config.options()
    predictions = []
    scores = []

    pool = multiprocessing.Pool(options.lux_threads)
    batch_size = options.lux_batch_size

    progress.start_task('Example', len(eval_instances))
    for start in range(0, len(eval_instances), batch_size):
        progress.progress(start)
        batch_output = pool.map(lux_predict_and_score,
                                eval_instances[start:start + batch_size])
        batch_preds, batch_scores = zip(*batch_output)
        predictions.extend(batch_preds)
        scores.extend(batch_scores)
    progress.end_task()
    pool.close()  # release worker processes instead of leaking them per call
    pool.join()

    return predictions, scores

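# multiprocessing.Pool.map can only ship picklable, module-level callables to
# workers, so lux_predict_and_score (not shown above) must live at module
# scope rather than as a bound method. A hedged sketch of its contract only;
# the body is a placeholder, not the actual implementation:
def lux_predict_and_score_contract(inst):
    prediction = '<unk>'   # placeholder for the model's prediction on inst.input
    score = float('-inf')  # placeholder log-probability of inst.output
    return prediction, score
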
def print_error_analysis():
    options = config.options(read=True)
    output = get_output(options.run_dir, 'eval')
    errors = [(inst['input'], pred, inst['output'])
              for inst, pred in zip(output.data, output.predictions)
              if inst['output'] != pred]
    if 0 < options.max_examples < len(errors):
        indices = np.random.choice(np.arange(len(errors)),
                                   size=options.max_examples, replace=False)
    else:
        indices = range(len(errors))
    if options.html:
        print('<!DOCTYPE html>')
        print('<html><head><title>Error analysis</title>'
              '<meta charset="utf-8" /></head><body>')
    for i in indices:
        inp, pred, gold = [unicode(s).strip() for s in errors[i]]
        editops = lev.editops(gold, pred)
        print_visualization(inp, pred, gold, editops, html=options.html)
    if options.html:
        print('</body></html>')

def start(self, tuned_options, mode='manual'):
    name = self.short_name(tuned_options)

    options_dict = dict(config.options().__dict__)
    options_dict['run_dir'] = os.path.join(options_dict['run_dir'], name)
    options_dict['overwrite'] = False
    options_dict['config'] = None
    for k, v in tuned_options:
        options_dict[k] = v
    options = argparse.Namespace(**options_dict)

    if options_dict['tune_delay'] > 0:
        time.sleep(options_dict['tune_delay'])

    proc = mp.Process(target=queue_results,
                      args=(self.main_fn, options, name, self.results_queue))
    self.proc_for_name[name] = proc
    self.name_for_options[tuned_options] = name
    self.options_for_name[name] = tuned_options
    self.running_processes += 1
    print('starting {}: {}'.format(mode, name))
    proc.start()

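# Example of the tuned_options format start() expects: a hashable iterable of
# (option_name, value) pairs, since it is used as a dict key in
# name_for_options. The option names and values here are hypothetical.
# registry.start((('speaker_learning_rate', 0.05),
#                 ('speaker_cell_size', 128)),
#                mode='random')
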
def generate_csv(run_dir=None):
    options = config.options(read=True)
    run_dir = run_dir or options.run_dir
    in_path = ('behavioralAnalysis/humanOutput/filteredCorpus.csv'
               if options.filtered else None)
    if options.listener:
        out_path = os.path.join(run_dir, 'clickedObj.csv')
        if not in_path:
            in_path = 'hawkins_data/colorReferenceClicks%s.csv' % options.suffix
    else:
        out_path = os.path.join(run_dir, 'message.csv')
        if not in_path:
            in_path = 'hawkins_data/colorReferenceMessage%s.csv' % options.suffix
    output = get_output(run_dir, 'eval')
    if 'error' in output.data[0]:
        output = get_output(run_dir, 'hawkins_dev')
    if 'error' in output.data[0]:
        output = get_output(run_dir, 'dev')
    with open(out_path, 'w') as outfile, open(in_path, 'r') as infile:
        outfile.write(csv_output(output, infile,
                                 listener=options.listener,
                                 source=options.source,
                                 filtered=options.filtered))

def output_html_dists():
    options = config.options(read=True)

    with gzip.open(config.get_file_path('dists.b64.gz'), 'r') as infile:
        rows = list(infile)
    with config.open('dists.js', 'w') as outfile:
        write_json_dists(rows, outfile)
        write_json_ents(rows, outfile)

    with config.open('data.eval.jsons', 'r') as infile:
        insts = list(infile)
    with config.open('predictions.eval.jsons', 'r') as infile:
        preds = list(infile)
    try:
        with config.open('samples.eval.jsons', 'r') as infile:
            samples = list(infile)
    except IOError:
        samples = None
    with config.open('insts.js', 'w') as outfile:
        write_json_insts(insts, preds, samples, outfile,
                         listener=options.listener)

    shutil.copy('dists.html', config.get_file_path('dists.html'))

def print_confusion_matrix():
    options = config.options(read=True)

    data_path = os.path.join(options.run_dir, 'data.eval.jsons')
    with open(data_path, 'r') as infile:
        gold = [json.loads(line.strip())['output'] for line in infile]
    preds_path = os.path.join(options.run_dir, 'predictions.eval.jsons')
    with open(preds_path, 'r') as infile:
        preds = [json.loads(line.strip()) for line in infile]

    print(classification_report(gold, preds,
                                target_names=['none', 'speak', 'choose']))
    print(confusion_matrix(gold, preds))
    print('(row = gold; column = prediction)\n')

    # collapse to a binary task: 'choose' (label 2) vs. everything else
    gold = [int(y == 2) for y in gold]
    preds = [int(y == 2) for y in preds]
    print(classification_report(gold, preds, target_names=['other', 'choose']))
    print(confusion_matrix(gold, preds))
    print('(row = gold; column = prediction)\n')