def evaluate_l1_eval():
    options = config.options(read=True)

    grids_path = os.path.join(options.run_dir, 's0_grids.0.jsons.gz')
    with gzip.open(grids_path, 'rb') as infile:
        grids = [json.loads(line.strip()) for line in infile]

    data_path = os.path.join(options.run_dir, 'data.eval.jsons')
    with open(data_path, 'r') as infile:
        insts = [instance.Instance(**json.loads(line.strip())) for line in infile]

    assert len(grids) == len(insts), '{} != {}'.format(len(grids), len(insts))
    gold_outputs = np.array([inst.output for inst in insts])

    s0 = np.array([[np.array(ss['S0']).T for ss in grid['sets']] for grid in grids])
    l1 = compute_l1(s0, alpha=options.alpha)
    l1_scores = l1[np.arange(l1.shape[0]), gold_outputs].tolist()
    l1_preds = np.argmax(l1, axis=1).tolist()

    m = [metrics.log_likelihood, metrics.log_likelihood_bits,
         metrics.perplexity, metrics.accuracy]
    learner = DummyLearner(l1_preds, l1_scores)
    results = evaluate.evaluate(learner, insts, metrics=m,
                                split_id='l1_eval', write_data=False)
    output.output_results(results, 'l1_eval')

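# Illustrative sketch only -- DummyLearner is defined elsewhere in this
# codebase. The contract assumed by evaluate.evaluate above is roughly a
# learner whose predict_and_score returns the precomputed predictions and
# per-instance gold log-likelihood scores unchanged, so the standard metrics
# can be computed from scores produced offline (here, from the S0/L0 grids).
class DummyLearnerSketch(object):
    def __init__(self, preds, scores, params=None):
        self.preds = preds
        self.scores = scores
        self.params = params if params is not None else {}

    def predict_and_score(self, eval_instances, random=False, verbosity=0):
        # Ignore the instances: predictions and scores were already computed
        # before this learner was constructed.
        return self.preds, self.scores
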
def main():
    options = config.options(read=True)

    app = wx.App()  # NOQA: wx needs an App even if we're only showing a few modal dialogs

    this_output = html_report.get_output(options.run_dir, options.split)
    this_insts = get_trial_data(this_output, options.test_size, options.run_dir)
    if options.compare_dir:
        compare_output = html_report.get_output(options.compare_dir, options.split)
        compare_insts = get_trial_data(compare_output, options.test_size, options.run_dir)
    else:
        compare_insts = []

    all_insts = this_insts + compare_insts
    random.shuffle(all_insts)

    human = HumanListener()
    human.train(all_insts)

    m = [metrics.squared_error]

    test_results = evaluate.evaluate(human, this_insts,
                                     split_id='human_eval', metrics=m)
    output.output_results(test_results, options.run_dir)

    if compare_insts:
        test_results = evaluate.evaluate(human, compare_insts,
                                         split_id='human_eval_compare', metrics=m)
        output.output_results(test_results, options.compare_dir)

def main():
    options = config.options()

    progress.set_resolution(datetime.timedelta(seconds=options.progress_tick))

    train_data = color_instances.SOURCES[options.data_source].train_data(
        listener=options.listener)[:options.train_size]
    if options.validation_size:
        assert options.validation_size < len(train_data), \
            ('No training data after validation split! (%d <= %d)' %
             (len(train_data), options.validation_size))
        validation_data = train_data[-options.validation_size:]
        train_data = train_data[:-options.validation_size]
    else:
        validation_data = None
    test_data = color_instances.SOURCES[options.data_source].test_data(
        options.listener)[:options.test_size]

    learner = learners.new(options.learner)

    m = [metrics.log_likelihood,
         metrics.log_likelihood_bits,
         metrics.perplexity,
         metrics.aic]
    if options.listener and not isinstance(test_data[0].output, numbers.Integral):
        m.append(metrics.squared_error)
    elif isinstance(test_data[0].output, (tuple, list)):
        m.append(metrics.prec1)
        if test_data[0].output and isinstance(test_data[0].output, basestring):
            m.append(metrics.bleu)
    else:
        m.append(metrics.accuracy)
        if test_data[0].output and isinstance(test_data[0].output, basestring):
            m.append(metrics.bleu)

    if options.load:
        with open(options.load, 'rb') as infile:
            learner.load(infile)
    else:
        learner.train(train_data, validation_data, metrics=m)
        with open(config.get_file_path('model.p'), 'wb') as outfile:
            learner.dump(outfile)

    train_results = evaluate.evaluate(learner, train_data, metrics=m,
                                      split_id='train',
                                      write_data=options.output_train_data)
    output.output_results(train_results, 'train')

    test_results = evaluate.evaluate(learner, test_data, metrics=m,
                                     split_id='dev',
                                     write_data=options.output_test_data)
    output.output_results(test_results, 'dev')

def evaluate_ak_blending():
    options = config.options(read=True)

    grids_path = os.path.join(options.run_dir, 's0_grids.0.jsons.gz')
    with gzip.open(grids_path, 'rb') as infile:
        grids = [json.loads(line.strip()) for line in infile]

    data_path = os.path.join(options.run_dir, 'data.eval.jsons')
    with open(data_path, 'r') as infile:
        insts = [instance.Instance(**json.loads(line.strip())) for line in infile]

    assert len(grids) == len(insts), '{} != {}'.format(len(grids), len(insts))
    gold_outputs = np.array([inst.output for inst in insts])

    l0 = np.array([[np.array(ss['L0']).T for ss in grid['sets']] for grid in grids])
    s0 = np.array([[np.array(ss['S0']).T for ss in grid['sets']] for grid in grids])
    if options.additive:
        ak = compute_additive(l0, s0,
                              bw=options.base_weight, sw=options.speaker_weight,
                              alpha_s1=options.alpha, alpha_l1=options.alpha_l1)
    else:
        ak = compute_ak(l0, s0,
                        bw=options.base_weight, sw=options.speaker_weight,
                        alpha=options.alpha, gamma=options.gamma)
    ak_scores = ak[np.arange(ak.shape[0]), gold_outputs].tolist()
    ak_preds = np.argmax(ak, axis=1).tolist()

    m = [metrics.log_likelihood, metrics.log_likelihood_bits,
         metrics.perplexity, metrics.accuracy]
    learner = DummyLearner(ak_preds, ak_scores, params={
        'base_weight': options.base_weight,
        'speaker_weight': options.speaker_weight,
        'alpha': options.alpha,
        'alpha_l1': options.alpha_l1,
        'gamma': options.gamma,
        'additive': options.additive,
    })
    split_id = '{}_eval'.format(options.blend_name)
    results = evaluate.evaluate(learner, insts, metrics=m,
                                split_id=split_id, write_data=False)
    output.output_results(results, split_id)

    options_dump = vars(options)
    del options_dump['overwrite']
    del options_dump['config']
    config.dump_pretty(options_dump, split_id + '_config.json')

def main():
    options = config.options()

    progress.set_resolution(datetime.timedelta(seconds=options.progress_tick))

    train_size = options.train_size if options.train_size >= 0 else None
    test_size = options.test_size if options.test_size >= 0 else None

    train_data = datasets.SOURCES[options.data_source].train_data()[:train_size]
    if options.validation_size:
        assert options.validation_size < len(train_data), \
            ('No training data after validation split! (%d <= %d)' %
             (len(train_data), options.validation_size))
        validation_data = train_data[-options.validation_size:]
        train_data = train_data[:-options.validation_size]
    else:
        validation_data = None
    test_data = datasets.SOURCES[options.data_source].test_data()[:test_size]

    learner = learners.new(options.learner)

    m = [metrics.METRICS[m] for m in options.metrics]

    if options.load:
        learner.load(options.load)
    else:
        learner.train(train_data, validation_data, metrics=m)
        model_path = config.get_file_path('model')
        if model_path:
            learner.dump(model_path)

    train_results = evaluate.evaluate(learner, train_data, metrics=m,
                                      split_id='train',
                                      write_data=options.output_train_data)
    output.output_results(train_results, 'train')
    if options.output_train_samples:
        samples = learner.predict(train_data, random=True)
        config.dump(samples, 'samples.train.jsons', lines=True)

    test_results = evaluate.evaluate(learner, test_data, metrics=m,
                                     split_id='eval',
                                     write_data=options.output_test_data)
    output.output_results(test_results, 'eval')
    if options.output_test_samples:
        samples = learner.predict(test_data, random=True)
        config.dump(samples, 'samples.eval.jsons', lines=True)

def main():
    options = config.options()

    progress.set_resolution(datetime.timedelta(seconds=options.progress_tick))

    train_data = datasets.SOURCES[options.data_source].train_data()[:options.train_size]
    if options.validation_size:
        assert options.validation_size < len(train_data), \
            ('No training data after validation split! (%d <= %d)' %
             (len(train_data), options.validation_size))
        validation_data = train_data[-options.validation_size:]
        train_data = train_data[:-options.validation_size]
    else:
        validation_data = None
    test_data = datasets.SOURCES[options.data_source].test_data()[:options.test_size]

    learner = learners.new(options.learner)

    m = [metrics.METRICS[m] for m in options.metrics]

    if options.load:
        with open(options.load, 'rb') as infile:
            learner.load(infile)
    else:
        learner.train(train_data, validation_data, metrics=m)
        model_path = config.get_file_path('model.pkl')
        if model_path:
            with open(model_path, 'wb') as outfile:
                learner.dump(outfile)

    train_results = evaluate.evaluate(learner, train_data, metrics=m,
                                      split_id='train',
                                      write_data=options.output_train_data)
    output.output_results(train_results, 'train')

    test_results = evaluate.evaluate(learner, test_data, metrics=m,
                                     split_id='eval',
                                     write_data=options.output_test_data)
    output.output_results(test_results, 'eval')

def main():
    options = config.options()

    progress.set_resolution(datetime.timedelta(seconds=options.progress_tick))

    train_datasets = []
    validation_datasets = []
    test_datasets = []

    if len(options.train_size) == 1:
        options.train_size = options.train_size * len(options.data_source)
    else:
        assert len(options.train_size) == len(options.data_source)
    if len(options.validation_size) == 1:
        options.validation_size = options.validation_size * len(options.data_source)
    else:
        assert len(options.validation_size) == len(options.data_source)
    if len(options.test_size) == 1:
        options.test_size = options.test_size * len(options.data_source)
    else:
        assert len(options.test_size) == len(options.data_source)

    for source, train_size, validation_size, test_size in zip(
            options.data_source, options.train_size,
            options.validation_size, options.test_size):
        train_insts = color_instances.SOURCES[source].train_data(
            listener=options.listener)[:train_size]
        if validation_size:
            assert validation_size < len(train_insts), \
                ('No training data after validation split! (%d <= %d)' %
                 (len(train_insts), validation_size))
            validation_insts = train_insts[-validation_size:]
            validation_datasets.append(validation_insts)
            train_insts = train_insts[:-validation_size]
        else:
            validation_datasets.append(None)
        train_datasets.append(train_insts)
        test_insts = color_instances.SOURCES[source].test_data(
            options.listener)[:test_size]
        test_datasets.append(test_insts)

    learner = learners.new(options.learner)

    m = [metrics.log_likelihood,
         metrics.log_likelihood_bits,
         metrics.perplexity,
         metrics.aic]
    example_inst = get_example_inst(test_datasets, train_datasets)
    if options.listener and not isinstance(example_inst.output, numbers.Integral):
        m.append(metrics.squared_error)
    elif isinstance(example_inst.output, (tuple, list)):
        m.append(metrics.prec1)
        if example_inst.output and isinstance(example_inst.output, basestring):
            m.extend([metrics.bleu, metrics.wer,
                      metrics.token_perplexity_macro,
                      metrics.token_perplexity_micro])
    else:
        m.append(metrics.accuracy)
        if example_inst.output and isinstance(example_inst.output, basestring):
            m.extend([metrics.bleu, metrics.wer,
                      metrics.token_perplexity_macro,
                      metrics.token_perplexity_micro])

    multi_train = (len(options.data_source) > 1)

    if options.load:
        with open(options.load, 'rb') as infile:
            learner.load(infile)
        train_results = None
    else:
        if hasattr(learner, '_data_to_arrays'):
            # XXX: is there a better way to ensure that the vocabulary is defined
            # before training starts?
            for train_insts in train_datasets[1:]:
                learner._data_to_arrays(train_insts, init_vectorizer=True)

        for i, (source, train_insts, validation_insts) in enumerate(
                zip(options.data_source, train_datasets, validation_datasets)):
            if not train_insts:
                continue
            if i > 0:
                learner.train(train_insts, validation_insts, metrics=m,
                              keep_params=True)
            else:
                learner.train(train_insts, validation_insts, metrics=m)
            with open(config.get_file_path('model.p'), 'wb') as outfile:
                learner.dump(outfile)

            if multi_train:
                split_id = 'train_' + source
            else:
                split_id = 'train'
            train_results = evaluate.evaluate(learner, train_insts, metrics=m,
                                              split_id=split_id,
                                              write_data=options.output_train_data)
            if options.verbosity != 0:
                output.output_results(train_results, split_id)

    for i, (source, test_insts) in enumerate(zip(options.data_source, test_datasets)):
        if not test_insts:
            continue
        if multi_train:
            split_id = 'eval_' + source
        else:
            split_id = 'eval'
        test_results = evaluate.evaluate(learner, test_insts, metrics=m,
                                         split_id=split_id,
                                         write_data=options.output_test_data)
        if options.verbosity != 0:
            output.output_results(test_results, split_id)

    return train_results, test_results

def main():
    options = config.options()

    with thutils.device_context(options.device):
        progress.set_resolution(datetime.timedelta(seconds=options.progress_tick))

        SG = iterators.SizedGenerator

        if not hasattr(options, 'verbosity') or options.verbosity >= 2:
            print('Pre-calculating dataset sizes')
        train_data = SG(lambda: islice(dataset(options.train_file), 0,
                                       nin(options.train_size)),
                        length=None)
        if not hasattr(options, 'verbosity') or options.verbosity >= 4:
            print('Training set size: {}'.format(len(train_data)))

        validation_data = None
        if options.validation_file:
            validation_data = SG(lambda: islice(dataset(options.validation_file), 0,
                                                nin(options.validation_size)),
                                 length=None)
            if not hasattr(options, 'verbosity') or options.verbosity >= 4:
                print('Validation set size: {}'.format(len(validation_data)))

        eval_data = SG(lambda: islice(dataset(options.eval_file), 0,
                                      nin(options.eval_size)),
                       length=None)
        if not hasattr(options, 'verbosity') or options.verbosity >= 4:
            print('Eval set size: {}'.format(len(eval_data)))

        learner = learners.new(options.learner)

        m = [metrics.METRICS[m] for m in options.metrics]

        if options.load:
            with open(options.load, 'rb') as infile:
                learner.load(infile)
        else:
            learner.train(train_data, validation_data, metrics=m)
            model_path = config.get_file_path('model.pkl')
            if model_path:
                with open(model_path, 'wb') as outfile:
                    learner.dump(outfile)

        train_results = evaluate.evaluate(learner, train_data, metrics=m,
                                          split_id='train',
                                          write_data=options.output_train_data,
                                          pass_split=True)
        output.output_results(train_results, 'train')

        eval_results = evaluate.evaluate(learner, eval_data, metrics=m,
                                         split_id='eval',
                                         write_data=options.output_eval_data,
                                         pass_split=True)
        output.output_results(eval_results, 'eval')

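# Hypothetical helper matching how nin() is used above; its real definition
# lives elsewhere in this codebase. The assumed behavior mirrors the
# "size if size >= 0 else None" pattern in the other main() above: a negative
# size option means "no limit", so islice(data, 0, None) consumes the whole
# dataset.
def nin_sketch(size):
    return size if size is not None and size >= 0 else None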