Esempio n. 1
 def run(self):
     # Cross-validation driver: builds train/test splits over the instances
     # in D, picks hyperparameters on each training part, evaluates on the
     # matching test part, prints per-fold output followed by the mean and
     # confidence interval per (model, metric) key, and finally pickles
     # self.log to self.logfile.
     #
     # NOTE(review): several statements in this method are truncated in the
     # visible source (flagged individually below), so it does not parse
     # as-is. Restore the missing pieces from the original before use.

     # NOTE(review): call is cut off -- arguments and closing parenthesis
     # are missing.
     D = training_instances.get_generation_instances(
     splits = None
     # A truthy self.test_size selects ShuffleSplit.
     if self.test_size:
         # NOTE(review): call is cut off after the first keyword argument.
         splits = ShuffleSplit(n=len(D),
         # NOTE(review): presumably this assignment belongs to a missing
         # `else:` branch; an argument between the two commas was lost too.
         splits = KFold(n=len(D),, shuffle=True)
     # Maps a (modelname, metricname) key to the list of per-fold values.
     cross_val_results = defaultdict(list)
     for fold_index, (train_indices, test_indices) in enumerate(splits):
         train = [D[i] for i in train_indices]
         test = [D[i] for i in test_indices]
         # Hyperparameters are chosen from the training part only.
         params = self.set_hyperparameters(train)
         # NOTE(review): call is cut off -- remaining arguments are missing.
         run_results = self.crossval_run(test,
         print "======================================================================"
         print params
         print run_results
         # NOTE(review): the loop body is missing; presumably it stores each
         # value in cross_val_results, which is otherwise never filled.
         for key, val in run_results.items():
     # Summary over folds, one line per key in sorted order.
     for key, vals in sorted(cross_val_results.items()):
         modelname, metricname = key
         lower, upper = confidence_interval(vals)
         print "%s mean %s: %0.03f (%0.03f-%0.03f)" % (
             modelname, metricname, np.mean(vals), lower, upper)
     # Pickle protocol 2; the file object is not closed explicitly.
     pickle.dump(self.log, file(self.logfile, 'w'), 2)
Esempio n. 2
def pooled_experiment(agentname='literal'):
    # Collapse across folds:
    results = defaultdict(lambda: defaultdict(list))
    for dirname in ('furniture', 'people'):
        log = pickle.load(file("logs/log_%s.pickle" % dirname))
        for d in log:
            fold = d['fold_index']
            acc = d[agentname]['evaluations']['instance_accuracy']
            dice = d[agentname]['evaluations']['multiset_dice']
    # Means for the folds:
    pooled = defaultdict(dict)
    for fold, metric_vals in results.items():
        for metric, vals in metric_vals.items():
            pooled[metric][fold] = np.mean(vals)
    # Stats across the folds:
    runs = {}
    for metric, fold_dict in pooled.items():
        fold_vals = np.array(fold_dict.values())
        mu = np.mean(fold_vals)
        upper, lower = confidence_interval(fold_vals)
        print '%s mean %s: %0.03f (ci %0.03f, %0.03f)' % (agentname, metric,
                                                          mu, upper, lower)
        runs[metric] = fold_vals
    return runs
Esempio n. 3
def predicted_vs_actual_length(log, agentname='pragmatic'):
    deltas = []
    for d in log:
        results = d[agentname]
        delta = len(results['prediction']) - len(results['actual'])
    upper, lower = confidence_interval(deltas)
    print '%s mean difference: %0.02f (%0.02f, %0.02f 95%% ci)' % (
        agentname, np.mean(deltas), upper, lower)
Esempio n. 4
 def evaluation_report(self, all_results, verbose=0, split_info=None):
     errors = np.array([d['error'] for d in all_results])
     iterations = np.array([d['iterations'] for d in all_results])
     print "======================================================================"
     print "Type: %s" % self.typ
     print "Domain: %s" % self.dirname
     print "Features: %s" % self.phi.__name__
     print split_info
     print "Learning rate: %s" % self.eta
     print "L2 coefs:", [r['l2_coeff'] for r in all_results]
     print "Mean iterations to convergence:  %0.3f (+/- %0.3f)" % (
         iterations.mean(), iterations.std() * 2)
     for metric in self.metrics:
         vals = np.array(
             [d['evaluations'][metric.__name__] for d in all_results])
         ci = confidence_interval(vals)
         print "Mean %s: %0.3f (%.3f--%.3f)" % (metric.__name__,
                                                vals.mean(), ci[0], ci[1])
Esempio n. 5
 def crossvalidate(self):
     # Cross-validates over self.filenames: for each split, hyperparameters
     # (temperature and null cost) are set from the training files, the test
     # files are evaluated and summarized, and a per-fold line is printed.
     # Afterwards the mean accuracy and confidence interval are printed for
     # the 'Literal', 'Pragmatic' and 'Speaker' entries of the summaries.
     #
     # NOTE(review): two statements below are truncated in the visible
     # source, so this method does not parse as-is.

     # NOTE(review): an argument between the two commas was lost.
     kf = KFold(n=len(self.filenames),, shuffle=True)
     summaries = []
     # `temps` is not used anywhere in the visible body.
     temps = []
     for train_indices, test_indices in kf:
         train = [self.filenames[i] for i in train_indices]
         temp, nullcost = self.set_hyperparameters(train)
         test = [self.filenames[i] for i in test_indices]
         # NOTE(review): the callee and its first argument are missing from
         # this assignment; only the trailing keyword arguments survived.
         all_reports =, temperature=temp, nullcost=nullcost)
         summary = self.summarize(all_reports)
         # NOTE(review): `summary` is never appended to `summaries` in the
         # visible code, so the loop below would see an empty list --
         # presumably a dropped line; confirm against the original.
         print 'Temp: %s; nullcost: %s; %s' % (temp, nullcost, str(summary))
     for name in ('Literal', 'Pragmatic', 'Speaker'):
         vals = np.array([s[name] for s in summaries])
         ci = confidence_interval(vals)
         print "%s mean accuracy: %0.2f (%0.2f-%0.2f)" % (name, vals.mean(),
                                                          ci[0], ci[1])
Esempio n. 6
def triple_errors(output_folder, triple):
    from parsers import CVOutputParser
    from utils import interpolate, avg, confidence_interval
    import math
    from collections import Counter
    import os

    Plot accumulated errors for estimators against pair triple ratios.
    Ratios are binned in the range 0.0 to 1.0.
    if not output_folder[-1] == "/":
        output_folder += "/"

    iteration = -1
    max_ent_errors = []
    ext_errors = []
    max_ent_abs_errors = []
    ext_abs_errors = []
    samples_ignored = 0
    while True:
        iteration += 1
        max_ent_est_file = output_folder + str(iteration) + "_data.tsv"
        ext_est_file = output_folder + str(iteration) + "_data_extrapolation.tsv"
        # heu_est_file = output_folder + str(iteration) + '_data_heurestic.tsv'
        # read baseline also?
        # Read until we do not find an output file
        if not os.path.exists(max_ent_est_file):

        # Read the maxent estimate
        found = False
        for sample_triple, (est, obs, ratio, triangle) in CVOutputParser.read_est_obs_file_disc_version_2(
            (s1, s2, s3, s12, s13, s23, s123) = triangle

            if sample_triple == triple:
                # if s123 == 0:
                #     break
                found = True
                max_ent_errors.append(est - obs)
                max_ent_abs_errors.append(abs(obs - est))

        if not found:
            samples_ignored += 1

        for sample_triple, (est, obs, ratio, triangle) in CVOutputParser.read_est_obs_file_disc_version_2(ext_est_file):
            (s1, s2, s3, s12, s13, s23, s123) = triangle

            if sample_triple == triple:
                ext_errors.append(est - obs)
                ext_abs_errors.append(abs(obs - est))

    # maxent confidence interval
    maxent_ci = confidence_interval(max_ent_errors)
    # extrapolation confidence interval
    ext_ci = confidence_interval(ext_errors)

    print "samples ignored: ", samples_ignored
    print "maxent avg error: ", round(avg(max_ent_errors), 1)
    print "maxent 95% confidence interval: ", (round(maxent_ci[0], 1), round(maxent_ci[1], 2))
    print "extrapolation avg error: ", round(avg(ext_errors), 1)
    print "extrapolation 95% confidence interval: ", (round(ext_ci[0], 1), round(ext_ci[1], 2))

    # round
    max_ent_errors_rounded = [round(x, 1) for x in max_ent_errors]
    ext_errors_rounded = [round(x, 1) for x in ext_errors]

    # plot
    xlabel("Estimate error")
    ylabel("Bucket size")
    # text(0.1, 0.8, 'Maxent')
    # text(0.1, 0.7, 'avg. error: ' + str(avg(max_ent_errors)))
    # text(0.1, 0.6, '95% conf. interval: ' + str(maxent_ci))

    # text(0.5, 0.8, 'Extrapolation')
    # text(0.5, 0.7, 'avg. error: ' + str(avg(ext_errors)))
    # text(0.5, 0.6, '95% conf. interval: ' + str(ext_ci))

    hist([max_ent_errors_rounded, ext_errors_rounded], color=("b", "r"))

    return max_ent_errors, max_ent_abs_errors, ext_errors, ext_abs_errors
	def test_confidence_interval(self):
		# Checks utils.confidence_interval against fixed expected values.
		# NOTE(review): the next line is garbled in the visible source
		# (unbalanced brackets; it looks like two assertions fused into
		# one) and does not parse -- restore it from the original test.
		assert utils.confidence_interval([2,2,2,2]) == 0[1,2,3,4]), 1.096,3)
		# NOTE(review): exact float equality; presumably the helper rounds
		# its result -- confirm.
		assert utils.confidence_interval([2,2,4,4]) == 0.98