Example #1
def do_baseline_runs(expt):
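    """Train and evaluate a baseline SimpleClassifier on freshly simulated
    data for each of expt.num_runs runs, then print a summary over all runs."""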
    gen = DataGenerator(expt.num_phonemes, expt.num_features,
                        expt.var_diag_interval, expt.var_offdiag_interval)

    all_results = []
    for run_idx in range(expt.num_runs):
        test_data = gen.generate_simulated_data(expt.num_test_frames)

        # There's a problem here if there's only one data point, since
        # then we end up with a variance of 0.  We currently hack
        # around this problem by guaranteeing more than one point.  We
        # could change the models to allow zero variance but this will
        # mean not being able to make samples from the models without
        # some extra work.  Note that we don't care at all about order
        # of training data in these experiments, so we just build our
        # training data in two parts and cat them together.  If you
        # hit either of these asserts, you're asking for an error rate
        # that's too high and/or a training data size that's too low.
        # We need two correct samples per phoneme.
        num_secondary_frames = expt.num_training_frames - expt.num_phonemes * 2
        num_errorful_frames = expt.num_training_frames * expt.training_error_rate
        assert expt.num_training_frames >= expt.num_phonemes * 2
        assert num_secondary_frames > num_errorful_frames
        errorless_training_data = gen.generate_simulated_data_per_phoneme(2)
        secondary_training_data = gen.generate_simulated_data(
            num_secondary_frames)

        # Slight trickiness to get a correct error rate for this subset of the data
        subset_error_rate = float(num_errorful_frames) / num_secondary_frames
        errorful_training_data, num_errors = gen.add_errors_to_data(
            secondary_training_data, subset_error_rate)

        practice_data = gen.generate_simulated_data(expt.num_practice_frames)
        errorful_practice_data, num_errors = gen.add_errors_to_data(
            practice_data, expt.practice_error_rate)

        training_data = errorless_training_data + errorful_training_data + errorful_practice_data

        c = SimpleClassifier(gen.get_labels(), gen.num_features)
        c.train_all(training_data)

        (rate, results) = measureAccuracy(c, test_data)
        name = "Baseline 0.%d" % (run_idx, )
        summary = make_summary_string(name, rate, results, c, test_data, gen)
        all_results.append((name, rate))

        # print "Classifier:\n"
        # print c.to_string()
        # print summary
    print "\n--------------------------Summary-----------------------"
    print make_all_runs_summary_string(expt, all_results)
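
The long comment block and the two asserts above boil down to a small piece of frame-budget arithmetic. The sketch below restates it on its own with hypothetical numbers (40 phonemes, 1000 training frames, a 5% error rate); only the formulas come from the example above, while the frame_budget helper and the concrete values are made up for illustration.

def frame_budget(num_phonemes, num_training_frames, training_error_rate):
    # Two guaranteed-correct frames per phoneme keep every model's
    # variance estimate away from zero.
    num_errorless_frames = num_phonemes * 2
    num_secondary_frames = num_training_frames - num_errorless_frames
    num_errorful_frames = num_training_frames * training_error_rate

    # Same sanity checks as the asserts in do_baseline_runs.
    assert num_training_frames >= num_errorless_frames
    assert num_secondary_frames > num_errorful_frames

    # The requested error rate is relative to the whole training set, but
    # errors are only injected into the secondary subset, so the rate used
    # on that subset has to be scaled up accordingly.
    subset_error_rate = float(num_errorful_frames) / num_secondary_frames
    return num_errorless_frames, num_secondary_frames, subset_error_rate

# With 40 phonemes, 1000 training frames, and a 5% error rate:
# 80 errorless frames, 920 secondary frames, and a subset rate of
# 50 / 920 ~= 0.0543, so roughly 5% of all 1000 frames end up errorful.
print(frame_budget(40, 1000, 0.05))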
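
For completeness, here is one hypothetical way to drive do_baseline_runs. The attribute names are exactly the ones the function body reads; everything else, including the class name, the values, and the assumption that the *_interval fields are (low, high) ranges, is made up for illustration and not taken from the original project.

class BaselineExperiment(object):
    # Hypothetical settings holder; only the attribute names come from
    # the body of do_baseline_runs, the values are illustrative.
    num_phonemes = 40
    num_features = 13
    var_diag_interval = (0.5, 1.5)     # assumed to be a (low, high) range
    var_offdiag_interval = (0.0, 0.1)  # assumed to be a (low, high) range
    num_runs = 5
    num_test_frames = 500
    num_training_frames = 1000
    training_error_rate = 0.05
    num_practice_frames = 200
    practice_error_rate = 0.10

do_baseline_runs(BaselineExperiment())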