def do_baseline_runs(expt):
    """Train and score a baseline classifier once per run, then print a summary.

    One DataGenerator is built from ``expt`` and shared by all
    ``expt.num_runs`` runs.  Each run draws fresh test data, assembles a
    training set (clean per-phoneme frames + error-injected training frames
    + error-injected practice frames), trains a SimpleClassifier on it and
    measures its accuracy on the test data.  The (name, rate) pair of every
    run is collected and an overall summary is printed at the end.

    Args:
        expt: experiment description; this function reads num_phonemes,
            num_features, var_diag_interval, var_offdiag_interval, num_runs,
            num_test_frames, num_training_frames, training_error_rate,
            num_practice_frames and practice_error_rate from it.

    Raises:
        AssertionError: if the requested training size is smaller than two
            frames per phoneme, or the error rate is too high for the
            remaining (non-reserved) training frames.
    """
    gen = DataGenerator(
        expt.num_phonemes,
        expt.num_features,
        expt.var_diag_interval,
        expt.var_offdiag_interval,
    )
    all_results = []

    for run_idx in range(expt.num_runs):
        test_data = gen.generate_simulated_data(expt.num_test_frames)

        # A model trained on a single data point ends up with zero variance.
        # Rather than teaching the models to cope with that (which would
        # make sampling from them harder), two error-free frames per phoneme
        # are always reserved.  Order of the training data is irrelevant in
        # these experiments, so the set is built in pieces and concatenated.
        total_frames = expt.num_training_frames
        reserved_frames = expt.num_phonemes * 2
        num_secondary_frames = total_frames - reserved_frames
        num_errorful_frames = total_frames * expt.training_error_rate

        # Hitting either assert means the error rate is too high and/or the
        # training data size is too low.
        assert total_frames >= reserved_frames
        assert num_secondary_frames > num_errorful_frames

        clean_part = gen.generate_simulated_data_per_phoneme(2)
        secondary_part = gen.generate_simulated_data(num_secondary_frames)

        # Errors are injected only into the secondary part, so the rate is
        # rescaled to keep the requested number of errorful frames overall.
        subset_error_rate = float(num_errorful_frames) / num_secondary_frames
        noisy_training_part, num_errors = gen.add_errors_to_data(
            secondary_part, subset_error_rate)

        practice_part = gen.generate_simulated_data(expt.num_practice_frames)
        noisy_practice_part, num_errors = gen.add_errors_to_data(
            practice_part, expt.practice_error_rate)

        training_data = clean_part + noisy_training_part + noisy_practice_part

        c = SimpleClassifier(gen.get_labels(), gen.num_features)
        c.train_all(training_data)
        rate, results = measureAccuracy(c, test_data)

        name = "Baseline 0.%d" % (run_idx,)
        # The per-run summary is still built, but nothing prints it at the
        # moment; it (and c.to_string()) is only useful when debugging.
        summary = make_summary_string(name, rate, results, c, test_data, gen)
        all_results.append((name, rate))

    print("\n--------------------------Summary-----------------------")
    print(make_all_runs_summary_string(expt, all_results))
def do_baseline_runs(expt):
    """Execute every baseline run described by ``expt`` and print the results.

    For each of the ``expt.num_runs`` runs this generates test data, builds
    a partly corrupted training set, trains a SimpleClassifier and records
    the accuracy rate returned by measureAccuracy.  After the last run the
    string returned by make_all_runs_summary_string is printed.

    Args:
        expt: experiment configuration object supplying the generator
            parameters, frame counts, error rates and number of runs.

    Raises:
        AssertionError: when ``expt.num_training_frames`` cannot hold two
            clean frames per phoneme, or when the requested number of
            errorful frames does not fit in the remaining frames.
    """
    gen = DataGenerator(expt.num_phonemes, expt.num_features,
                        expt.var_diag_interval, expt.var_offdiag_interval)
    all_results = []

    for run_idx in range(expt.num_runs):
        test_data = gen.generate_simulated_data(expt.num_test_frames)

        # With only one data point per phoneme the variance would be 0.
        # That is worked around by guaranteeing more than one point: two
        # correct samples per phoneme.  Allowing zero variance in the models
        # is possible, but sampling from them would then need extra work.
        # Training-data order does not matter here, so the pieces are simply
        # generated separately and joined.
        num_secondary_frames = (
            expt.num_training_frames - expt.num_phonemes * 2
        )
        num_errorful_frames = (
            expt.num_training_frames * expt.training_error_rate
        )

        # A failure below means: error rate too high and/or training data
        # size too low.
        assert expt.num_training_frames >= expt.num_phonemes * 2
        assert num_secondary_frames > num_errorful_frames

        errorless_frames = gen.generate_simulated_data_per_phoneme(2)
        secondary_frames = gen.generate_simulated_data(num_secondary_frames)

        # Rescale the error rate so that this subset alone carries the
        # requested number of errorful frames.
        subset_error_rate = float(num_errorful_frames) / num_secondary_frames
        corrupted = gen.add_errors_to_data(secondary_frames, subset_error_rate)
        errorful_frames, num_errors = corrupted

        practice_frames = gen.generate_simulated_data(expt.num_practice_frames)
        corrupted = gen.add_errors_to_data(practice_frames,
                                           expt.practice_error_rate)
        errorful_practice_frames, num_errors = corrupted

        training_data = (errorless_frames
                         + errorful_frames
                         + errorful_practice_frames)

        c = SimpleClassifier(gen.get_labels(), gen.num_features)
        c.train_all(training_data)
        (rate, results) = measureAccuracy(c, test_data)

        name = "Baseline 0.%d" % (run_idx,)
        # Built for debugging output (classifier dump / per-run summary),
        # which is currently not printed.
        summary = make_summary_string(name, rate, results, c, test_data, gen)
        all_results.append((name, rate))

    print("\n--------------------------Summary-----------------------")
    print(make_all_runs_summary_string(expt, all_results))