Example #1
def test_custom_learner_model_loading():
    num_labels = 10

    class_weights = [(0.5 / (num_labels - 1))
                     for x in range(num_labels - 1)] + [0.5]
    train_fs, test_fs = make_classification_data(num_examples=600,
                                                 train_test_ratio=0.8,
                                                 num_labels=num_labels,
                                                 num_features=5,
                                                 non_negative=True,
                                                 class_weights=class_weights)

    # Write training feature set to a file
    train_path = join(_my_dir, 'train',
                      'test_model_custom_learner.jsonlines')
    writer = NDJWriter(train_path, train_fs)
    writer.write()

    # Write test feature set to a file
    test_path = join(_my_dir, 'test',
                     'test_model_custom_learner.jsonlines')
    writer = NDJWriter(test_path, test_fs)
    writer.write()

    # run the configuration that trains the custom model and saves it
    cfgfile = 'test_model_save_custom_learner.template.cfg'
    config_template_path = join(_my_dir, 'configs', cfgfile)
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    # read the predictions from disk into memory
    # and delete the predictions file
    outprefix = 'test_model_custom_learner'
    pred_file = join(_my_dir, 'output',
                     '{}_{}_CustomLogisticRegressionWrapper'
                     '.predictions'.format(outprefix,
                                           outprefix))
    preds1 = read_predictions(pred_file)
    os.unlink(pred_file)

    # run the configuration that loads the saved model
    # and generates the predictions again
    cfgfile = 'test_model_load_custom_learner.template.cfg'
    config_template_path = join(_my_dir, 'configs', cfgfile)
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, overwrite=False, quiet=True)

    # load the newly generated predictions
    preds2 = read_predictions(pred_file)

    # make sure that they are the same as before
    assert_array_equal(preds1, preds2)
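The custom learner that this config trains lives in a separate Python module, which the config is assumed to reference through SKLL's custom_learner_path option. The wrapper itself is not part of these snippets; a minimal sketch of what such a module could look like (the file name is hypothetical):

# custom_logistic_wrapper.py -- hypothetical module pointed to by
# custom_learner_path in the experiment config.
from sklearn.linear_model import LogisticRegression


class CustomLogisticRegressionWrapper(LogisticRegression):
    # Subclassing is enough here: SKLL only needs an importable,
    # scikit-learn-compatible estimator class with this name.
    pass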
Example #2
def test_ablation_cv_feature_hasher_all_combos_sampler():
    """
    Test ablation all-combos + cross-validation + feature hashing + samplers
    """

    config_template_path = join(
        _my_dir, 'configs',
        'test_ablation_feature_hasher_sampler_all_combos.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True, ablation=None)

    # read in the summary file and make sure it has
    # 10 ablated featuresets * (10 folds + 1 average line) * 2 learners = 220
    # lines
    with open(
            join(_my_dir, 'output',
                 'ablation_cv_feature_hasher_all_combos_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        num_rows = check_ablation_rows(reader)
        eq_(num_rows, 220)

    # make sure there are 10 ablated featuresets * 2 learners = 20 results
    # files
    num_result_files = len(
        glob(
            join(_my_dir, 'output',
                 'ablation_cv_feature_hasher_sampler_all_combos*.results')))
    eq_(num_result_files, 20)
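check_ablation_rows() is a shared test helper that is not shown in these snippets. A plausible minimal shape, assuming each summary line carries a 'fold' column holding either a fold index or the literal string 'average':

def check_ablation_rows(reader):
    # Count the summary rows, sanity-checking fold labels as we go.
    num_rows = 0
    for row in reader:
        assert row['fold'] == 'average' or row['fold'].isdigit()
        num_rows += 1
    return num_rows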
Example #3
def test_ablation_cv_feature_hasher_sampler():
    """
    Test that ablation works with cross-validation and feature_hasher
    """
    make_ablation_data()

    config_template_path = join(_my_dir, 'configs', ('test_ablation_feature_'
                                                     'hasher_sampler.template'
                                                     '.cfg'))
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True, ablation=1)

    # read in the summary file and make sure it has
    # 7 ablated featuresets * (10 folds + 1 average line) * 2 learners = 154
    # lines
    with open(join(_my_dir, 'output',
                   'ablation_cv_feature_hasher_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        num_rows = check_ablation_rows(reader)
        eq_(num_rows, 154)

    # make sure there are 7 ablated featuresets * 2 learners = 14 results files
    num_result_files = len(
        glob.glob(
            join(_my_dir, 'output', ('ablation_cv_feature_hasher_'
                                     '*.results'))))
    eq_(num_result_files, 14)
Example #4
def test_learning_curve_output():
    """
    Test learning curve output for experiment with metrics option
    """

    # Test to validate learning curve output
    make_learning_curve_data()

    config_template_path = join(_my_dir, 'configs', 'test_learning_curve.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    # run the learning curve experiment
    run_configuration(config_path, quiet=True)
    outprefix = 'test_learning_curve'

    # make sure that the TSV file is created with the right columns
    output_tsv_path = join(_my_dir, 'output', '{}_summary.tsv'.format(outprefix))
    ok_(exists(output_tsv_path))
    with open(output_tsv_path, 'r') as tsvf:
        r = csv.reader(tsvf, dialect=csv.excel_tab)
        header = next(r)
        # make sure we have the expected number of columns
        eq_(len(header), 11)
        num_rows = len(list(r))
        # we should have 2 featuresets x 3 learners x 2 objectives x 5 (default)
        # training sizes = 60 rows
        eq_(num_rows, 60)

    # make sure that the two PNG files (one per featureset) are created
    # if the requirements are satisfied
    if _HAVE_SEABORN:
        for featureset_name in ["test_learning_curve1", "test_learning_curve2"]:
            ok_(exists(join(_my_dir,
                            'output',
                            '{}_{}.png'.format(outprefix, featureset_name))))
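_HAVE_SEABORN is a module-level flag: SKLL only renders the learning-curve PNGs when seaborn is available, so the plot assertions are skipped otherwise. It is presumably set with a guarded import along these lines:

try:
    import seaborn  # noqa: F401
    _HAVE_SEABORN = True
except ImportError:
    _HAVE_SEABORN = False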
Example #5
def test_ablation_cv_feature_hasher():
    """
    Test that ablation works with cross-validation and feature_hasher
    """
    make_ablation_data()

    config_template_path = join(_my_dir, 'configs',
                                'test_ablation_feature_hasher.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True, ablation=1)

    # read in the summary file and make sure it has
    # 7 ablated featuresets * (10 folds + 1 average line) * 2 learners = 154
    # lines
    with open(join(_my_dir, 'output',
                   'ablation_cv_feature_hasher_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        num_rows = check_ablation_rows(reader)
        eq_(num_rows, 154)

    # make sure there are 7 ablated featuresets * 2 learners = 14 results files
    num_result_files = len(glob.glob(join(_my_dir, 'output',
                                          ('ablation_cv_feature_hasher_'
                                           '*.results'))))
    eq_(num_result_files, 14)
Example #6
def test_ablation_cv_feature_hasher_all_combos():
    """
    Test ablation all-combos + cross-validation + feature hashing
    """

    config_template_path = join(_my_dir,
                                'configs',
                                'test_ablation_feature_hasher_all_combos.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True, ablation=None)

    # read in the summary file and make sure it has
    #    10 ablated featuresets
    #      * (10 folds + 1 average line)
    #      * 2 learners
    #    = 220 lines in total
    with open(join(_my_dir,
                   'output',
                   'ablation_cv_feature_hasher_all_combos_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        num_rows = check_ablation_rows(reader)
        eq_(num_rows, 220)

    # make sure there are 10 ablated featuresets * 2 learners = 20 results
    # files
    num_result_files = len(glob(join(_my_dir,
                                     'output',
                                     'ablation_cv_feature_hasher_all_combos*.results')))
    eq_(num_result_files, 20)
Example #7
def test_ablation_cv_feature_hasher_all_combos_sampler():
    """
    Test ablation all-combos + cross-validation + feature hashing
    and samplers
    """
    make_ablation_data()

    config_template_path = join(_my_dir, 'configs', ('test_ablation_feature_'
                                                     'hasher_sampler.template'
                                                     '.cfg'))
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True, ablation=None)

    # read in the summary file and make sure it has
    # 10 ablated featuresets * (10 folds + 1 average line) * 2 learners = 220
    # lines
    with open(join(_my_dir, 'output',
                   'ablation_cv_feature_hasher_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        num_rows = check_ablation_rows(reader)
        eq_(num_rows, 220)

    # make sure there are 10 ablated featuresets * 2 learners = 20 results
    # files
    num_result_files = len(glob.glob(join(_my_dir, 'output',
                                          ('ablation_cv_feature_hasher_'
                                           '*.results'))))
    eq_(num_result_files, 20)
Example #8
def test_learning_curve_plots():
    """
    Test learning curve plots for experiment with metrics option
    """

    # Test to validate learning curve output
    make_learning_curve_data()

    config_template_path = join(_my_dir, 'configs', 'test_learning_curve.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    # run the learning curve experiment
    run_configuration(config_path, quiet=True)
    outprefix = 'test_learning_curve'

    # make sure that the two PNG files (one per featureset) are created
    for featureset_name in ["test_learning_curve1", "test_learning_curve2"]:
        ok_(exists(join(_my_dir,
                        'output',
                        '{}_{}.png'.format(outprefix, featureset_name))))
Example #9
def test_logistic_custom_learner():
    num_labels = 10

    class_weights = [(0.5 / (num_labels - 1))
                     for x in range(num_labels - 1)] + [0.5]
    train_fs, test_fs = make_classification_data(num_examples=600,
                                                 train_test_ratio=0.8,
                                                 num_labels=num_labels,
                                                 num_features=5,
                                                 non_negative=True,
                                                 class_weights=class_weights)

    # Write training feature set to a file
    train_path = join(_my_dir, 'train',
                      'test_logistic_custom_learner.jsonlines')
    writer = NDJWriter(train_path, train_fs)
    writer.write()

    # Write test feature set to a file
    test_path = join(_my_dir, 'test',
                     'test_logistic_custom_learner.jsonlines')
    writer = NDJWriter(test_path, test_fs)
    writer.write()

    cfgfile = 'test_logistic_custom_learner.template.cfg'
    config_template_path = join(_my_dir, 'configs', cfgfile)
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    outprefix = 'test_logistic_custom_learner'
    preds = read_predictions(join(_my_dir, 'output',
                                  ('{}_{}_CustomLogisticRegressionWrapper'
                                   '.predictions'.format(outprefix,
                                                         outprefix))))

    expected = read_predictions(join(_my_dir, 'output',
                                     ('{}_{}_LogisticRegression.predictions'
                                      .format(outprefix, outprefix))))

    assert_array_equal(preds, expected)
Example #10
def test_logistic_custom_learner():
    num_labels = 10

    class_weights = [(0.5 / (num_labels - 1))
                     for x in range(num_labels - 1)] + [0.5]
    train_fs, test_fs = make_classification_data(num_examples=600,
                                                 train_test_ratio=0.8,
                                                 num_labels=num_labels,
                                                 num_features=5,
                                                 non_negative=True,
                                                 class_weights=class_weights)

    # Write training feature set to a file
    train_path = join(_my_dir, 'train',
                      'test_logistic_custom_learner.jsonlines')
    writer = NDJWriter(train_path, train_fs)
    writer.write()

    # Write test feature set to a file
    test_path = join(_my_dir, 'test',
                     'test_logistic_custom_learner.jsonlines')
    writer = NDJWriter(test_path, test_fs)
    writer.write()

    cfgfile = 'test_logistic_custom_learner.template.cfg'
    config_template_path = join(_my_dir, 'configs', cfgfile)
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    outprefix = 'test_logistic_custom_learner'
    preds = read_predictions(join(_my_dir, 'output',
                                  ('{}_{}_CustomLogisticRegressionWrapper'
                                   '_predictions.tsv'.format(outprefix,
                                                             outprefix))))

    expected = read_predictions(join(_my_dir, 'output',
                                     ('{}_{}_LogisticRegression_predictions.tsv'
                                      .format(outprefix, outprefix))))

    assert_array_equal(preds, expected)
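Examples #9 and #10 differ only in the prediction file naming ('.predictions' vs. '_predictions.tsv'), which changed between SKLL versions. Both rely on the read_predictions() test helper; a minimal sketch, assuming a tab-delimited file with a header row, an ID column, and a single prediction column:

import csv

import numpy as np


def read_predictions(path):
    # Return the prediction column of a tab-delimited predictions file.
    with open(path) as f:
        reader = csv.reader(f, dialect=csv.excel_tab)
        next(reader)  # skip the header row
        return np.array([float(row[1]) for row in reader])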
Example #11
def test_majority_class_custom_learner():
    num_labels = 10

    # This will make data where the last class happens about 50% of the time.
    class_weights = [(0.5 / (num_labels - 1))
                     for x in range(num_labels - 1)] + [0.5]
    train_fs, test_fs = make_classification_data(num_examples=600,
                                                 train_test_ratio=0.8,
                                                 num_labels=num_labels,
                                                 num_features=5,
                                                 non_negative=True,
                                                 class_weights=class_weights)

    # Write training feature set to a file
    train_path = join(_my_dir, 'train',
                      'test_majority_class_custom_learner.jsonlines')
    writer = NDJWriter(train_path, train_fs)
    writer.write()

    # Write test feature set to a file
    test_path = join(_my_dir, 'test',
                     'test_majority_class_custom_learner.jsonlines')
    writer = NDJWriter(test_path, test_fs)
    writer.write()

    cfgfile = 'test_majority_class_custom_learner.template.cfg'
    config_template_path = join(_my_dir, 'configs', cfgfile)
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    outprefix = 'test_majority_class_custom_learner'

    preds = read_predictions(
        join(_my_dir, 'output',
             ('{}_{}_MajorityClassLearner_predictions.tsv'.format(
                 outprefix, outprefix))))
    expected = np.array([float(num_labels - 1) for x in preds])
    assert_array_equal(preds, expected)
Example #12
def test_majority_class_custom_learner():
    num_labels = 10

    # This will make data where the last class happens about 50% of the time.
    class_weights = [(0.5 / (num_labels - 1))
                     for x in range(num_labels - 1)] + [0.5]
    train_fs, test_fs = make_classification_data(num_examples=600,
                                                 train_test_ratio=0.8,
                                                 num_labels=num_labels,
                                                 num_features=5,
                                                 non_negative=True,
                                                 class_weights=class_weights)

    # Write training feature set to a file
    train_path = join(_my_dir, 'train',
                      'test_majority_class_custom_learner.jsonlines')
    writer = NDJWriter(train_path, train_fs)
    writer.write()

    # Write test feature set to a file
    test_path = join(_my_dir, 'test',
                     'test_majority_class_custom_learner.jsonlines')
    writer = NDJWriter(test_path, test_fs)
    writer.write()

    cfgfile = 'test_majority_class_custom_learner.template.cfg'
    config_template_path = join(_my_dir, 'configs', cfgfile)
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    outprefix = 'test_majority_class_custom_learner'

    preds = read_predictions(join(_my_dir, 'output',
                                  ('{}_{}_MajorityClassLearner_predictions.tsv'
                                   .format(outprefix, outprefix))))
    expected = np.array([float(num_labels - 1) for x in preds])
    assert_array_equal(preds, expected)
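With num_labels = 10, the class_weights list works out to nine weights of 0.5/9 ≈ 0.056 plus 0.5 for the last label, so label 9 is the majority class and the assertion expects a constant prediction of 9.0. The MajorityClassLearner itself is not shown in these snippets; a minimal scikit-learn-compatible sketch of what it could look like:

from collections import Counter

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin


class MajorityClassLearner(BaseEstimator, ClassifierMixin):
    """Always predict the most frequent label seen during fit()."""

    def fit(self, X, y):
        self.majority_class_ = Counter(y).most_common(1)[0][0]
        return self

    def predict(self, X):
        return np.full(X.shape[0], self.majority_class_)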
Example #13
def test_ablation_cv():
    """
    Test ablation + cross-validation
    """

    config_template_path = join(_my_dir, 'configs',
                                'test_ablation.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True, ablation=1)

    # read in the summary file and make sure it has
    # 7 ablated featuresets * (10 folds + 1 average line) * 2 learners = 154
    # lines
    with open(join(_my_dir, 'output', 'ablation_cv_plain_summary.tsv')) as f:
        reader = csv.DictReader(f, dialect=csv.excel_tab)
        num_rows = check_ablation_rows(reader)
        eq_(num_rows, 154)

    # make sure there are 7 ablated featuresets * 2 learners = 14 results files
    num_result_files = len(
        glob(join(_my_dir, 'output', 'ablation_cv_plain*.results')))
    eq_(num_result_files, 14)
Example #14
def test_class_map_feature_hasher():
    """
    Test class maps with feature hashing
    """

    make_class_map_data()

    config_template_path = join(_my_dir, 'configs',
                                'test_class_map_feature_hasher.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    with open(join(_my_dir, 'output', ('test_class_map_test_class_map_'
                                       'LogisticRegression.results.'
                                       'json'))) as f:
        outd = json.loads(f.read())
        logistic_result_score = outd[0]['score']

    assert_almost_equal(logistic_result_score, 0.5)
Example #15
def test_class_map():
    """
    Test class maps
    """

    make_class_map_data()

    config_template_path = join(_my_dir, 'configs',
                                'test_class_map.template.cfg')
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    with open(
            join(_my_dir, 'output', ('test_class_map_test_class_map_Logistic'
                                     'Regression.results.json'))) as f:
        outd = json.loads(f.read())
        logistic_result_score = outd[0]['accuracy']

    assert_almost_equal(logistic_result_score, 0.5)
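class_map is an option in the [Input] section of a SKLL config that collapses several original labels into one before training and evaluation. The labels produced by make_class_map_data() are not visible in these snippets; a purely hypothetical mapping just to show the shape of the value:

# Hypothetical label names; the real ones come from make_class_map_data().
class_map = {"dog": ["beagle", "dachsund"], "cat": ["siamese", "tabby"]}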
Example #16
def check_summary_score(use_feature_hashing=False):

    # Test to validate summary file scores
    make_summary_data()

    cfgfile = ('test_summary_feature_hasher.template.cfg' if
               use_feature_hashing else 'test_summary.template.cfg')
    config_template_path = join(_my_dir, 'configs', cfgfile)
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    outprefix = ('test_summary_feature_hasher_test_summary' if
                 use_feature_hashing else 'test_summary_test_summary')
    summprefix = ('test_summary_feature_hasher' if use_feature_hashing else
                  'test_summary')

    with open(join(_my_dir, 'output', ('{}_LogisticRegression.results.'
                                       'json'.format(outprefix)))) as f:
        outd = json.loads(f.read())
        logistic_result_score = outd[0]['score']

    with open(join(_my_dir, 'output',
                   '{}_SVC.results.json'.format(outprefix))) as f:
        outd = json.loads(f.read())
        svm_result_score = outd[0]['score']

    # note that Naive Bayes doesn't work with feature hashing
    if not use_feature_hashing:
        with open(join(_my_dir, 'output', ('{}_MultinomialNB.results.'
                                           'json'.format(outprefix)))) as f:
            outd = json.loads(f.read())
            naivebayes_result_score = outd[0]['score']

    with open(join(_my_dir, 'output', '{}_summary.tsv'.format(summprefix)),
              'r') as f:
        reader = csv.DictReader(f, dialect='excel-tab')

        for row in reader:
            # the learner results dictionaries should have 27 fields,
            # and all of these except results_table
            # should be printed (though some columns will be blank).
            eq_(len(row), 27)
            assert row['model_params']
            assert row['grid_score']
            assert row['score']

            if row['learner_name'] == 'LogisticRegression':
                logistic_summary_score = float(row['score'])
            elif row['learner_name'] == 'MultinomialNB':
                naivebayes_summary_score = float(row['score'])
            elif row['learner_name'] == 'SVC':
                svm_summary_score = float(row['score'])

    test_tuples = [(logistic_result_score,
                    logistic_summary_score,
                    'LogisticRegression'),
                   (svm_result_score,
                    svm_summary_score,
                    'SVC')]

    if not use_feature_hashing:
        test_tuples.append((naivebayes_result_score,
                            naivebayes_summary_score,
                            'MultinomialNB'))

    for result_score, summary_score, learner_name in test_tuples:
        assert_almost_equal(result_score, summary_score,
                            err_msg=('mismatched scores for {} '
                                     '(result:{}, summary:'
                                     '{})').format(learner_name, result_score,
                                                   summary_score))
Example #17
def check_summary_score(use_feature_hashing=False):

    # Test to validate summary file scores
    make_summary_data()

    cfgfile = ('test_summary_feature_hasher.template.cfg' if
               use_feature_hashing else 'test_summary.template.cfg')
    config_template_path = join(_my_dir, 'configs', cfgfile)
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    outprefix = ('test_summary_feature_hasher_test_summary' if
                 use_feature_hashing else 'test_summary_test_summary')
    summprefix = ('test_summary_feature_hasher' if use_feature_hashing else
                  'test_summary')

    with open(join(_my_dir, 'output', ('{}_LogisticRegression.results.'
                                       'json'.format(outprefix)))) as f:
        outd = json.loads(f.read())
        logistic_result_score = outd[0]['score']

    with open(join(_my_dir, 'output',
                   '{}_SVC.results.json'.format(outprefix))) as f:
        outd = json.loads(f.read())
        svm_result_score = outd[0]['score']

    # note that Naive Bayes doesn't work with feature hashing
    if not use_feature_hashing:
        with open(join(_my_dir, 'output', ('{}_MultinomialNB.results.'
                                           'json'.format(outprefix)))) as f:
            outd = json.loads(f.read())
            naivebayes_result_score = outd[0]['score']

    with open(join(_my_dir, 'output', '{}_summary.tsv'.format(summprefix)),
              'r') as f:
        reader = csv.DictReader(f, dialect='excel-tab')

        for row in reader:
            # the learner results dictionaries should have 29 fields,
            # and all of these except results_table
            # should be printed (though some columns will be blank).
            eq_(len(row), 29)
            assert row['model_params']
            assert row['grid_score']
            assert row['score']

            if row['learner_name'] == 'LogisticRegression':
                logistic_summary_score = float(row['score'])
            elif row['learner_name'] == 'MultinomialNB':
                naivebayes_summary_score = float(row['score'])
            elif row['learner_name'] == 'SVC':
                svm_summary_score = float(row['score'])

    test_tuples = [(logistic_result_score,
                    logistic_summary_score,
                    'LogisticRegression'),
                   (svm_result_score,
                    svm_summary_score,
                    'SVC')]

    if not use_feature_hashing:
        test_tuples.append((naivebayes_result_score,
                            naivebayes_summary_score,
                            'MultinomialNB'))

    for result_score, summary_score, learner_name in test_tuples:
        assert_almost_equal(result_score, summary_score,
                            err_msg=('mismatched scores for {} '
                                     '(result:{}, summary:'
                                     '{})').format(learner_name, result_score,
                                                   summary_score))

    # We iterate over each model with an expected
    # accuracy score. The test proves that the report
    # written out at least has a correct format for
    # this line. See _print_fancy_output
    for report_name, val in (("LogisticRegression", .5),
                             ("MultinomialNB", .5),
                             ("SVC", .7)):
        filename = "test_summary_test_summary_{}.results".format(report_name)
        results_path = join(_my_dir, 'output', filename)
        with open(results_path) as results_file:
            report = results_file.read()
            expected_string = "Accuracy = {:.1f}".format(val)
            eq_(expected_string in report,  # approximate
                True,
                msg="{} is not in {}".format(expected_string,
                                             report))
Example #18
def check_summary_score(use_feature_hashing=False):

    # Test to validate summary file scores
    make_summary_data()

    cfgfile = ('test_summary_feature_hasher.template.cfg'
               if use_feature_hashing else 'test_summary.template.cfg')
    config_template_path = join(_my_dir, 'configs', cfgfile)
    config_path = fill_in_config_paths(config_template_path)

    run_configuration(config_path, quiet=True)

    outprefix = ('test_summary_feature_hasher_test_summary'
                 if use_feature_hashing else 'test_summary_test_summary')
    summprefix = ('test_summary_feature_hasher'
                  if use_feature_hashing else 'test_summary')

    with open(
            join(_my_dir, 'output', ('{}_LogisticRegression.results.'
                                     'json'.format(outprefix)))) as f:
        outd = json.loads(f.read())
        logistic_result_score = outd[0]['score']

    with open(join(_my_dir, 'output',
                   '{}_SVC.results.json'.format(outprefix))) as f:
        outd = json.loads(f.read())
        svm_result_score = outd[0]['score']

    # note that Naive Bayes doesn't work with feature hashing
    if not use_feature_hashing:
        with open(
                join(_my_dir, 'output', ('{}_MultinomialNB.results.'
                                         'json'.format(outprefix)))) as f:
            outd = json.loads(f.read())
            naivebayes_result_score = outd[0]['score']

    with open(join(_my_dir, 'output', '{}_summary.tsv'.format(summprefix)),
              'r') as f:
        reader = csv.DictReader(f, dialect='excel-tab')

        for row in reader:
            # the learner results dictionaries should have 29 fields,
            # and all of these except results_table
            # should be printed (though some columns will be blank).
            eq_(len(row), 29)
            assert row['model_params']
            assert row['grid_score']
            assert row['score']

            if row['learner_name'] == 'LogisticRegression':
                logistic_summary_score = float(row['score'])
            elif row['learner_name'] == 'MultinomialNB':
                naivebayes_summary_score = float(row['score'])
            elif row['learner_name'] == 'SVC':
                svm_summary_score = float(row['score'])

    test_tuples = [(logistic_result_score, logistic_summary_score,
                    'LogisticRegression'),
                   (svm_result_score, svm_summary_score, 'SVC')]

    if not use_feature_hashing:
        test_tuples.append((naivebayes_result_score, naivebayes_summary_score,
                            'MultinomialNB'))

    for result_score, summary_score, learner_name in test_tuples:
        assert_almost_equal(result_score,
                            summary_score,
                            err_msg=('mismatched scores for {} '
                                     '(result:{}, summary:'
                                     '{})').format(learner_name, result_score,
                                                   summary_score))

    # We iterate over each model with an expected
    # accuracy score. Test proves that the report
    # written out at least has a correct format for
    # this line. See _print_fancy_output
    for report_name, val in (("LogisticRegression", .5), ("MultinomialNB", .5),
                             ("SVC", .6333)):
        filename = "test_summary_test_summary_{}.results".format(report_name)
        results_path = join(_my_dir, 'output', filename)
        with open(results_path) as results_file:
            report = results_file.read()
            expected_string = "Accuracy = {:.1f}".format(val)
            eq_(
                expected_string in report,  # approximate
                True,
                msg="{} is not in {}".format(expected_string, report))
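The '# approximate' comment is doing real work in these last two examples: the expected value is formatted to one decimal place before the substring match, so the report only has to agree with the expected accuracy to that precision. For instance:

print("Accuracy = {:.1f}".format(0.6333))  # prints: Accuracy = 0.6
print("Accuracy = {:.1f}".format(0.5))     # prints: Accuracy = 0.5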