Code Example #1
File: test_report.py  Project: WeilamChung/rsmtool
def test_check_section_order_not_enough_sections():
    general_sections = ['evaluation', 'sysinfo']
    special_sections = ['placeholder_special_section']
    custom_sections = ['custom.ipynb']
    subgroups = ['prompt', 'gender']
    section_order = general_sections
    get_ordered_notebook_files(general_sections,
                               special_sections=special_sections,
                               custom_sections=custom_sections,
                               section_order=section_order,
                               subgroups=subgroups)
Code Example #3
File: test_report.py  Project: WeilamChung/rsmtool
def test_check_section_order_wrong_sections():
    general_sections = ['evaluation', 'sysinfo']
    special_sections = ['placeholder_special_section']
    custom_sections = ['custom.ipynb']
    subgroups = []
    section_order = ['extra_section1', 'extra_section2']
    get_ordered_notebook_files(general_sections,
                               special_sections=special_sections,
                               custom_sections=custom_sections,
                               section_order=section_order,
                               subgroups=subgroups)
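
The two tests above pass a section_order that either does not cover all of the requested sections (Code Example #1) or names sections that were never requested (Code Example #3). The snippets do not show how the failure is asserted; presumably the original file marks these tests as expecting an exception, for instance with nose's @raises decorator. A minimal sketch of an equivalent explicit check, assuming the same test-module imports as the snippets and assuming (not confirmed here) that get_ordered_notebook_files signals the problem with a ValueError:

from nose.tools import assert_raises

def test_check_section_order_wrong_sections_explicit():
    # assumption: get_ordered_notebook_files raises ValueError when
    # section_order does not match the requested sections
    general_sections = ['evaluation', 'sysinfo']
    section_order = ['extra_section1', 'extra_section2']
    assert_raises(ValueError,
                  get_ordered_notebook_files,
                  general_sections,
                  section_order=section_order)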
Code Example #5
File: test_report.py  Project: WeilamChung/rsmtool
def test_get_ordered_notebook_files_custom_rsmcompare():
    # custom and general sections, custom order and subgroups
    general_sections = ['feature_descriptives',
                        'score_distributions',
                        'features_by_group']
    custom_sections = ['/test_path/custom.ipynb']
    subgroups = ['prompt']
    section_order = ['feature_descriptives',
                     'score_distributions',
                     'custom',
                     'features_by_group']
    comparison_notebook_path = notebook_path_dict['general']['rsmcompare']
    notebook_files = get_ordered_notebook_files(general_sections,
                                                custom_sections=custom_sections,
                                                section_order=section_order,
                                                subgroups=subgroups,
                                                context='rsmcompare')

    expected_notebook_files = ([join(comparison_notebook_path, 'header.ipynb')] +
                               [join(comparison_notebook_path, s) + '.ipynb'
                                for s in ['feature_descriptives', 'score_distributions']] +
                               ['/test_path/custom.ipynb'] +
                               [join(comparison_notebook_path, 'features_by_group.ipynb')] +
                               [join(comparison_notebook_path, 'footer.ipynb')])
    eq_(notebook_files, expected_notebook_files)
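
These tests rely on several names defined elsewhere in test_report.py: join, eq_, notebook_path, notebook_path_dict, and the general_section_list_* constants. A hedged sketch of what that module-level setup might look like; the import locations and the shape of notebook_path_dict are assumptions inferred from how the names are used in these snippets, not taken from the project:

from os.path import join
from nose.tools import eq_

# assumption: the function under test and the path/section constants come
# from rsmtool's report module; the exact module name may differ by version
from rsmtool.report import (get_ordered_notebook_files,
                            notebook_path,             # assumed: rsmtool's general notebook dir
                            notebook_path_dict,        # assumed: {'general': {...}, 'special': {...}}
                            general_section_list_rsmtool,
                            general_section_list_rsmeval,
                            general_section_list_rsmcompare)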
Code Example #6
File: test_report.py  Project: WeilamChung/rsmtool
def test_get_ordered_notebook_files_custom_rsmeval():

    # custom and general sections, custom order and subgroups

    general_sections = ['evaluation', 'consistency', 'evaluation_by_group']
    custom_sections = ['/test_path/custom.ipynb']
    subgroups = ['prompt']
    section_order = ['evaluation',
                     'consistency',
                     'custom',
                     'evaluation_by_group']
    notebook_path = notebook_path_dict['general']['rsmeval']
    notebook_files = get_ordered_notebook_files(general_sections,
                                                custom_sections=custom_sections,
                                                section_order=section_order,
                                                subgroups=subgroups,
                                                context='rsmeval')

    expected_notebook_files = ([join(notebook_path, 'header.ipynb')] +
                               [join(notebook_path, s) + '.ipynb'
                                for s in ['evaluation', 'consistency']] +
                               ['/test_path/custom.ipynb'] +
                               [join(notebook_path, 'evaluation_by_group.ipynb')] +
                               [join(notebook_path, 'footer.ipynb')])
    eq_(notebook_files, expected_notebook_files)
Code Example #7
File: test_report.py  Project: WeilamChung/rsmtool
def test_get_ordered_notebook_files_custom_rsmeval():

    # custom and general sections, custom order and subgroups

    general_sections = ['evaluation', 'consistency', 'evaluation_by_group']
    custom_sections = ['/test_path/custom.ipynb']
    subgroups = ['prompt']
    section_order = [
        'evaluation', 'consistency', 'custom', 'evaluation_by_group'
    ]
    notebook_path = notebook_path_dict['general']['rsmeval']
    notebook_files = get_ordered_notebook_files(
        general_sections,
        custom_sections=custom_sections,
        section_order=section_order,
        subgroups=subgroups,
        context='rsmeval')

    expected_notebook_files = (
        [join(notebook_path, 'header.ipynb')] + [
            join(notebook_path, s) + '.ipynb'
            for s in ['evaluation', 'consistency']
        ] + ['/test_path/custom.ipynb'] +
        [join(notebook_path, 'evaluation_by_group.ipynb')] +
        [join(notebook_path, 'footer.ipynb')])
    eq_(notebook_files, expected_notebook_files)
Code Example #8
File: test_report.py  Project: WeilamChung/rsmtool
def test_get_ordered_notebook_files_custom_rsmtool():

    # custom and general sections, custom order and subgroups
    general_sections = ['data_description', 'pca', 'data_description_by_group']
    custom_sections = ['/test_path/custom.ipynb']
    special_sections = ['placeholder_special_section']
    subgroups = ['prompt']
    section_order = ['custom',
                     'data_description',
                     'pca',
                     'data_description_by_group',
                     'placeholder_special_section']
    special_notebook_path = notebook_path_dict['special']['rsmtool']
    notebook_files = get_ordered_notebook_files(general_sections,
                                                custom_sections=custom_sections,
                                                special_sections=special_sections,
                                                section_order=section_order,
                                                subgroups=subgroups,
                                                model_type='skll',
                                                context='rsmtool')

    expected_notebook_files = ([join(notebook_path, 'header.ipynb')] +
                               ['/test_path/custom.ipynb'] +
                               [join(notebook_path, s) + '.ipynb'
                                for s in ['data_description', 'pca',
                                          'data_description_by_group']] +
                               [join(special_notebook_path, 'placeholder_special_section.ipynb')] +
                               [join(notebook_path, 'footer.ipynb')])
    eq_(notebook_files, expected_notebook_files)
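
The expected lists in these tests encode a simple contract: a header notebook comes first, a footer notebook comes last, and in between the requested sections appear in section_order, with general sections resolved under the context's general notebook directory, special sections under the special directory, and custom sections kept exactly as the paths supplied by the user. A simplified re-implementation of just that ordering logic, for illustration only; it ignores subgroups, model_type, and validation, and is not rsmtool's actual code:

from os.path import basename, join, splitext

def ordered_notebook_files_sketch(section_order, general_sections,
                                  special_sections, custom_section_paths,
                                  general_path, special_path):
    # custom sections are referred to in section_order by file name without extension
    custom_by_name = {splitext(basename(path))[0]: path
                      for path in custom_section_paths}
    files = [join(general_path, 'header.ipynb')]
    for section in section_order:
        if section in general_sections:
            files.append(join(general_path, section + '.ipynb'))
        elif section in special_sections:
            files.append(join(special_path, section + '.ipynb'))
        elif section in custom_by_name:
            files.append(custom_by_name[section])
    files.append(join(general_path, 'footer.ipynb'))
    return files

# e.g., with placeholder directories, this reproduces the ordering from Code Example #8:
print(ordered_notebook_files_sketch(
    ['custom', 'data_description', 'pca', 'data_description_by_group',
     'placeholder_special_section'],
    ['data_description', 'pca', 'data_description_by_group'],
    ['placeholder_special_section'],
    ['/test_path/custom.ipynb'],
    '/notebooks', '/notebooks/special'))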
Code Example #9
File: test_report.py  Project: WeilamChung/rsmtool
def test_get_ordered_notebook_files_custom_rsmcompare():
    # custom and general sections, custom order and subgroups
    general_sections = [
        'feature_descriptives', 'score_distributions', 'features_by_group'
    ]
    custom_sections = ['/test_path/custom.ipynb']
    subgroups = ['prompt']
    section_order = [
        'feature_descriptives', 'score_distributions', 'custom',
        'features_by_group'
    ]
    comparison_notebook_path = notebook_path_dict['general']['rsmcompare']
    notebook_files = get_ordered_notebook_files(
        general_sections,
        custom_sections=custom_sections,
        section_order=section_order,
        subgroups=subgroups,
        context='rsmcompare')

    expected_notebook_files = (
        [join(comparison_notebook_path, 'header.ipynb')] + [
            join(comparison_notebook_path, s) + '.ipynb'
            for s in ['feature_descriptives', 'score_distributions']
        ] + ['/test_path/custom.ipynb'] +
        [join(comparison_notebook_path, 'features_by_group.ipynb')] +
        [join(comparison_notebook_path, 'footer.ipynb')])
    eq_(notebook_files, expected_notebook_files)
Code Example #10
File: test_report.py  Project: WeilamChung/rsmtool
def test_get_ordered_notebook_files_custom_rsmtool():

    # custom and general sections, custom order and subgroups
    general_sections = ['data_description', 'pca', 'data_description_by_group']
    custom_sections = ['/test_path/custom.ipynb']
    special_sections = ['placeholder_special_section']
    subgroups = ['prompt']
    section_order = [
        'custom', 'data_description', 'pca', 'data_description_by_group',
        'placeholder_special_section'
    ]
    special_notebook_path = notebook_path_dict['special']['rsmtool']
    notebook_files = get_ordered_notebook_files(
        general_sections,
        custom_sections=custom_sections,
        special_sections=special_sections,
        section_order=section_order,
        subgroups=subgroups,
        model_type='skll',
        context='rsmtool')

    expected_notebook_files = (
        [join(notebook_path, 'header.ipynb')] + ['/test_path/custom.ipynb'] + [
            join(notebook_path, s) + '.ipynb'
            for s in ['data_description', 'pca', 'data_description_by_group']
        ] +
        [join(special_notebook_path, 'placeholder_special_section.ipynb')] +
        [join(notebook_path, 'footer.ipynb')])
    eq_(notebook_files, expected_notebook_files)
Code Example #11
File: test_report.py  Project: WeilamChung/rsmtool
def test_get_ordered_notebook_files_default_rsmcompare():
    general_sections = ['all']
    comparison_notebook_path = notebook_path_dict['general']['rsmcompare']
    notebook_files = get_ordered_notebook_files(general_sections,
                                                context='rsmcompare')
    no_subgroup_list = [s for s in general_section_list_rsmcompare
                        if not s.endswith('by_group')]
    section_list = ['header'] + no_subgroup_list + ['footer']

    general_section_plus_extension = [s+'.ipynb' for s in section_list]
    expected_notebook_files = [join(comparison_notebook_path, s)
                               for s in general_section_plus_extension]
    eq_(notebook_files, expected_notebook_files)
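
In the default-section tests the subgroups argument is omitted, so every section whose name ends in "by_group" is dropped from the expected list. The comprehension used above is easy to check in isolation (the section names here are made up for the demonstration):

sections = ['evaluation', 'evaluation_by_group', 'sysinfo']
no_subgroup_list = [s for s in sections if not s.endswith('by_group')]
assert no_subgroup_list == ['evaluation', 'sysinfo']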
Code Example #12
File: test_report.py  Project: jkahn/rsmtool
def test_get_ordered_notebook_files_default_rsmeval():
    general_sections = ['all']
    notebook_files = get_ordered_notebook_files(general_sections,
                                                context='rsmeval')
    no_subgroup_list = [s for s in general_section_list_rsmeval
                        if not s.endswith('by_group')]
    section_list = ['header'] + no_subgroup_list + ['footer']

    general_section_plus_extension = ['{}.ipynb'.format(s) for s in section_list]
    expected_notebook_files = [join(notebook_path_dict['general']['rsmeval'], s)
                               for s in
                               general_section_plus_extension]
    eq_(notebook_files, expected_notebook_files)
Code Example #13
File: test_report.py  Project: WeilamChung/rsmtool
def test_get_ordered_notebook_files_default_rsmeval():
    general_sections = ['all']
    notebook_files = get_ordered_notebook_files(general_sections,
                                                context='rsmeval')
    no_subgroup_list = [s for s in general_section_list_rsmeval
                        if not s.endswith('by_group')]
    section_list = ['header'] + no_subgroup_list + ['footer']

    # replace data_description section with data_description_eval
    updated_section_list = [sname+'_eval' if sname == 'data_description' else sname
                            for sname in section_list]
    general_section_plus_extension = ['{}.ipynb'.format(s) for s in updated_section_list]
    expected_notebook_files = [join(notebook_path_dict['general']['rsmeval'], s)
                               for s in
                               general_section_plus_extension]
    eq_(notebook_files, expected_notebook_files)
Code Example #14
File: test_report.py  Project: WeilamChung/rsmtool
def test_get_ordered_notebook_files_default_rsmtool():
    general_sections = ['all']
    notebook_files = get_ordered_notebook_files(general_sections,
                                                model_type='skll',
                                                context='rsmtool')
    no_subgroup_list = [s for s in general_section_list_rsmtool
                        if not s.endswith('by_group')]
    section_list = ['header'] + no_subgroup_list + ['footer']

    # replace model section with skll_model.

    updated_section_list = ['skll_'+sname if sname == 'model' else sname for sname in section_list]
    general_section_plus_extension = [s+'.ipynb' for s in updated_section_list]
    expected_notebook_files = [join(notebook_path, s)
                               for s in
                               general_section_plus_extension]
    eq_(notebook_files, expected_notebook_files)
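
The two default tests above also rename one section before building the expected paths: rsmeval swaps data_description for data_description_eval, and rsmtool swaps model for skll_model when model_type='skll'. Both renames are plain list comprehensions; a quick standalone check, with placeholder section names:

section_list = ['header', 'data_description', 'model', 'footer']

rsmeval_sections = [s + '_eval' if s == 'data_description' else s
                    for s in section_list]
assert rsmeval_sections == ['header', 'data_description_eval', 'model', 'footer']

rsmtool_sections = ['skll_' + s if s == 'model' else s for s in section_list]
assert rsmtool_sections == ['header', 'data_description', 'skll_model', 'footer']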
Code Example #15
File: test_report.py  Project: WeilamChung/rsmtool
def test_get_ordered_notebook_files_default_rsmcompare():
    general_sections = ['all']
    comparison_notebook_path = notebook_path_dict['general']['rsmcompare']
    notebook_files = get_ordered_notebook_files(general_sections,
                                                context='rsmcompare')
    no_subgroup_list = [
        s for s in general_section_list_rsmcompare
        if not s.endswith('by_group')
    ]
    section_list = ['header'] + no_subgroup_list + ['footer']

    general_section_plus_extension = [s + '.ipynb' for s in section_list]
    expected_notebook_files = [
        join(comparison_notebook_path, s)
        for s in general_section_plus_extension
    ]
    eq_(notebook_files, expected_notebook_files)
Code Example #16
File: test_report.py  Project: WeilamChung/rsmtool
def test_get_ordered_notebook_files_default_rsmeval():
    general_sections = ['all']
    notebook_files = get_ordered_notebook_files(general_sections,
                                                context='rsmeval')
    no_subgroup_list = [
        s for s in general_section_list_rsmeval if not s.endswith('by_group')
    ]
    section_list = ['header'] + no_subgroup_list + ['footer']

    # replace data_description section with data_description_eval
    updated_section_list = [
        sname + '_eval' if sname == 'data_description' else sname
        for sname in section_list
    ]
    general_section_plus_extension = [
        '{}.ipynb'.format(s) for s in updated_section_list
    ]
    expected_notebook_files = [
        join(notebook_path_dict['general']['rsmeval'], s)
        for s in general_section_plus_extension
    ]
    eq_(notebook_files, expected_notebook_files)
Code Example #17
File: test_report.py  Project: WeilamChung/rsmtool
def test_get_ordered_notebook_files_default_rsmtool():
    general_sections = ['all']
    notebook_files = get_ordered_notebook_files(general_sections,
                                                model_type='skll',
                                                context='rsmtool')
    no_subgroup_list = [
        s for s in general_section_list_rsmtool if not s.endswith('by_group')
    ]
    section_list = ['header'] + no_subgroup_list + ['footer']

    # replace model section with skll_model.

    updated_section_list = [
        'skll_' + sname if sname == 'model' else sname
        for sname in section_list
    ]
    general_section_plus_extension = [
        s + '.ipynb' for s in updated_section_list
    ]
    expected_notebook_files = [
        join(notebook_path, s) for s in general_section_plus_extension
    ]
    eq_(notebook_files, expected_notebook_files)
Code Example #18
File: rsmcompare.py  Project: WeilamChung/rsmtool
def run_comparison(config_file, output_dir):
    """
    Run a comparison between the two RSMTool experiments
    specified in the config file and write out the
    comparison report to the output directory.
    """

    logger = logging.getLogger(__name__)

    # load the information from the config file
    # read in the main config file
    config_obj = read_json_file(config_file)
    config_obj = check_main_config(config_obj, context='rsmcompare')

    # get the subgroups if any
    subgroups = config_obj.get('subgroups')

    # get the directory where the config file lives
    configpath = dirname(config_file)

    # get the information about the "old" experiment
    description_old = config_obj['description_old']
    experiment_id_old = config_obj['experiment_id_old']
    experiment_dir_old = locate_file(config_obj['experiment_dir_old'],
                                     configpath)
    if not experiment_dir_old:
        raise FileNotFoundError("The directory {} "
                                "does not exist.".format(
                                    config_obj['experiment_dir_old']))
    else:
        csvdir_old = normpath(join(experiment_dir_old, 'output'))
        figdir_old = normpath(join(experiment_dir_old, 'figure'))
        if not exists(csvdir_old) or not exists(figdir_old):
            raise FileNotFoundError("The directory {} does not contain "
                                    "the output of an rsmtool "
                                    "experiment.".format(experiment_dir_old))
    use_scaled_predictions_old = config_obj['use_scaled_predictions_old']

    # get the information about the "new" experiment
    description_new = config_obj['description_new']
    experiment_id_new = config_obj['experiment_id_new']
    experiment_dir_new = locate_file(config_obj['experiment_dir_new'],
                                     configpath)
    if not experiment_dir_new:
        raise FileNotFoundError("The directory {} "
                                "does not exist.".format(
                                    config_obj['experiment_dir_new']))
    else:
        csvdir_new = normpath(join(experiment_dir_new, 'output'))
        figdir_new = normpath(join(experiment_dir_new, 'figure'))
        if not exists(csvdir_new) or not exists(figdir_new):
            raise FileNotFoundError("The directory {} does not contain "
                                    "the output of an rsmtool "
                                    "experiment.".format(experiment_dir_new))
    use_scaled_predictions_new = config_obj['use_scaled_predictions_new']

    # are there specific general report sections we want to include?
    general_report_sections = config_obj['general_sections']

    # what about the special or custom sections?
    special_report_sections = config_obj['special_sections']

    custom_report_section_paths = config_obj['custom_sections']

    if custom_report_section_paths:
        logger.info('Locating custom report sections')
        custom_report_sections = locate_custom_sections(
            custom_report_section_paths, configpath)
    else:
        custom_report_sections = []

    section_order = config_obj['section_order']

    chosen_notebook_files = get_ordered_notebook_files(general_report_sections,
                                                       special_report_sections,
                                                       custom_report_sections,
                                                       section_order,
                                                       subgroups,
                                                       model_type=None,
                                                       context='rsmcompare')

    # now generate the comparison report
    logger.info('Starting report generation')
    create_comparison_report(
        experiment_id_old,
        description_old,
        csvdir_old,
        figdir_old,
        experiment_id_new,
        description_new,
        csvdir_new,
        figdir_new,
        output_dir,
        subgroups,
        chosen_notebook_files,
        use_scaled_predictions_old=use_scaled_predictions_old,
        use_scaled_predictions_new=use_scaled_predictions_new)
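
run_comparison reads a fixed set of fields from the JSON configuration file after check_main_config has validated it. A hedged sketch of such a configuration, shown as a Python dict for brevity (the real file is JSON, the values are placeholders, and fields such as special_sections, custom_sections, section_order, and subgroups may be optional or defaulted by check_main_config):

rsmcompare_config = {
    'experiment_id_old': 'baseline_experiment',
    'description_old': 'baseline rsmtool run',
    'experiment_dir_old': '/path/to/baseline_output',    # must contain output/ and figure/
    'use_scaled_predictions_old': True,
    'experiment_id_new': 'candidate_experiment',
    'description_new': 'candidate rsmtool run',
    'experiment_dir_new': '/path/to/candidate_output',   # must contain output/ and figure/
    'use_scaled_predictions_new': True,
    'general_sections': ['all'],
    'special_sections': None,
    'custom_sections': None,
    'section_order': None,
    'subgroups': ['prompt']
}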
Code Example #19
File: rsmcompare.py  Project: WeilamChung/rsmtool
def run_comparison(config_file, output_dir):
    """
    Run a comparison between the two RSMTool experiments
    specified in the config file and write out the
    comparison report to the output directory.
    """

    logger = logging.getLogger(__name__)

    # load the information from the config file
    # read in the main config file
    config_obj = read_json_file(config_file)
    config_obj = check_main_config(config_obj, context='rsmcompare')

    # get the subgroups if any
    subgroups = config_obj.get('subgroups')

    # get the directory where the config file lives
    configpath = dirname(config_file)

    # get the information about the "old" experiment
    description_old = config_obj['description_old']
    experiment_id_old = config_obj['experiment_id_old']
    experiment_dir_old = locate_file(config_obj['experiment_dir_old'], configpath)
    if not experiment_dir_old:
        raise FileNotFoundError("The directory {} "
                                "does not exist.".format(config_obj['experiment_dir_old']))
    else:
        csvdir_old = normpath(join(experiment_dir_old, 'output'))
        figdir_old = normpath(join(experiment_dir_old, 'figure'))
        if not exists(csvdir_old) or not exists(figdir_old):
            raise FileNotFoundError("The directory {} does not contain "
                                    "the output of an rsmtool "
                                    "experiment.".format(experiment_dir_old))
    use_scaled_predictions_old = config_obj['use_scaled_predictions_old']

    # get the information about the "new" experiment
    description_new = config_obj['description_new']
    experiment_id_new = config_obj['experiment_id_new']
    experiment_dir_new = locate_file(config_obj['experiment_dir_new'], configpath)
    if not experiment_dir_new:
        raise FileNotFoundError("The directory {} "
                                "does not exist.".format(config_obj['experiment_dir_new']))
    else:
        csvdir_new = normpath(join(experiment_dir_new, 'output'))
        figdir_new = normpath(join(experiment_dir_new, 'figure'))
        if not exists(csvdir_new) or not exists(figdir_new):
            raise FileNotFoundError("The directory {} does not contain "
                                    "the output of an rsmtool "
                                    "experiment.".format(experiment_dir_new))
    use_scaled_predictions_new = config_obj['use_scaled_predictions_new']

    # are there specific general report sections we want to include?
    general_report_sections = config_obj['general_sections']

    # what about the special or custom sections?
    special_report_sections = config_obj['special_sections']

    custom_report_section_paths = config_obj['custom_sections']

    if custom_report_section_paths:
        logger.info('Locating custom report sections')
        custom_report_sections = locate_custom_sections(custom_report_section_paths,
                                                         configpath)
    else:
        custom_report_sections = []

    section_order = config_obj['section_order']

    chosen_notebook_files = get_ordered_notebook_files(general_report_sections,
                                                       special_report_sections,
                                                       custom_report_sections,
                                                       section_order,
                                                       subgroups,
                                                       model_type=None,
                                                       context='rsmcompare')

    # now generate the comparison report
    logger.info('Starting report generation')
    create_comparison_report(experiment_id_old, description_old,
                             csvdir_old, figdir_old, experiment_id_new,
                             description_new, csvdir_new, figdir_new,
                             output_dir, subgroups,
                             chosen_notebook_files,
                             use_scaled_predictions_old=use_scaled_predictions_old,
                             use_scaled_predictions_new=use_scaled_predictions_new)
Code Example #20
def load_experiment_data(main_config_file, outdir):

    """
    Set up the experiment by loading the training
    and evaluation data sets and preprocessing them.
    """

    logger = logging.getLogger(__name__)

    # read in the main config file
    logger.info('Reading configuration file: {}'.format(main_config_file))
    config_obj = read_json_file(main_config_file)
    config_obj = check_main_config(config_obj)

    # get the directory where the config file lives
    configpath = dirname(main_config_file)

    # get the experiment ID
    experiment_id = config_obj['experiment_id']

    # get the description
    description = config_obj['description']

    # get the column name for the labels for the training and testing data
    train_label_column = config_obj['train_label_column']
    test_label_column = config_obj['test_label_column']

    # get the column name that will hold the ID for
    # both the training and the test data
    id_column = config_obj['id_column']

    # get the specified trim min and max values
    spec_trim_min, spec_trim_max = get_trim_min_max(config_obj)

    # get the name of the optional column that
    # contains response length.
    length_column = config_obj['length_column']

    # get the name of the optional column that
    # contains the second human score
    second_human_score_column = config_obj['second_human_score_column']

    # get the name of the optional column that
    # contains the candidate ID
    candidate_column = config_obj['candidate_column']

    # if the test label column is the same as the
    # second human score column, raise an error
    if test_label_column == second_human_score_column:
        raise ValueError("'test_label_column' and "
                         "'second_human_score_column' cannot have the "
                         "same value.")

    # get the name of the model that we want to train and
    # check that it's valid
    model_name = config_obj['model']
    model_type = check_model_name(model_name)

    # are we excluding zero scores?
    exclude_zero_scores = config_obj['exclude_zero_scores']

    # if we are excluding zero scores but trim_min
    # is set to 0, then we need to warn the user
    if exclude_zero_scores and spec_trim_min == 0:
        logger.warning("'exclude_zero_scores' is set to True but "
                       "'trim_min' is set to 0. This may cause "
                       "unexpected behavior.")

    # are we filtering on any other columns?
    flag_column_dict = check_flag_column(config_obj)

    # are we generating fake labels?
    use_fake_train_labels = train_label_column == 'fake'
    use_fake_test_labels = test_label_column == 'fake'

    # are we analyzing scaled or raw prediction values
    use_scaled_predictions = config_obj['use_scaled_predictions']

    # get the subgroups if any
    subgroups = config_obj.get('subgroups')

    # are there specific general report sections we want to include?
    general_report_sections = config_obj['general_sections']

    # what about the special or custom sections?
    special_report_sections = config_obj['special_sections']

    custom_report_section_paths = config_obj['custom_sections']

    if custom_report_section_paths:
        logger.info('Locating custom report sections')
        custom_report_sections = locate_custom_sections(custom_report_section_paths,
                                                         configpath)
    else:
        custom_report_sections = []

    section_order = config_obj['section_order']

    chosen_notebook_files = get_ordered_notebook_files(general_report_sections,
                                                       special_report_sections,
                                                       custom_report_sections,
                                                       section_order,
                                                       subgroups,
                                                       model_type=model_type,
                                                       context='rsmtool')

    # Read in the feature configurations.
    # Location of feature file
    feature_field = config_obj['features']

    # Check whether feature subset file exists and whether we are using
    # feature subset or prefix
    feature_subset_file = config_obj['feature_subset_file']
    if feature_subset_file:
        feature_subset_file_location = locate_file(feature_subset_file, configpath)
        if not feature_subset_file_location:
            raise FileNotFoundError('Feature subset file {} not '
                                    'found.\n'.format(config_obj['feature_subset_file']))

    feature_subset = config_obj['feature_subset']
    feature_prefix = config_obj['feature_prefix']

    # if the user requested feature_subset file and feature subset,
    # read the file and check its format
    if feature_subset_file and feature_subset:
        feature_subset_specs = pd.read_csv(feature_subset_file_location)
        check_feature_subset_file(feature_subset_specs, feature_subset)
    else:
        feature_subset_specs = None


    # Do we need to automatically find the best transformations/change sign?
    select_transformations = config_obj['select_transformations']
    feature_sign = config_obj['sign']

    requested_features = []
    feature_specs = {}
    select_features_automatically = True

    # For backward compatibility, we check whether this field can
    # be set to all and set the select_transformations to true
    # as was done in the previous version.
    if feature_field == 'all':
        select_transformations = True
    elif feature_field is not None:
        feature_file_location = locate_file(feature_field, configpath)
        select_features_automatically = False
        if not feature_file_location:
            raise FileNotFoundError('Feature file {} not '
                                    'found.\n'.format(config_obj['features']))
        else:
            logger.info('Reading feature file: {}'.format(feature_file_location))
            feature_json = read_json_file(feature_file_location)
            feature_specs = normalize_and_validate_feature_file(feature_json)
            requested_features = [fdict['feature'] for fdict in feature_specs['features']]

    # check to make sure that `length_column` or `second_human_score_column`
    # are not also included in the requested features, if they are specified
    if (length_column and
        length_column in requested_features):
        raise ValueError("The value of 'length_column' ('{}') cannot be "
                         "used as a model feature.".format(length_column))

    if (second_human_score_column and
        second_human_score_column in requested_features):
        raise ValueError("The value of 'second_human_score_column' ('{}') cannot be "
                         "used as a model feature.".format(second_human_score_column))

    # Specify column names that cannot be used as features
    reserved_column_names = list(set(['spkitemid', 'spkitemlab',
                                      'itemType', 'r1', 'r2', 'score',
                                      'sc', 'sc1', 'adj',
                                      train_label_column,
                                      test_label_column,
                                      id_column] + subgroups + list(flag_column_dict.keys())))

    # if `second_human_score_column` is specified, then
    # we need to add `sc2` to the list of reserved column
    # names. And same for 'length' and 'candidate', if `length_column`
    # and `candidate_column` are specified
    if second_human_score_column:
        reserved_column_names.append('sc2')
    if length_column:
        reserved_column_names.append('length')
    if candidate_column:
        reserved_column_names.append('candidate')

    # Make sure that the training data as specified in the
    # config file actually exists on disk and if it does,
    # load it and filter out the bad rows and features with
    # zero standard deviation. Also double check that the requested
    # features exist in the data or obtain the feature names if
    # no feature file was given.
    train_file_location = locate_file(config_obj['train_file'], configpath)
    if not train_file_location:
        raise FileNotFoundError('Error: Training file {} '
                                'not found.\n'.format(config_obj['train_file']))
    else:
        logger.info('Reading training data: {}'.format(train_file_location))

    (df_train_features,
     df_train_metadata,
     df_train_other_columns,
     df_train_excluded,
     df_train_length,
     _,
     df_train_flagged_responses,
     used_trim_min,
     used_trim_max,
     feature_names) = load_and_filter_data(train_file_location,
                                           train_label_column,
                                           id_column,
                                           length_column,
                                           None,
                                           candidate_column,
                                           requested_features,
                                           reserved_column_names,
                                           spec_trim_min,
                                           spec_trim_max,
                                           flag_column_dict,
                                           subgroups,
                                           exclude_zero_scores=exclude_zero_scores,
                                           exclude_zero_sd=True,
                                           feature_subset_specs=feature_subset_specs,
                                           feature_subset=feature_subset,
                                           feature_prefix=feature_prefix,
                                           use_fake_labels=use_fake_train_labels)

    # Generate feature specifications now that we
    # know what features are selected
    if select_features_automatically:
        if select_transformations is False:
            feature_specs = generate_default_specs(feature_names)
        else:
            feature_specs = generate_specs_from_data(feature_names,
                                                     'sc1',
                                                     df_train_features,
                                                     feature_subset_specs=feature_subset_specs,
                                                     feature_sign=feature_sign)
    # Sanity check to make sure the function returned the
    # same feature names as specified in feature json file,
    # if there was one
    elif not select_features_automatically:
        assert feature_names == requested_features

    # Do the same for the test data except we can ignore the trim min
    # and max since we already have that from the training data and
    # we have the feature_names when no feature file was specified.
    # We also allow features with 0 standard deviation in the test file.
    test_file_location = locate_file(config_obj['test_file'], configpath)
    if not test_file_location:
        raise FileNotFoundError('Error: Evaluation file '
                                '{} not found.\n'.format(config_obj['test_file']))
    elif (test_file_location == train_file_location
            and train_label_column == test_label_column):
        logging.warning('The same data file and label '
                        'column are used for both training '
                        'and evaluating the model. No second '
                        'score analysis will be performed, even '
                        'if requested.')
        df_test_features = df_train_features.copy()
        df_test_metadata = df_train_metadata.copy()
        df_test_excluded = df_train_excluded.copy()
        df_test_other_columns = df_train_other_columns.copy()
        df_test_flagged_responses = df_train_flagged_responses.copy()
        df_test_human_scores = pd.DataFrame()
    else:
        logger.info('Reading evaluation data: {}'.format(test_file_location))
        (df_test_features,
         df_test_metadata,
         df_test_other_columns,
         df_test_excluded,
         _,
         df_test_human_scores,
         df_test_flagged_responses,
         _, _, _) = load_and_filter_data(test_file_location,
                                         test_label_column,
                                         id_column,
                                         None,
                                         second_human_score_column,
                                         candidate_column,
                                         feature_names,
                                         reserved_column_names,
                                         used_trim_min,
                                         used_trim_max,
                                         flag_column_dict,
                                         subgroups,
                                         exclude_zero_scores=exclude_zero_scores,
                                         exclude_zero_sd=False,
                                         use_fake_labels=use_fake_test_labels)

    return (df_train_features, df_test_features,
            df_train_metadata, df_test_metadata,
            df_train_other_columns, df_test_other_columns,
            df_train_excluded, df_test_excluded,
            df_train_length, df_test_human_scores,
            df_train_flagged_responses,
            df_test_flagged_responses,
            experiment_id, description,
            train_file_location, test_file_location,
            feature_specs, model_name, model_type,
            train_label_column, test_label_column,
            id_column, length_column, second_human_score_column,
            candidate_column,
            subgroups,
            feature_subset_file,
            used_trim_min, used_trim_max,
            use_scaled_predictions, exclude_zero_scores,
            select_features_automatically,
            chosen_notebook_files)
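
load_experiment_data pulls everything it needs from the main JSON configuration. A hedged sketch of the core fields it reads, again as a Python dict with placeholder values; optional fields such as length_column, second_human_score_column, candidate_column, feature_subset_file, feature_subset, feature_prefix, select_transformations, sign, special_sections, custom_sections, and section_order are also looked up and are typically defaulted by check_main_config:

rsmtool_config = {
    'experiment_id': 'my_experiment',
    'description': 'rsmtool experiment on placeholder data',
    'model': 'LinearRegression',            # validated by check_model_name
    'train_file': '/path/to/train.csv',     # placeholder paths
    'test_file': '/path/to/test.csv',
    'train_label_column': 'score',
    'test_label_column': 'score',
    'id_column': 'spkitemid',
    'trim_min': 1,
    'trim_max': 6,
    'features': '/path/to/features.json',   # or 'all' to select transformations automatically
    'exclude_zero_scores': True,
    'use_scaled_predictions': True,
    'subgroups': ['prompt', 'gender'],
    'general_sections': ['all']
}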
Code Example #21
def run_evaluation(config_file, output_dir):
    """
    Run RSMTool evaluation experiment using the given configuration
    file and generate all evaluation outputs in the given directory.
    """

    logger = logging.getLogger(__name__)

    # create the 'output' and the 'figure' sub-directories
    # where all the experiment output such as the CSV files
    # and the box plots will be saved
    csvdir = abspath(join(output_dir, 'output'))
    figdir = abspath(join(output_dir, 'figure'))
    reportdir = abspath(join(output_dir, 'report'))
    os.makedirs(csvdir, exist_ok=True)
    os.makedirs(figdir, exist_ok=True)
    os.makedirs(reportdir, exist_ok=True)

    # load the information from the config file
    # read in the main config file
    config_obj = read_json_file(config_file)
    config_obj = check_main_config(config_obj, context='rsmeval')

    # get the directory where the config file lives
    # if this is the 'expm' directory, then go
    # up one level.
    configpath = dirname(config_file)

    # get the experiment ID
    experiment_id = config_obj['experiment_id']

    # get the description
    description = config_obj['description']

    # get the column name for the labels for the training and testing data
    human_score_column = config_obj['human_score_column']
    system_score_column = config_obj['system_score_column']

    # get the name of the optional column that
    # contains the second human score
    second_human_score_column = config_obj['second_human_score_column']

    # if the human score column is the same as the
    # second human score column, raise an error
    if human_score_column == second_human_score_column:
        raise ValueError("'human_score_column' and "
                         "'second_human_score_column' "
                         "cannot have the same value.")

    # get the column name that will hold the ID for
    # both the training and the test data
    id_column = config_obj['id_column']

    # get the specified trim min and max, if any
    # and make sure they are numeric
    spec_trim_min, spec_trim_max = get_trim_min_max(config_obj)

    # get the subgroups if any
    subgroups = config_obj.get('subgroups')

    # get the candidate column if any and convert it to string
    candidate_column = config_obj['candidate_column']

    general_report_sections = config_obj['general_sections']

    # get any special sections that the user might have specified
    special_report_sections = config_obj['special_sections']

    # get any custom sections and locate them to make sure
    # that they exist, otherwise raise an exception
    custom_report_section_paths = config_obj['custom_sections']
    if custom_report_section_paths:
        logger.info('Locating custom report sections')
        custom_report_sections = locate_custom_sections(
            custom_report_section_paths, configpath)
    else:
        custom_report_sections = []

    section_order = config_obj['section_order']

    # check all section values and order and get the
    # ordered list of notebook files
    chosen_notebook_files = get_ordered_notebook_files(general_report_sections,
                                                       special_report_sections,
                                                       custom_report_sections,
                                                       section_order,
                                                       subgroups,
                                                       model_type=None,
                                                       context='rsmeval')
    # are we excluding zero scores?
    exclude_zero_scores = config_obj['exclude_zero_scores']

    # if we are excluding zero scores but trim_min
    # is set to 0, then we need to warn the user
    if exclude_zero_scores and spec_trim_min == 0:
        logger.warning("'exclude_zero_scores' is set to True but "
                       "'trim_min' is set to 0. This may cause "
                       "unexpected behavior.")

    # are we filtering on any other columns?
    flag_column_dict = check_flag_column(config_obj)

    # do we have the training set predictions and human scores CSV file
    scale_with = config_obj.get('scale_with')

    # scale_with can be one of the following:
    # (a) None       : the predictions are assumed to be 'raw' and should be used as is
    #                  when computing the metrics; the names for the final columns are
    #                  'raw', 'raw_trim' and 'raw_trim_round'.
    # (b) 'asis'     : the predictions are assumed to be pre-scaled and should be used as is
    #                  when computing the metrics; the names for the final columns are
    #                  'scale', 'scale_trim' and 'scale_trim_round'.
    # (c) a CSV file : the predictions are assumed to be 'raw' and should be scaled
    #                  before computing the metrics; the names for the final columns are
    #                  'scale', 'scale_trim' and 'scale_trim_round'.

    # we need to scale if and only if a CSV file is specified
    do_scaling = (scale_with is not None and scale_with != 'asis')

    # use scaled predictions for the analyses unless
    # we were told not to
    use_scaled_predictions = (scale_with is not None)

    # log an appropriate message
    if scale_with is None:
        message = ('Assuming given system predictions '
                   'are unscaled and will be used as such.')
    elif scale_with == 'asis':
        message = ('Assuming given system predictions '
                   'are already scaled and will be used as such.')
    else:
        message = ('Assuming given system predictions '
                   'are unscaled and will be scaled before use.')
    logger.info(message)

    # load the predictions from disk and make sure that the `id_column`
    # is read in as a string
    predictions_file_location = locate_file(config_obj['predictions_file'],
                                            configpath)
    if not predictions_file_location:
        raise FileNotFoundError('Error: Predictions file {} '
                                'not found.\n'.format(
                                    config_obj['predictions_file']))
    else:
        logger.info(
            'Reading predictions: {}'.format(predictions_file_location))
        string_columns = [id_column, candidate_column] + subgroups
        converter_dict = dict([(column, str) for column in string_columns
                               if column])

        df_pred = pd.read_csv(predictions_file_location,
                              converters=converter_dict)

    # make sure that the columns specified in the config file actually exist
    missing_columns = set([id_column, human_score_column,
                           system_score_column]).difference(df_pred.columns)
    if missing_columns:
        raise KeyError('Columns {} from the config file do not exist '
                       'in the predictions file.'.format(missing_columns))

    df_pred = rename_default_columns(df_pred, [], id_column,
                                     human_score_column,
                                     second_human_score_column, None,
                                     system_score_column, candidate_column)

    # check that the id_column contains unique values
    if df_pred['spkitemid'].size != df_pred['spkitemid'].unique().size:
        raise ValueError("The data contains duplicate response IDs "
                         "in '{}'. Please make sure all response IDs "
                         "are unique and re-run the tool.".format(id_column))

    df_pred = check_subgroups(df_pred, subgroups)

    # filter out the responses based on flag columns
    (df_responses_with_requested_flags,
     df_responses_with_excluded_flags) = filter_on_flag_columns(
         df_pred, flag_column_dict)

    # filter out rows that have non-numeric or zero human scores
    df_filtered, df_excluded = filter_on_column(
        df_responses_with_requested_flags,
        'sc1',
        'spkitemid',
        exclude_zeros=exclude_zero_scores)

    # make sure that the remaining data frame is not empty
    if len(df_filtered) == 0:
        raise ValueError("No responses remaining after filtering out "
                         "non-numeric human scores. No further analysis "
                         "can be run. ")

    # Change all non-numeric machine scores in excluded
    # data to NaNs for consistency with rsmtool.
    # NOTE: This will *not* work if *all* of the values
    # in column are non-numeric. This is a known bug in
    # pandas: https://github.com/pydata/pandas/issues/9589
    # Therefore, we need to add an additional check after this.
    df_excluded['raw'] = pd.to_numeric(df_excluded['raw'],
                                       errors='coerce').astype(float)

    # filter out the non-numeric machine scores from the rest of the data
    newdf, newdf_excluded = filter_on_column(df_filtered,
                                             'raw',
                                             'spkitemid',
                                             exclude_zeros=False)

    del df_filtered
    df_filtered_pred = newdf

    # make sure that the remaining data frame is not empty
    if len(df_filtered_pred) == 0:
        raise ValueError("No responses remaining after filtering out "
                         "non-numeric machine scores. No further analysis "
                         "can be run. ")

    df_excluded = pd.merge(df_excluded, newdf_excluded, how='outer')

    # set default values for scaling
    scale_pred_mean = 0
    scale_pred_sd = 1
    scale_human_mean = 0
    scale_human_sd = 1

    if do_scaling:
        scale_file_location = locate_file(scale_with, configpath)
        if not scale_file_location:
            raise FileNotFoundError(
                'Error: scaling file {} not found.\n'.format(scale_with))
        else:
            logger.info('Reading scaling file: {}'.format(scale_file_location))
            df_scale_with = pd.read_csv(scale_file_location)

        if 'sc1' not in df_scale_with.columns or 'prediction' not in df_scale_with.columns:
            raise KeyError('The CSV file specified for scaling '
                           'must have the "prediction" and the "sc1" '
                           'columns.')
        else:
            scale_pred_mean, scale_pred_sd = (
                df_scale_with['prediction'].mean(),
                df_scale_with['prediction'].std())
            scale_human_mean, scale_human_sd = (df_scale_with['sc1'].mean(),
                                                df_scale_with['sc1'].std())

    logger.info('Processing predictions')
    df_pred_processed = process_predictions(df_filtered_pred, scale_pred_mean,
                                            scale_pred_sd, scale_human_mean,
                                            scale_human_sd, spec_trim_min,
                                            spec_trim_max)
    if not scale_with:
        expected_score_types = ['raw', 'raw_trim', 'raw_trim_round']
    elif scale_with == 'asis':
        expected_score_types = ['scale', 'scale_trim', 'scale_trim_round']
    else:
        expected_score_types = [
            'raw', 'raw_trim', 'raw_trim_round', 'scale', 'scale_trim',
            'scale_trim_round'
        ]

    # extract separated data frames that we will write out
    # as separate files
    not_other_columns = set()

    prediction_columns = ['spkitemid', 'sc1'] + expected_score_types
    df_predictions_only = df_pred_processed[prediction_columns]
    not_other_columns.update(prediction_columns)

    metadata_columns = ['spkitemid'] + subgroups
    if candidate_column:
        metadata_columns.append('candidate')
    df_test_metadata = df_filtered_pred[metadata_columns]
    not_other_columns.update(metadata_columns)

    df_test_human_scores = pd.DataFrame()
    human_score_columns = ['spkitemid', 'sc1', 'sc2']
    if second_human_score_column and 'sc2' in df_filtered_pred:
        df_test_human_scores = df_filtered_pred[human_score_columns].copy()
        not_other_columns.update(['sc2'])
        # filter out any non-numeric values now
        # as well as zeros, if we were asked to
        df_test_human_scores['sc2'] = pd.to_numeric(
            df_test_human_scores['sc2'], errors='coerce').astype(float)
        if exclude_zero_scores:
            df_test_human_scores['sc2'] = df_test_human_scores['sc2'].replace(
                0, np.nan)

    # remove 'spkitemid' from `not_other_columns`
    # because we want that in the other columns
    # data frame
    not_other_columns.remove('spkitemid')

    # extract all of the other columns in the predictions file
    other_columns = [
        column for column in df_filtered_pred.columns
        if column not in not_other_columns
    ]
    df_pred_other_columns = df_filtered_pred[other_columns]

    logger.info('Saving pre-processed predictions and the metadata to disk')
    write_experiment_output([
        df_predictions_only, df_test_metadata, df_pred_other_columns,
        df_test_human_scores, df_excluded, df_responses_with_excluded_flags
    ], [
        'pred_processed', 'test_metadata', 'test_other_columns',
        'test_human_scores', 'test_excluded_responses',
        'test_responses_with_excluded_flags'
    ], experiment_id, csvdir)

    # do the data composition stats
    (df_test_excluded_analysis, df_data_composition,
     data_composition_by_group_dict
     ) = run_data_composition_analyses_for_rsmeval(
         df_test_metadata,
         df_excluded,
         subgroups,
         candidate_column,
         exclude_zero_scores=exclude_zero_scores)

    write_experiment_output([df_test_excluded_analysis, df_data_composition],
                            ['test_excluded_composition', 'data_composition'],
                            experiment_id, csvdir)

    # write the results of data composition analysis by group
    if subgroups:
        for group in subgroups:
            write_experiment_output([data_composition_by_group_dict[group]],
                                    ['data_composition_by_{}'.format(group)],
                                    experiment_id, csvdir)

    # run the analyses on the predictions of the model
    logger.info('Running analyses on predictions')
    (df_human_machine_eval, df_human_machine_eval_short, df_human_human_eval,
     eval_by_group_dict, df_degradation, df_confmatrix,
     df_score_dist) = run_prediction_analyses(
         df_predictions_only,
         df_test_metadata,
         df_test_human_scores,
         subgroups,
         second_human_score_column,
         exclude_zero_scores=exclude_zero_scores,
         use_scaled_predictions=use_scaled_predictions)

    write_experiment_output([
        df_human_machine_eval, df_human_machine_eval_short,
        df_human_human_eval, df_degradation, df_confmatrix, df_score_dist
    ], [
        'eval', 'eval_short', 'consistency', 'degradation', 'confMatrix',
        'score_dist'
    ],
                            experiment_id,
                            csvdir,
                            reset_index=True)

    # if we are using subgroups, then write out the subgroup
    # specific output and include the by group section
    # in the final report
    if subgroups:
        for group in subgroups:
            eval_by_group, consistency_by_group = eval_by_group_dict[group]
            write_experiment_output([eval_by_group, consistency_by_group], [
                'eval_by_{}'.format(group), 'consistency_by_{}'.format(group)
            ],
                                    experiment_id,
                                    csvdir,
                                    reset_index=True)

    # generate the report
    logger.info('Starting report generation')
    create_report(experiment_id,
                  description,
                  '',
                  '',
                  '',
                  predictions_file_location,
                  csvdir,
                  figdir,
                  subgroups,
                  None,
                  second_human_score_column,
                  chosen_notebook_files,
                  exclude_zero_scores=exclude_zero_scores,
                  use_scaled_predictions=use_scaled_predictions)
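
Like the other entry points, run_evaluation is driven entirely by the fields it reads from config_obj. A hedged sketch of a matching configuration, as a Python dict with placeholder values (the real file is JSON, and check_main_config supplies defaults for the optional fields):

rsmeval_config = {
    'experiment_id': 'my_eval_experiment',
    'description': 'evaluation of externally generated scores',
    'predictions_file': '/path/to/predictions.csv',   # placeholder path
    'system_score_column': 'prediction',
    'human_score_column': 'sc1',
    'second_human_score_column': 'sc2',
    'id_column': 'spkitemid',
    'candidate_column': None,
    'trim_min': 1,
    'trim_max': 6,
    'exclude_zero_scores': True,
    # scale_with: None -> use raw scores as is; 'asis' -> scores are already
    # scaled; a CSV path -> scale raw scores against that file's predictions
    'scale_with': 'asis',
    'subgroups': ['prompt'],
    'general_sections': ['all'],
    'special_sections': None,
    'custom_sections': None,
    'section_order': None
}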