Ejemplo n.º 1
0
def test_filter_features_libsvm_output_argparse():
    """
    Make sure filter_features exits when passing in output libsvm files
    """

    ff_cmd_args = ['foo.csv', 'bar.libsvm', '-f', 'a', 'b', 'c']

    # SystemExit is the expected outcome here, so it is caught and treated
    # as success; letting it propagate would make the test runner report
    # the correct behavior as a failure.
    try:
        ff.main(argv=ff_cmd_args)
    except SystemExit:
        return

    # reaching this point means main() returned normally, which is the bug
    # this test is meant to catch
    raise AssertionError('filter_features did not exit for arguments: '
                         '{}'.format(ff_cmd_args))
Ejemplo n.º 2
0
def test_filter_features_unknown_input_format():
    """
    Make sure that filter_features exits when passing in an unknown input file format
    """

    ff_cmd_args = ['foo.xxx', 'bar.csv', '-f', 'a', 'b', 'c']

    # SystemExit is the expected outcome here, so it is caught and treated
    # as success; letting it propagate would make the test runner report
    # the correct behavior as a failure.
    try:
        ff.main(argv=ff_cmd_args)
    except SystemExit:
        return

    # reaching this point means main() returned normally, which is the bug
    # this test is meant to catch
    raise AssertionError('filter_features did not exit for arguments: '
                         '{}'.format(ff_cmd_args))
Ejemplo n.º 3
0
def check_filter_features_raises_system_exit(cmd_args):
    """
    Little helper to make test output cleaner for tests that check that
    filter_features exits with the specified arguments.

    ``cmd_args`` is the list of command line arguments handed to
    ``ff.main()``. The helper returns normally if ``main()`` raises
    ``SystemExit`` and raises ``AssertionError`` if it does not.
    """
    # SystemExit is the expected outcome, so it is caught and treated as
    # success rather than being allowed to propagate to the test runner.
    try:
        ff.main(cmd_args)
    except SystemExit:
        return

    # main() returned normally even though an exit was expected
    raise AssertionError('filter_features did not exit for arguments: '
                         '{}'.format(cmd_args))
Ejemplo n.º 4
0
def check_filter_features_arff_argparse(filter_type, label_col='y',
                                        id_col='id', inverse=False,
                                        quiet=False):
    """
    Verify that the filter_features command line is parsed and forwarded
    correctly when both the input and the output are ARFF files.

    Only the argument plumbing is examined here: the values that reach
    FeatureSet.filter() and the ARFF writer's __init__() are compared with
    what was put on the command line. The correctness of the filtering
    itself is not checked.

    ``filter_type`` selects which filter option is exercised ('feature',
    'id' or 'label'); any other value adds no filter option and skips the
    filter-specific check. ``label_col`` and ``id_col`` are used both to
    write the input file and as the values of the '-l' and '--id_col'
    options. ``inverse`` and ``quiet`` add the '-i' and '-q' switches.
    """
    arff_writer = skll.data.writers.ARFFWriter

    # the input file is really written to disk below; the output file is
    # not, since the writer's write() method is mocked
    outfile = 'bar.arff'
    infile = join(_my_dir, 'other', 'test_filter_features_input.arff')

    # build a simple featureset with actual ids, labels and features and
    # save it so that filter_features has something to read
    fs, _ = make_classification_data(num_labels=3, train_test_ratio=1.0)
    arff_writer(infile, fs, label_col=label_col, id_col=id_col).write()

    # for the requested filter type, pick the command line flag, the values
    # that follow it, and the keyword under which FeatureSet.filter() is
    # expected to receive those values
    if filter_type == 'feature':
        flag, filter_kwarg = '-f', 'features'
        if inverse:
            kept = ['f01', 'f04', 'f07', 'f10']
        else:
            kept = ['f02', 'f03', 'f05', 'f06', 'f08', 'f09']
    elif filter_type == 'id':
        flag, filter_kwarg = '-I', 'ids'
        first, stop = (1, 100) if inverse else (2, 102)
        kept = ['EXAMPLE_{}'.format(num) for num in range(first, stop, 2)]
    elif filter_type == 'label':
        flag, filter_kwarg = '-L', 'labels'
        kept = ['0', '1'] if inverse else ['2']
    else:
        flag = filter_kwarg = kept = None

    # assemble the full argument list in the order: files, filter option,
    # column names, boolean switches
    ff_cmd_args = [infile, outfile]
    if flag is not None:
        ff_cmd_args.append(flag)
        ff_cmd_args.extend(kept)
    ff_cmd_args += ['-l', label_col, '--id_col', id_col]
    if inverse:
        ff_cmd_args.append('-i')
    if quiet:
        ff_cmd_args.append('-q')

    # Mock FeatureSet.filter() and the writer's __init__() so that their
    # call arguments can be inspected, and mock write() as well so that
    # nothing is actually written to the output file.
    with patch.object(FeatureSet, 'filter', autospec=True) as filter_mock, \
            patch.object(arff_writer, '__init__', autospec=True,
                         return_value=None) as write_init_mock, \
            patch.object(arff_writer, 'write', autospec=True):

        ff.main(argv=ff_cmd_args)

        # with autospec, index 0 of the positional arguments is the
        # instance itself
        filter_args, filter_kwargs = filter_mock.call_args
        init_args, init_kwargs = write_init_mock.call_args

        # the writer must have been pointed at the requested output file
        # and given the requested verbosity
        eq_(init_args[1], outfile)
        eq_(init_kwargs['quiet'], quiet)

        # the label column passed to the writer is not checked: it is set
        # conditionally and those conditions do not execute with mocking

        # filter() must have been called on the featureset that was
        # written to the input file, with the requested options
        eq_(filter_args[0], fs)
        eq_(filter_kwargs['inverse'], inverse)
        if filter_kwarg is not None:
            eq_(filter_kwargs[filter_kwarg], kept)