Example #1
def test_join_features_libsvm_output_argparse():
    """
    Make sure that join_features exits when passed a .libsvm output file
    """

    jf_cmd_args = ['foo.csv', 'bar.csv', 'baz.libsvm']
    jf.main(argv=jf_cmd_args)
Example #2
def test_join_features_unknown_output_format():
    """
    Make sure that join_features exits when passed an output file with an unknown format
    """

    jf_cmd_args = ['foo.csv', 'bar.csv', 'baz.xxx']
    jf.main(argv=jf_cmd_args)
Example #3
def check_join_features_raises_system_exit(cmd_args):
    """
    Little helper to make test output cleaner for tests that check that
    join_features exits with the specified arguments.
    """
    jf.main(cmd_args)
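On their own, the two exit tests and this helper only call jf.main(); they pass only if the surrounding test asserts that SystemExit is raised. A minimal sketch of one way to express that with nose (assumed here because eq_ from nose.tools appears in Example #4; the test name is hypothetical):

from nose.tools import raises

@raises(SystemExit)
def test_join_features_libsvm_output_exits():
    # hypothetical wrapper: passes only if jf.main() raises SystemExit
    # when asked to write a .libsvm output file
    check_join_features_raises_system_exit(['foo.csv', 'bar.csv', 'baz.libsvm'])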
Example #4
def check_join_features_argparse(extension, label_col='y', id_col='id',
                                 quiet=False):
    """
    A utility function to check that we are setting up argument parsing
    correctly for join_features for ALL file types. We are not checking
    whether the results are correct because we have separate tests for that.
    """

    # look up the writer class for this file extension
    writer_class = EXT_TO_WRITER[extension]

    # create some dummy input and output filenames
    infile1 = join(_my_dir, 'other', 'test_join_features1{}'.format(extension))
    infile2 = join(_my_dir, 'other', 'test_join_features2{}'.format(extension))
    outfile = 'bar{}'.format(extension)

    # create two simple featuresets with actual ids, labels and features
    fs1, _ = make_classification_data(num_labels=3,
                                      train_test_ratio=1.0,
                                      random_state=1234)
    fs2, _ = make_classification_data(num_labels=3,
                                      train_test_ratio=1.0,
                                      feature_prefix='g',
                                      random_state=5678)

    jf_cmd_args = [infile1, infile2, outfile]

    if extension in ['.tsv', '.csv', '.arff']:
        writer1 = writer_class(infile1, fs1, label_col=label_col,
                               id_col=id_col)
        writer2 = writer_class(infile2, fs2, label_col=label_col,
                               id_col=id_col)
        jf_cmd_args.extend(['-l', label_col])
        jf_cmd_args.extend(['--id_col', id_col])
    else:
        writer1 = writer_class(infile1, fs1)
        writer2 = writer_class(infile2, fs2)

    writer1.write()
    writer2.write()

    if quiet:
        jf_cmd_args.append('-q')

    # Substitute mock methods for the main methods that get called by
    # join_features: FeatureSet.__add__() and the __init__() method
    # of the appropriate writer. We also need to mock the write()
    # method to prevent actual writing.
    with patch.object(FeatureSet, '__add__', autospec=True) as add_mock, \
            patch.object(writer_class, '__init__', autospec=True,
                         return_value=None) as write_init_mock, \
            patch.object(writer_class, 'write', autospec=True) as write_mock:

        jf.main(argv=jf_cmd_args)

        # get the arguments passed to the mocked-up __add__ and writer __init__ methods
        add_pos_arguments, add_kw_arguments = add_mock.call_args
        write_pos_arguments, write_kw_arguments = write_init_mock.call_args

        # make sure that the arguments they got were the ones we specified
        eq_(write_pos_arguments[1], outfile)
        eq_(write_kw_arguments['quiet'], quiet)

        # note that we cannot test the label_col argument for the writer
        # because it is set conditionally and those conditions do not
        # execute under mocking

        eq_(add_pos_arguments[0], fs1)
        eq_(add_pos_arguments[1], fs2)
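check_join_features_argparse is parameterized by extension, label column, id column, and the quiet flag, so the suite presumably calls it once per supported file type. A minimal sketch of such a driver using a nose-style test generator (the iteration over the EXT_TO_WRITER keys and the fixed column names are assumptions):

def test_join_features_argparse():
    # hypothetical driver: run the argparse check for every writer
    # extension, with and without the quiet flag
    for extension in EXT_TO_WRITER:
        for quiet in [False, True]:
            yield check_join_features_argparse, extension, 'y', 'id', quiet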