def test_filter_features_libsvm_output_argparse():
    """
    Check that filter_features exits when asked to write libsvm output.

    The exit is not caught here; it propagates out of this function.
    NOTE(review): presumably a decorator or the test runner asserts the
    resulting SystemExit -- confirm against the rest of the file.
    """
    # CSV input, libsvm output, and a feature filter keeping a, b and c
    ff.main(argv=['foo.csv', 'bar.libsvm', '-f', 'a', 'b', 'c'])
def test_filter_features_unknown_input_format():
    """
    Check that filter_features exits when the input file has an
    unrecognized extension.

    The exit is not caught here; it propagates out of this function.
    NOTE(review): presumably a decorator or the test runner asserts the
    resulting SystemExit -- confirm against the rest of the file.
    """
    # '.xxx' is the unknown input format; the output name is a plain CSV
    ff.main(argv=['foo.xxx', 'bar.csv', '-f', 'a', 'b', 'c'])
def check_filter_features_raises_system_exit(cmd_args):
    """
    Run filter_features with ``cmd_args`` on behalf of tests that expect
    it to exit, keeping the output of those tests tidy.

    Parameters
    ----------
    cmd_args : list of str
        Command-line arguments handed straight to ``ff.main``.

    Notes
    -----
    Nothing is caught here, so whatever ``ff.main`` raises reaches the
    caller unchanged.
    """
    ff.main(cmd_args)
def check_filter_features_arff_argparse(filter_type, label_col='y',
                                        id_col='id', inverse=False,
                                        quiet=False):
    """
    Verify that filter_features wires its command-line arguments through
    correctly when working with ARFF files.

    Only the argument plumbing is checked: ``FeatureSet.filter`` and the
    ARFF writer are mocked while ``ff.main`` runs, so no filtered output
    is produced or inspected here.

    Parameters
    ----------
    filter_type : str
        One of 'feature', 'id' or 'label'; selects which filter option
        (``-f``, ``-I`` or ``-L``) is placed on the command line. Any
        other value adds no filter option and skips the per-filter check.
    label_col : str
        Label column name, used when writing the input file and passed
        via ``-l``.
    id_col : str
        ID column name, used when writing the input file and passed via
        ``--id_col``.
    inverse : bool
        If true, ``-i`` is added and a different set of values to keep
        is used.
    quiet : bool
        If true, ``-q`` is added.
    """
    arff_writer = skll.data.writers.ARFFWriter

    # dummy input/output file names; the input file is really written below
    input_path = join(_my_dir, 'other', 'test_filter_features_input.arff')
    output_path = 'bar.arff'

    # a small featureset with real ids, labels and features, saved as the
    # ARFF input that filter_features will read
    fs, _ = make_classification_data(num_labels=3, train_test_ratio=1.0)
    arff_writer(input_path, fs, label_col=label_col, id_col=id_col).write()

    # For the requested filter, work out the command-line flag, the values
    # to keep, and the FeatureSet.filter() keyword they should arrive under.
    if filter_type == 'feature':
        flag, filter_kwarg = '-f', 'features'
        wanted = (['f01', 'f04', 'f07', 'f10'] if inverse
                  else ['f02', 'f03', 'f05', 'f06', 'f08', 'f09'])
    elif filter_type == 'id':
        flag, filter_kwarg = '-I', 'ids'
        id_numbers = range(1, 100, 2) if inverse else range(2, 102, 2)
        wanted = ['EXAMPLE_{}'.format(x) for x in id_numbers]
    elif filter_type == 'label':
        flag, filter_kwarg = '-L', 'labels'
        wanted = ['0', '1'] if inverse else ['2']
    else:
        flag = filter_kwarg = wanted = None

    # assemble the command line in the same order as the options above
    ff_cmd_args = [input_path, output_path]
    if flag is not None:
        ff_cmd_args.append(flag)
        ff_cmd_args.extend(wanted)
    ff_cmd_args.extend(['-l', label_col])
    ff_cmd_args.extend(['--id_col', id_col])
    if inverse:
        ff_cmd_args.append('-i')
    if quiet:
        ff_cmd_args.append('-q')

    # Mock what filter_features calls for ARFF files: FeatureSet.filter()
    # and the writer's __init__(). write() is mocked as well so that no
    # output file is actually written.
    with patch.object(FeatureSet, 'filter', autospec=True) as filter_mock, \
            patch.object(arff_writer, '__init__', autospec=True,
                         return_value=None) as write_init_mock, \
            patch.object(arff_writer, 'write', autospec=True):

        ff.main(argv=ff_cmd_args)

        # positional and keyword arguments each mock was last called with
        filter_args, filter_kwargs = filter_mock.call_args
        init_args, init_kwargs = write_init_mock.call_args

        # the writer must have been given our output file and quiet flag
        # (index 0 is ``self`` because of autospec)
        eq_(init_args[1], output_path)
        eq_(init_kwargs['quiet'], quiet)

        # label_col cannot be checked on the writer: it is set
        # conditionally and those conditions do not run under mocking

        # filter() must have been called on our featureset with our options
        eq_(filter_args[0], fs)
        eq_(filter_kwargs['inverse'], inverse)
        if filter_kwarg is not None:
            eq_(filter_kwargs[filter_kwarg], wanted)