def test_join_features_libsvm_output_argparse():
    """
    Make sure that join_features exits when passing in output libsvm files.

    ``join_features`` does not support writing ``.libsvm`` output, so its
    argument parser is expected to call ``sys.exit()`` (i.e. raise
    ``SystemExit``) for such an output path.
    """
    jf_cmd_args = ['foo.csv', 'bar.csv', 'baz.libsvm']
    # jf.main() is expected to exit; a bare call would let SystemExit
    # propagate and error out the test, so catch it explicitly and fail
    # only if it was NOT raised
    try:
        jf.main(argv=jf_cmd_args)
    except SystemExit:
        pass
    else:
        raise AssertionError('join_features did not exit for '
                             'libsvm output files')
def test_join_features_unknown_output_format():
    """
    Make sure that join_features exits when passing in an unknown output
    file format.

    An unrecognized output extension (here ``.xxx``) should make the
    argument parser call ``sys.exit()`` (i.e. raise ``SystemExit``).
    """
    jf_cmd_args = ['foo.csv', 'bar.csv', 'baz.xxx']
    # jf.main() is expected to exit; a bare call would let SystemExit
    # propagate and error out the test, so catch it explicitly and fail
    # only if it was NOT raised
    try:
        jf.main(argv=jf_cmd_args)
    except SystemExit:
        pass
    else:
        raise AssertionError('join_features did not exit for an '
                             'unknown output format')
def check_join_features_raises_system_exit(cmd_args):
    """
    Little helper to make test output cleaner for tests that check that
    join_features exits with the specified arguments.

    Parameters
    ----------
    cmd_args : list of str
        Command-line arguments to pass through to ``jf.main()``.

    Raises
    ------
    AssertionError
        If ``jf.main()`` does not raise ``SystemExit`` for ``cmd_args``.
    """
    # the helper's contract is that jf.main() must exit, so assert that
    # SystemExit is actually raised instead of letting it propagate
    try:
        jf.main(cmd_args)
    except SystemExit:
        pass
    else:
        raise AssertionError('join_features did not exit with '
                             'arguments: {}'.format(cmd_args))
def check_join_features_argparse(extension, label_col='y', id_col='id',
                                 quiet=False):
    """
    A utility function to check that we are setting up argument parsing
    correctly for join_features for ALL file types. We are not checking
    whether the results are correct because we have separate tests for that.

    Parameters
    ----------
    extension : str
        File extension, including the leading dot (e.g. ``'.csv'``), used
        to look up the appropriate ``Writer`` class in ``EXT_TO_WRITER``.
    label_col : str, optional
        Name of the label column; forwarded via ``-l`` for tabular
        formats (``.tsv``, ``.csv``, ``.arff``) only.
    id_col : str, optional
        Name of the ID column; forwarded via ``--id_col`` for tabular
        formats only.
    quiet : bool, optional
        If ``True``, add the ``-q`` flag and verify it is forwarded to
        the writer as ``quiet=True``.
    """
    # look up the writer class that join_features should instantiate
    # for this file extension
    writer_class = EXT_TO_WRITER[extension]

    # create some dummy input and output filenames
    infile1 = join(_my_dir, 'other',
                   'test_join_features1{}'.format(extension))
    infile2 = join(_my_dir, 'other',
                   'test_join_features2{}'.format(extension))
    outfile = 'bar{}'.format(extension)

    # create a simple featureset with actual ids, labels and features
    fs1, _ = make_classification_data(num_labels=3,
                                      train_test_ratio=1.0,
                                      random_state=1234)
    fs2, _ = make_classification_data(num_labels=3,
                                      train_test_ratio=1.0,
                                      feature_prefix='g',
                                      random_state=5678)

    jf_cmd_args = [infile1, infile2, outfile]

    # tabular formats take label/ID column options; other formats
    # (e.g. jsonlines, libsvm) do not
    if extension in ['.tsv', '.csv', '.arff']:
        writer1 = writer_class(infile1, fs1,
                               label_col=label_col,
                               id_col=id_col)
        writer2 = writer_class(infile2, fs2,
                               label_col=label_col,
                               id_col=id_col)
        jf_cmd_args.extend(['-l', label_col])
        jf_cmd_args.extend(['--id_col', id_col])
    else:
        writer1 = writer_class(infile1, fs1)
        writer2 = writer_class(infile2, fs2)

    # write the real input files to disk BEFORE the writer class is
    # patched below, so that join_features has actual files to read
    writer1.write()
    writer2.write()

    if quiet:
        jf_cmd_args.append('-q')

    # Substitute mock methods for the main methods that get called by
    # join_features: FeatureSet.__add__() (the featureset merge) and the
    # __init__() method of the appropriate writer. We also need to mock
    # the write() method to prevent actual writing.
    with patch.object(FeatureSet, '__add__', autospec=True) as add_mock, \
            patch.object(writer_class, '__init__', autospec=True,
                         return_value=None) as write_init_mock, \
            patch.object(writer_class, 'write',
                         autospec=True) as write_mock:

        jf.main(argv=jf_cmd_args)

        # get the various arguments from the three mocked up methods
        add_pos_arguments, add_kw_arguments = add_mock.call_args
        write_pos_arguments, write_kw_arguments = write_init_mock.call_args

        # make sure that the arguments they got were the ones we specified
        # (index 0 of the positional args is the instance itself because
        # of autospec=True, so the output path is at index 1)
        eq_(write_pos_arguments[1], outfile)
        eq_(write_kw_arguments['quiet'], quiet)

        # note that we cannot test the label_col column for the writer;
        # the reason is that it is set conditionally and those conditions
        # do not execute with mocking
        eq_(add_pos_arguments[0], fs1)
        eq_(add_pos_arguments[1], fs2)