import sys
import json
import logging as log

import pytest

import args_and_configs
import scoring_metrics


def test_skip_missing_test_files_usage():
    command_line_args = [ '--reference-input', 'tests/data/i2b2_2016_track-1_reference',
                          '--test-input', 'tests/data/i2b2_2016_track-1_test',
                          '--skip-missing-files' ]
    args = args_and_configs.get_arguments(command_line_args)
    assert args.skip_missing_files
    ## Behavior should be identical to the default
    command_line_args = [ '--reference-input', 'tests/data/i2b2_2016_track-1_reference',
                          '--test-input', 'tests/data/i2b2_2016_track-1_test' ]
    args = args_and_configs.get_arguments(command_line_args)
    assert args.skip_missing_files
def test_print_counts_and_metrics_and_confusion_for_test():
    command_line_args = [ '--print-counts',
                          '--print-metrics',
                          '--print-confusion-matrix',
                          '--test-input', 'tests/data/i2b2_2016_track-1_test' ]
    with pytest.raises(SystemExit) as e_info:
        args = args_and_configs.get_arguments(command_line_args)
def test_default_ignore_whitespace_flag():
    command_line_args = [ '--reference-input', 'tests/data/i2b2_2016_track-1_reference',
                          '--test-input', 'tests/data/i2b2_2016_track-1_test' ]
    args = args_and_configs.get_arguments(command_line_args)
    assert args.ignore_whitespace
def test_score_missing_test_files_usage():
    command_line_args = [ '--reference-input', 'tests/data/i2b2_2016_track-1_reference',
                          '--test-input', 'tests/data/i2b2_2016_track-1_test',
                          '--score-missing-files' ]
    args = args_and_configs.get_arguments(command_line_args)
    assert not args.skip_missing_files
def test_print_counts_ref_only():
    command_line_args = [ '--print-counts',
                          '--no-metrics',
                          '--no-confusion-matrix',
                          '--reference-input', 'tests/data/i2b2_2016_track-1_reference' ]
    args = args_and_configs.get_arguments(command_line_args)
    assert args.print_counts
    assert not args.print_metrics
    assert not args.print_confusion_matrix
    assert args.reference_input is not None
    assert args.test_input is None
def initialize_for_track1():
    command_line_args = [ '--reference-input', 'tests/data/n2c2_2018_track-1_reference',
                          '--test-input', 'tests/data/n2c2_2018_track-1_test',
                          '--empty-value', '0.0' ]
    args = args_and_configs.get_arguments(command_line_args)
    ## TODO - we are duplicating the cast normally done within init_args
    ##        to make the test more portable, for now.
    args.empty_value = float(args.empty_value)
    file_mapping = { '103.xml': '103.xml',
                     '203.xml': '203.xml',
                     '303.xml': '303.xml' }
    return (args, file_mapping)
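
## A minimal sketch of how the helper above might be consumed; the test
## name below is hypothetical and it only asserts values that
## initialize_for_track1() itself sets up.
def test_initialize_for_track1_defaults():
    args, file_mapping = initialize_for_track1()
    ## --empty-value '0.0' is cast to a float by the helper
    assert args.empty_value == 0.0
    ## Reference and test files share identical names in this fixture
    assert sorted(file_mapping.keys()) == ['103.xml', '203.xml', '303.xml']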
def init_args():
    args = args_and_configs.get_arguments(sys.argv[1:])
    ## Set up logging
    if args.verbose:
        log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG)
        log.info("Verbose output.")
        log.debug("{}".format(args))
    else:
        log.basicConfig(format="%(levelname)s: %(message)s")
    ## Configure progressbar performance
    if (args.progressbar_output == 'none'):
        args.progressbar_disabled = True
        args.progressbar_file = None
    else:
        args.progressbar_disabled = False
        if (args.progressbar_output == 'stderr'):
            args.progressbar_file = sys.stderr
        elif (args.progressbar_output == 'stdout'):
            args.progressbar_file = sys.stdout
    ## F-score beta values are commonly set to 1, 2, and 0.5 but we
    ## want to support any values.  It's easiest to do filtering at
    ## this point in the pipeline to standardize beta values and how
    ## they show up in the rest of the pipeline.
    if ('F' in args.metrics_list):
        f_position = args.metrics_list.index('F')
        args.metrics_list.pop(f_position)
        if (len(args.f_beta_values) == 0):
            log.warning('F was included in the list of metrics to calculate '
                        'but no beta values were provided (--f-beta-values <betas>)')
        else:
            ## Reverse the list so that the betas get inserted into the
            ## metrics_list in the proper order
            args.f_beta_values.reverse()
            for beta in args.f_beta_values:
                if ('F{}'.format(beta) not in args.metrics_list):
                    args.metrics_list.insert(f_position, 'F{}'.format(beta))
    else:
        if (len(args.f_beta_values) > 0):
            log.warning('F beta values were provided but "F" was not included '
                        'in the list of metrics to calculate (--f-beta-values <betas>)')
        args.f_beta_values = []
        for common_beta in ['1', '2', '0.5']:
            if ('F{}'.format(common_beta) in args.metrics_list):
                if (common_beta not in args.f_beta_values):
                    args.f_beta_values.append(common_beta)
    ## The command line parameters are always initially cast as strings.
    ## That works fine for some empty values, but sometimes we want to use
    ## 0 (int), 0.0 (float), or -1 as the empty value.  In those cases,
    ## it's best to cast the string to the appropriate numerical
    ## type for formatting later.
    if (args.empty_value is not None and args.empty_value != ''):
        try:
            args.empty_value = int(args.empty_value)
        except ValueError:
            log.debug('Default empty_value is not an int')
            try:
                args.empty_value = float(args.empty_value)
            except ValueError:
                log.debug('Default empty_value is not a float')
    ## Resolve conflicts between the --ignore-whitespace, --heed-whitespace,
    ## and --ignore-regex flags.  Essentially, if we set something in
    ## skip_chars, use that.  Otherwise, if we tripped --ignore-whitespace,
    ## then set skip_chars accordingly.
    if (args.ignore_whitespace and args.skip_chars is None):
        args.skip_chars = r'[\s]'
    ## lstrip hack added to handle prefixes and suffixes with dashes
    ## https://stackoverflow.com/questions/16174992/cant-get-argparse-to-read-quoted-string-with-dashes-in-it
    args.file_prefix = args.file_prefix.lstrip()
    args.file_suffix[0] = args.file_suffix[0].lstrip()
    if (len(args.file_suffix) == 2):
        args.file_suffix[1] = args.file_suffix[1].lstrip()
    ## Initialize the list of annotation attributes to score
    args.attributes_list = []
    args.scorable_attributes = []
    if (isinstance(args.attributes_string, str)):
        for attribute_key in args.attributes_string.split(','):
            ## Strip off any extra whitespace before processing
            attribute_key = attribute_key.strip()
            attribute_kernel = attribute_key.split('/')
            last = len(attribute_kernel) - 1
            args.attributes_list.append([attribute_kernel[0],
                                         attribute_kernel[last]])
    ## Initialize the list of normalization engines to score
    args.normalization_list = []
    args.scorable_engines = []
    args.normalization_synonyms = {}
    if (isinstance(args.normalization_string, str)):
        for normalization_key in args.normalization_string.split(','):
            ## Strip off any extra whitespace before processing
            normalization_key = normalization_key.strip()
            normalization_kernel = normalization_key.split('/')
            last = len(normalization_kernel) - 1
            args.normalization_list.append([normalization_kernel[0],
                                            normalization_kernel[last]])
    ## Only bother to load the normalization_file if the --score-normalization
    ## flag was used
    args.normalization_synonyms = \
        args_and_configs.process_normalization_file(args.normalization_file)
    ## Initialize the corpus settings, values, and metrics file
    ## if one was provided at the command line
    if (args.corpus_out):
        ## Clean out any previous corpus dictionary, in case it exists from
        ## an old run
        with open(args.corpus_out, 'w') as fp:
            json.dump({}, fp, sort_keys=True, indent=4)
        ## Add a few important arguments
        scoring_metrics.update_output_dictionary(
            args.corpus_out,
            ['args'],
            ['reference_config', 'reference_input', 'reference_out',
             'test_config', 'test_input', 'test_out',
             'score_key', 'fuzzy_flags'],
            [args.reference_config, args.reference_input, args.reference_out,
             args.test_config, args.test_input, args.test_out,
             args.score_key, args.fuzzy_flags])
    return args
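
## A minimal, self-contained sketch of the F-beta expansion performed in
## init_args() above, using plain lists in place of the parsed args.  The
## helper name expand_f_metrics is hypothetical and exists only to
## illustrate the insertion order.
def expand_f_metrics(metrics_list, f_beta_values):
    ## Replace a bare 'F' entry with one 'F<beta>' entry per requested
    ## beta, preserving the position where 'F' appeared and the order in
    ## which the betas were given.
    if 'F' in metrics_list:
        f_position = metrics_list.index('F')
        metrics_list.pop(f_position)
        for beta in reversed(f_beta_values):
            if 'F{}'.format(beta) not in metrics_list:
                metrics_list.insert(f_position, 'F{}'.format(beta))
    return metrics_list

## Example: ['TP', 'F', 'FP'] with betas ['1', '2'] becomes
## ['TP', 'F1', 'F2', 'FP'] -- the betas land where 'F' appeared.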
def test_print_counts_neither_ref_nor_test():
    command_line_args = [ '--print-counts',
                          '--no-metrics',
                          '--no-confusion-matrix' ]
    with pytest.raises(SystemExit) as e_info:
        args = args_and_configs.get_arguments(command_line_args)
def test_required_input_flags_test_only():
    command_line_args = [ '--test-input', 'tests/data/i2b2_2016_track-1_test' ]
    with pytest.raises(SystemExit) as e_info:
        args = args_and_configs.get_arguments(command_line_args)