def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    Every recognised option is stored in the corresponding module-level
    setting. After all options were consumed, the resulting configuration
    is checked: patterns must have been provided, and combining
    --only-matching with --annotate triggers a warning.

    @param opts List of (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string Usage message, forwarded together with the other
    parameters to `util.treat_options_simplest`.

    @raise Exception If `opts` contains an option not handled here.
    """
    global input_patterns, input_filetype_ext, output_filetype_ext
    global match_distance, non_overlapping, id_order
    global annotate, only_the_matching_subpart

    # Shared option handling lives in the util module.
    util.treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, value in opts:
        # Options that carry a value.
        if flag in ("-p", "--patterns"):
            input_patterns = filetype.parse_entities([value])
        elif flag in ("-d", "--match-distance"):
            match_distance = value
        elif flag == "--id-order":
            # Colon-separated list, e.g. "a:b:c" -> ["a", "b", "c"].
            id_order = value.split(":")
        elif flag == "--input-from":
            input_filetype_ext = value
        elif flag == "--to":
            output_filetype_ext = value
        # Plain on/off switches.
        elif flag in ("-N", "--non-overlapping"):
            non_overlapping = True
        elif flag == "--annotate":
            annotate = True
        elif flag == "--only-matching":
            only_the_matching_subpart = True
        else:
            raise Exception("Bad arg " + flag)

    # Post-parsing sanity checks on the final configuration.
    if input_patterns is None:
        util.error("No patterns provided. Option --patterns is mandatory!")
    if annotate and only_the_matching_subpart:
        util.warn("Switch --only-matching disables --annotate")
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Each recognised option updates the matching module-level setting.
    Exactly one of -p/--patterns or -n/--ngram must be supplied; with
    -p the given patterns file is parsed into `patterns` once all options
    have been validated.

    @param opts List of (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string Usage message, forwarded together with the other
    parameters to `treat_options_simplest`.

    @raise Exception If `opts` contains an option not handled here; the
    message names the offending option.
    """
    global patterns
    global ignore_pos
    global surface_instead_lemmas
    global print_cand_freq
    global print_source
    global match_distance
    global non_overlapping
    global input_filetype_ext
    global output_filetype_ext
    global id_order

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # Records every pattern-source option seen, so that "none" and
    # "more than one" can both be detected after the loop.
    mode = []
    patterns_file = None
    for (o, a) in opts:
        if o in ("-p", "--patterns"):
            mode.append("patterns")
            patterns_file = a
        elif o in ("-n", "--ngram"):
            create_patterns_file(a)
            mode.append("ngram")
        elif o in ("-g", "--ignore-pos"):
            ignore_pos = True
        elif o in ("-d", "--match-distance"):
            match_distance = a
        elif o in ("-N", "--non-overlapping"):
            non_overlapping = True
        elif o in ("-s", "--surface"):
            surface_instead_lemmas = True
        elif o in ("-S", "--source"):
            print_source = True
        elif o in ("-f", "--freq"):
            print_cand_freq = True
        elif o in ("-i", "--index"):
            # Deprecated shortcut for --from=BinaryIndex.
            input_filetype_ext = "BinaryIndex"
            warn("Option -i is deprecated; use --from=BinaryIndex")
        elif o == "--id-order":
            # Colon-separated list, e.g. "a:b:c" -> ["a", "b", "c"].
            id_order = a.split(":")
        elif o == "--from":
            input_filetype_ext = a
        elif o == "--to":
            output_filetype_ext = a
        else:
            # Name the offending option so the failure can be diagnosed.
            raise Exception("Bad flag: " + o)

    if non_overlapping and match_distance == "All":
        # If we are taking all matches, we need to be able to overlap...
        error("Conflicting options: --match-distance=All and --non-overlapping")
    if len(mode) != 1:
        error("Exactly one option, -p or -n, must be provided")
    if "patterns" in mode:
        # `patterns` is already declared global at the top of the function.
        patterns = filetype.parse_entities([patterns_file])
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Recognised options are copied into module-level settings. The pattern
    source must be chosen with exactly one of -p/--patterns (a file that
    is parsed after validation) or -n/--ngram (whose value is handed to
    `create_patterns_file` as soon as it is seen).

    @param opts List of (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string Usage message, forwarded together with the other
    parameters to `treat_options_simplest`.

    @raise Exception If `opts` contains an option not handled here; the
    message names the offending option.
    """
    global patterns, ignore_pos, surface_instead_lemmas
    global print_cand_freq, print_source
    global match_distance, non_overlapping
    global input_filetype_ext, output_filetype_ext, id_order

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # One entry per -p / -n occurrence; must end up with exactly one.
    mode = []
    patterns_file = None

    for flag, value in opts:
        # --- pattern source -------------------------------------------
        if flag in ("-p", "--patterns"):
            mode.append("patterns")
            patterns_file = value
        elif flag in ("-n", "--ngram"):
            create_patterns_file(value)
            mode.append("ngram")
        # --- on/off switches ------------------------------------------
        elif flag in ("-g", "--ignore-pos"):
            ignore_pos = True
        elif flag in ("-N", "--non-overlapping"):
            non_overlapping = True
        elif flag in ("-s", "--surface"):
            surface_instead_lemmas = True
        elif flag in ("-S", "--source"):
            print_source = True
        elif flag in ("-f", "--freq"):
            print_cand_freq = True
        # --- options carrying a value ---------------------------------
        elif flag in ("-d", "--match-distance"):
            match_distance = value
        elif flag == "--id-order":
            id_order = value.split(":")
        elif flag == "--from":
            input_filetype_ext = value
        elif flag == "--to":
            output_filetype_ext = value
        elif flag in ("-i", "--index"):
            # Deprecated shortcut for --from=BinaryIndex.
            input_filetype_ext = "BinaryIndex"
            warn("Option -i is deprecated; use --from=BinaryIndex")
        else:
            # Include the unknown option in the message; a bare "Bad flag"
            # gives no hint about what went wrong.
            raise Exception("Bad flag: " + flag)

    if non_overlapping and match_distance == "All":
        # If we are taking all matches, we need to be able to overlap...
        error(
            "Conflicting options: --match-distance=All and --non-overlapping")

    if len(mode) != 1:
        error("Exactly one option, -p or -n, must be provided")

    if "patterns" in mode:
        # No second `global patterns` needed: it is declared above.
        patterns = filetype.parse_entities([patterns_file])
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Stores the filtering criteria (threshold, feature equality, patterns,
    reverse switch, length and MWE-occurrence bounds) and the input/output
    file types in module-level settings, then warns when a minimum bound
    is greater than its maximum counterpart.

    @param opts List of (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string Usage message, forwarded together with the other
    parameters to `treat_options_simplest`.

    @raise Exception If `opts` contains an option not handled here.
    """
    global thresh_source, thresh_value
    global equals_name, equals_value
    global reverse
    global minlength, maxlength
    global min_mweoccurs, max_mweoccurs
    global input_filetype_ext, output_filetype_ext
    global patterns

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, value in opts:
        if flag in ("-t", "--threshold"):
            parsed_threshold = interpret_threshold(value)
            if not parsed_threshold:
                # A falsy result means the <source>:<value> pair was
                # rejected by interpret_threshold.
                error("The format of the -t argument must be <source>:"
                      "<value>\n<source> must be a valid corpus name and "
                      "<value> must be a non-negative integer")
            else:
                thresh_source, thresh_value = parsed_threshold
        elif flag in ("-e", "--equals"):
            parsed_equals = interpret_equals(value)
            if not parsed_equals:
                # A falsy result means the <name>:<value> pair was
                # rejected by interpret_equals.
                error("The format of the -e argument must be <name>:"
                      "<value>\n<name> must be a valid feat name and "
                      "<value> must be a non-empty string")
            else:
                equals_name, equals_value = parsed_equals
        elif flag in ("-p", "--patterns"):
            verbose("Reading patterns file")
            patterns = filetype.parse_entities([value])
        elif flag in ("-r", "--reverse"):
            reverse = True
            verbose("Option REVERSE active")
        elif flag in ("-i", "--minlength"):
            minlength = interpret_length(value, "minimum")
        elif flag in ("-a", "--maxlength"):
            maxlength = interpret_length(value, "maximum")
        elif flag == "--min-mweoccurs":
            min_mweoccurs = interpret_length(value, "minimum")
        elif flag == "--max-mweoccurs":
            max_mweoccurs = interpret_length(value, "maximum")
        elif flag == "--from":
            input_filetype_ext = value
        elif flag == "--to":
            output_filetype_ext = value
        else:
            raise Exception("Bad arg: " + flag)

    # Inverted ranges are only reported, not rejected.
    if minlength > maxlength:
        warn("minlength should be <= maxlength")
    if min_mweoccurs > max_mweoccurs:
        warn("min-mweoccurs should be <= max-mweoccurs")
elif o == "--corpus-from": corpus_filetype_ext = a elif o == "--reference-from": reference_filetype_ext = a else: raise Exception("Bad arg: " + o) if not reference_fname: error("No reference file given!") sentence_aligner = sentence_aligner_class() mwe_evaluator = mwe_evaluator_class(sentence_aligner) ################################################################################ # MAIN SCRIPT if __name__ == "__main__": longopts = [ "reference=", "sentence-aligner=", "evaluator=", "reference-from=", "corpus-from=" ] args = read_options("r:e:", longopts, treat_options, -1, usage_string) reference = parse_entities([reference_fname], reference_filetype_ext) prediction = parse_entities(args, corpus_filetype_ext) results = mwe_evaluator.compare_sentence_lists(reference, prediction) print("DEBUG:", results) print("Precision:", results.precision()) print("Recall:", results.recall()) print("F-measure:", results.f_measure())
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Walks through the parsed options one by one and records each of them
    in the corresponding module-level setting. Once the walk is over, a
    warning is issued for every minimum bound that exceeds its maximum.

    @param opts List of (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string Usage message, forwarded together with the other
    parameters to `treat_options_simplest`.

    @raise Exception If `opts` contains an option not handled here.
    """
    global thresh_source
    global thresh_value
    global equals_name
    global equals_value
    global patterns
    global reverse
    global minlength
    global maxlength
    global min_mweoccurs
    global max_mweoccurs
    global input_filetype_ext
    global output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for opt, val in opts:
        if opt in ("-t", "--threshold"):
            source_and_value = interpret_threshold(val)
            if source_and_value:
                thresh_source, thresh_value = source_and_value
            else:
                error("The format of the -t argument must be <source>:"
                      "<value>\n<source> must be a valid corpus name and "
                      "<value> must be a non-negative integer")
            continue

        if opt in ("-e", "--equals"):
            name_and_value = interpret_equals(val)
            if name_and_value:
                equals_name, equals_value = name_and_value
            else:
                error("The format of the -e argument must be <name>:"
                      "<value>\n<name> must be a valid feat name and "
                      "<value> must be a non-empty string")
            continue

        if opt in ("-p", "--patterns"):
            verbose("Reading patterns file")
            patterns = filetype.parse_entities([val])
            continue

        if opt in ("-r", "--reverse"):
            reverse = True
            verbose("Option REVERSE active")
            continue

        # Length and occurrence bounds all go through interpret_length;
        # the second argument tells it which kind of bound is parsed.
        if opt in ("-i", "--minlength"):
            minlength = interpret_length(val, "minimum")
            continue
        if opt in ("-a", "--maxlength"):
            maxlength = interpret_length(val, "maximum")
            continue
        if opt == "--min-mweoccurs":
            min_mweoccurs = interpret_length(val, "minimum")
            continue
        if opt == "--max-mweoccurs":
            max_mweoccurs = interpret_length(val, "maximum")
            continue

        # Input / output file type selection.
        if opt == "--from":
            input_filetype_ext = val
            continue
        if opt == "--to":
            output_filetype_ext = val
            continue

        # Nothing above matched this option.
        raise Exception("Bad arg: " + opt)

    # Inverted ranges are only reported, not rejected.
    if minlength > maxlength:
        warn("minlength should be <= maxlength")
    if min_mweoccurs > max_mweoccurs:
        warn("min-mweoccurs should be <= max-mweoccurs")
mwe_evaluator_class = MWE_EVALUATORS[a] elif o == "--corpus-from": corpus_filetype_ext = a elif o == "--reference-from": reference_filetype_ext = a else: raise Exception("Bad arg: " + o) if not reference_fname: error("No reference file given!") sentence_aligner = sentence_aligner_class() mwe_evaluator = mwe_evaluator_class(sentence_aligner) ################################################################################ # MAIN SCRIPT if __name__ == "__main__": longopts = ["reference=", "sentence-aligner=", "evaluator=", "reference-from=", "corpus-from="] args = read_options("r:e:", longopts, treat_options, -1, usage_string) reference = parse_entities([reference_fname], reference_filetype_ext) prediction = parse_entities(args, corpus_filetype_ext) results = mwe_evaluator.compare_sentence_lists(reference, prediction) print("DEBUG:", results) print("Precision:", results.precision()) print("Recall:", results.recall()) print("F-measure:", results.f_measure())