Esempio n. 1
0
def treat_options(opts, arg, n_arg, usage_string):
    """Callback that stores this script's command line options in globals.

    All four arguments are first passed unchanged to
    `util.treat_options_simplest`. Each (option, value) pair found in `opts`
    is then copied into the matching module-level setting.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string Passed through to `util.treat_options_simplest`.

    Raises Exception when `opts` contains an option not handled here.
    """
    global input_patterns, input_filetype_ext, output_filetype_ext
    global match_distance, non_overlapping, id_order
    global annotate, only_the_matching_subpart

    util.treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, value in opts:
        # Options that carry a value.
        if flag in ("-p", "--patterns"):
            input_patterns = filetype.parse_entities([value])
        elif flag in ("-d", "--match-distance"):
            match_distance = value
        elif flag == "--id-order":
            # Colon-separated list, e.g. "a:b:c" -> ["a", "b", "c"].
            id_order = value.split(":")
        elif flag == "--input-from":
            input_filetype_ext = value
        elif flag == "--to":
            output_filetype_ext = value
        # Plain switches.
        elif flag in ("-N", "--non-overlapping"):
            non_overlapping = True
        elif flag == "--annotate":
            annotate = True
        elif flag == "--only-matching":
            only_the_matching_subpart = True
        else:
            raise Exception("Bad arg " + flag)

    # Patterns are required; report when none were supplied.
    if input_patterns is None:
        util.error("No patterns provided. Option --patterns is mandatory!")

    # Both switches were requested together: tell the user which one wins.
    if only_the_matching_subpart and annotate:
        util.warn("Switch --only-matching disables --annotate")
Esempio n. 2
0
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        All four arguments are first passed unchanged to
        `treat_options_simplest`. Each (option, value) pair in `opts` is then
        stored in the matching module-level global. Exactly one of -p
        (patterns file) or -n (ngram) must be given; with -p the file is
        parsed into the global `patterns` once all options have been read.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string Passed through to `treat_options_simplest`.

        Raises Exception, naming the offending option, when `opts` contains
        an option that is not handled here.
    """
    global patterns
    global ignore_pos
    global surface_instead_lemmas
    global print_cand_freq
    global print_source
    global match_distance
    global non_overlapping
    global input_filetype_ext
    global output_filetype_ext
    global id_order

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # Records which of the mutually exclusive -p / -n options were seen.
    mode = []
    patterns_file = None
    for (o, a) in opts:
        if o in ("-p", "--patterns"):
            mode.append("patterns")
            patterns_file = a
        elif o in ("-n", "--ngram"):
            create_patterns_file(a)
            mode.append("ngram")
        elif o in ("-g", "--ignore-pos"):
            ignore_pos = True
        elif o in ("-d", "--match-distance"):
            match_distance = a
        elif o in ("-N", "--non-overlapping"):
            non_overlapping = True
        elif o in ("-s", "--surface"):
            surface_instead_lemmas = True
        elif o in ("-S", "--source"):
            print_source = True
        elif o in ("-f", "--freq"):
            print_cand_freq = True
        elif o in ("-i", "--index"):
            input_filetype_ext = "BinaryIndex"
            warn("Option -i is deprecated; use --from=BinaryIndex")
        elif o == "--id-order":
            # Colon-separated list, e.g. "a:b:c" -> ["a", "b", "c"].
            id_order = a.split(":")
        elif o == "--from":
            input_filetype_ext = a
        elif o == "--to":
            output_filetype_ext = a
        else:
            # Include the option itself so the failure can be diagnosed.
            raise Exception("Bad flag: " + o)

    if non_overlapping and match_distance == "All":
        # If we are taking all matches, we need to be able to overlap...
        error("Conflicting options: --match-distance=All and --non-overlapping")

    if len(mode) != 1:
        error("Exactly one option, -p or -n, must be provided")
    if "patterns" in mode:
        # `patterns` is already declared global at the top of the function.
        patterns = filetype.parse_entities([patterns_file])
Esempio n. 3
0
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        All four arguments are first passed unchanged to
        `treat_options_simplest`. Each (option, value) pair in `opts` is then
        stored in the matching module-level global. Exactly one of -p
        (patterns file) or -n (ngram) must be given; with -p the file is
        parsed into the global `patterns` once all options have been read.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string Passed through to `treat_options_simplest`.

        Raises Exception, naming the offending option, when `opts` contains
        an option that is not handled here.
    """
    global patterns
    global ignore_pos
    global surface_instead_lemmas
    global print_cand_freq
    global print_source
    global match_distance
    global non_overlapping
    global input_filetype_ext
    global output_filetype_ext
    global id_order

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # Records which of the mutually exclusive -p / -n options were seen.
    mode = []
    patterns_file = None
    for (o, a) in opts:
        if o in ("-p", "--patterns"):
            mode.append("patterns")
            patterns_file = a
        elif o in ("-n", "--ngram"):
            create_patterns_file(a)
            mode.append("ngram")
        elif o in ("-g", "--ignore-pos"):
            ignore_pos = True
        elif o in ("-d", "--match-distance"):
            match_distance = a
        elif o in ("-N", "--non-overlapping"):
            non_overlapping = True
        elif o in ("-s", "--surface"):
            surface_instead_lemmas = True
        elif o in ("-S", "--source"):
            print_source = True
        elif o in ("-f", "--freq"):
            print_cand_freq = True
        elif o in ("-i", "--index"):
            input_filetype_ext = "BinaryIndex"
            warn("Option -i is deprecated; use --from=BinaryIndex")
        elif o == "--id-order":
            # Colon-separated list, e.g. "a:b:c" -> ["a", "b", "c"].
            id_order = a.split(":")
        elif o == "--from":
            input_filetype_ext = a
        elif o == "--to":
            output_filetype_ext = a
        else:
            # Include the option itself so the failure can be diagnosed.
            raise Exception("Bad flag: " + o)

    if non_overlapping and match_distance == "All":
        # If we are taking all matches, we need to be able to overlap...
        error(
            "Conflicting options: --match-distance=All and --non-overlapping")

    if len(mode) != 1:
        error("Exactly one option, -p or -n, must be provided")
    if "patterns" in mode:
        # `patterns` is already declared global at the top of the function.
        patterns = filetype.parse_entities([patterns_file])
Esempio n. 4
0
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback that records this script's command line options in globals.

        All four arguments are first passed unchanged to
        `treat_options_simplest`. Each (option, value) pair in `opts` is then
        interpreted and stored in the matching module-level global.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string Passed through to `treat_options_simplest`.

        Raises Exception when `opts` contains an option not handled here.
    """
    global thresh_source, thresh_value
    global equals_name, equals_value
    global reverse
    global minlength, maxlength
    global min_mweoccurs, max_mweoccurs
    global input_filetype_ext, output_filetype_ext
    global patterns

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, value in opts:
        if flag in ("-t", "--threshold"):
            parsed_threshold = interpret_threshold(value)
            if not parsed_threshold:
                error("The format of the -t argument must be <source>:"
                      "<value>\n<source> must be a valid corpus name and "
                      "<value> must be a non-negative integer")
            else:
                thresh_source, thresh_value = parsed_threshold
        elif flag in ("-e", "--equals"):
            parsed_equals = interpret_equals(value)
            if not parsed_equals:
                error("The format of the -e argument must be <name>:"
                      "<value>\n<name> must be a valid feat name and "
                      "<value> must be a non-empty string")
            else:
                equals_name, equals_value = parsed_equals
        elif flag in ("-p", "--patterns"):
            verbose("Reading patterns file")
            patterns = filetype.parse_entities([value])
        elif flag in ("-r", "--reverse"):
            reverse = True
            verbose("Option REVERSE active")
        # Length and occurrence bounds share the same interpreter helper.
        elif flag in ("-i", "--minlength"):
            minlength = interpret_length(value, "minimum")
        elif flag in ("-a", "--maxlength"):
            maxlength = interpret_length(value, "maximum")
        elif flag == "--min-mweoccurs":
            min_mweoccurs = interpret_length(value, "minimum")
        elif flag == "--max-mweoccurs":
            max_mweoccurs = interpret_length(value, "maximum")
        # Input / output file type selection.
        elif flag == "--from":
            input_filetype_ext = value
        elif flag == "--to":
            output_filetype_ext = value
        else:
            raise Exception("Bad arg: " + flag)

    # Inverted bounds are only reported, not rejected.
    if minlength > maxlength:
        warn("minlength should be <= maxlength")
    if min_mweoccurs > max_mweoccurs:
        warn("min-mweoccurs should be <= max-mweoccurs")
Esempio n. 5
0
        elif o == "--corpus-from":
            corpus_filetype_ext = a
        elif o == "--reference-from":
            reference_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    if not reference_fname:
        error("No reference file given!")

    sentence_aligner = sentence_aligner_class()
    mwe_evaluator = mwe_evaluator_class(sentence_aligner)


################################################################################
# MAIN SCRIPT

# Script entry point: parse the command line, load the reference and the
# predicted corpus, compare them and print the evaluation scores.
if __name__ == "__main__":
    # Long option names handed to read_options (presumably getopt-style,
    # where a trailing "=" means the option takes a value -- TODO confirm).
    longopts = [
        "reference=", "sentence-aligner=", "evaluator=", "reference-from=",
        "corpus-from="
    ]
    # treat_options is passed as the option-handling callback; the value
    # returned by read_options is used below as the corpus input list.
    args = read_options("r:e:", longopts, treat_options, -1, usage_string)
    # NOTE(review): reference_fname, the *_filetype_ext names and
    # mwe_evaluator are not assigned in this block; presumably treat_options
    # sets them as globals during read_options -- confirm.
    reference = parse_entities([reference_fname], reference_filetype_ext)
    prediction = parse_entities(args, corpus_filetype_ext)
    results = mwe_evaluator.compare_sentence_lists(reference, prediction)
    # The raw results object is printed before the individual scores.
    print("DEBUG:", results)
    print("Precision:", results.precision())
    print("Recall:", results.recall())
    print("F-measure:", results.f_measure())
Esempio n. 6
0
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback that records this script's command line options in globals.

        All four arguments are first passed unchanged to
        `treat_options_simplest`. Each (option, value) pair in `opts` is then
        interpreted and stored in the matching module-level global.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string Passed through to `treat_options_simplest`.

        Raises Exception when `opts` contains an option not handled here.
    """
    global thresh_source, thresh_value
    global equals_name, equals_value
    global reverse
    global minlength, maxlength
    global min_mweoccurs, max_mweoccurs
    global input_filetype_ext, output_filetype_ext
    global patterns

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for option, argument in opts:
        if option in ("-t", "--threshold"):
            source_and_value = interpret_threshold(argument)
            if not source_and_value:
                error("The format of the -t argument must be <source>:"
                      "<value>\n<source> must be a valid corpus name and "
                      "<value> must be a non-negative integer")
            else:
                thresh_source, thresh_value = source_and_value
        elif option in ("-e", "--equals"):
            name_and_value = interpret_equals(argument)
            if not name_and_value:
                error("The format of the -e argument must be <name>:"
                      "<value>\n<name> must be a valid feat name and "
                      "<value> must be a non-empty string")
            else:
                equals_name, equals_value = name_and_value
        elif option in ("-p", "--patterns"):
            verbose("Reading patterns file")
            patterns = filetype.parse_entities([argument])
        elif option in ("-r", "--reverse"):
            reverse = True
            verbose("Option REVERSE active")
        # Length and occurrence bounds share the same interpreter helper.
        elif option in ("-i", "--minlength"):
            minlength = interpret_length(argument, "minimum")
        elif option in ("-a", "--maxlength"):
            maxlength = interpret_length(argument, "maximum")
        elif option == "--min-mweoccurs":
            min_mweoccurs = interpret_length(argument, "minimum")
        elif option == "--max-mweoccurs":
            max_mweoccurs = interpret_length(argument, "maximum")
        # Input / output file type selection.
        elif option == "--from":
            input_filetype_ext = argument
        elif option == "--to":
            output_filetype_ext = argument
        else:
            raise Exception("Bad arg: " + option)

    # Inverted bounds are only reported, not rejected.
    if minlength > maxlength:
        warn("minlength should be <= maxlength")
    if min_mweoccurs > max_mweoccurs:
        warn("min-mweoccurs should be <= max-mweoccurs")
Esempio n. 7
0
            mwe_evaluator_class = MWE_EVALUATORS[a]
        elif o == "--corpus-from":
            corpus_filetype_ext = a
        elif o == "--reference-from":
            reference_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    if not reference_fname:
        error("No reference file given!")

    sentence_aligner = sentence_aligner_class()
    mwe_evaluator = mwe_evaluator_class(sentence_aligner)


        
################################################################################  
# MAIN SCRIPT


# Script entry point: parse the command line, load the reference and the
# predicted corpus, compare them and print the evaluation scores.
if __name__ == "__main__":
    # Long option names handed to read_options (presumably getopt-style,
    # where a trailing "=" means the option takes a value -- TODO confirm).
    longopts = ["reference=", "sentence-aligner=", "evaluator=", "reference-from=", "corpus-from="]
    # treat_options is passed as the option-handling callback; the value
    # returned by read_options is used below as the corpus input list.
    args = read_options("r:e:", longopts, treat_options, -1, usage_string)
    # NOTE(review): reference_fname, the *_filetype_ext names and
    # mwe_evaluator are not assigned in this block; presumably treat_options
    # sets them as globals during read_options -- confirm.
    reference = parse_entities([reference_fname], reference_filetype_ext)
    prediction = parse_entities(args, corpus_filetype_ext)
    results = mwe_evaluator.compare_sentence_lists(reference, prediction)
    # The raw results object is printed before the individual scores.
    print("DEBUG:", results)
    print("Precision:", results.precision())
    print("Recall:", results.recall())
    print("F-measure:", results.f_measure())