コード例 #1
0
ファイル: uniq.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback that applies the command line options of this script.

        All four arguments are first handed to `treat_options_simplest`;
        afterwards every (option, value) pair found in `opts` is mapped onto
        one of this module's global settings.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string, forwarded unchanged to
        `treat_options_simplest`.

        @raise Exception If an option is not handled by this function.
    """
    global ignore_pos, surface_instead_lemmas
    global input_filetype_ext, output_filetype_ext

    treat_options_simplest( opts, arg, n_arg, usage_string )

    for flag, value in opts :
        if flag == "--from" :
            input_filetype_ext = value
            continue
        if flag == "--to" :
            output_filetype_ext = value
            continue
        if flag == "-g" or flag == "--ignore-pos" :
            ignore_pos = True
            continue
        if flag == "-s" or flag == "--surface" :
            surface_instead_lemmas = True
            continue
        # Anything that reaches this point is not an option of this script.
        raise Exception( "Bad arg: " + flag )
コード例 #2
0
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback that applies the command line options of this script.

        Delegates to `treat_options_simplest` first, then walks over the
        (option, value) pairs in `opts` and updates the module globals
        `measures`, `main_freq_name` and `join_all_contrastive`.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string, forwarded unchanged to
        `treat_options_simplest`.
    """
    global measures, supported_measures
    global main_freq_name, join_all_contrastive

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for name, value in opts:
        if name == "-m" or name == "--measures":
            try:
                # Reset first, so a failed interpretation leaves an empty list.
                measures = []
                measures = interpret_measures(value)
            except ValueError as message:
                hint = ("\nargument must be list separated by "
                        "\":\" and containing the names: ")
                error(str(message) + hint + str(supported_measures))
        elif name == "-o" or name == "--original":
            main_freq_name = value
        elif name == "-a" or name == "--all":
            join_all_contrastive = True

    # -o is required: complain when no main frequency name is set.
    if not main_freq_name:
        error("Option -o is mandatory")
コード例 #3
0
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.

        `treat_options_simplest` is called first with all four arguments;
        afterwards every (option, value) pair in `opts` is applied to the
        module-level settings.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string, forwarded unchanged to
        `treat_options_simplest`.

        @raise Exception If an option is not handled by this function.
    """
    global algoname
    global lower_attr
    global input_filetype_ext
    global output_filetype_ext

    treat_options_simplest( opts, arg, n_arg, usage_string )

    for ( o, a ) in opts:
        if o == "--from":
            input_filetype_ext = a
        elif o == "--to":
            output_filetype_ext = a
        elif o in ("-l", "--lemmas") :
            lower_attr = "lemma"
        elif o in ("-a", "--algorithm"):
            # The algorithm name is stored lower-cased.
            algoname = a.lower()
        elif o in ("-m", "-x"):
            # Indented with spaces only (the original line mixed in a tab).
            error( "Deprecated options -x and -m. Run with -h for details" )
        else:
            raise Exception("Bad arg: " + o)
コード例 #4
0
ファイル: changepos.py プロジェクト: poethan/mwetoolkit
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback that applies the command line options of this script.

        Delegates to `treat_options_simplest`, resets the global `simplify`
        to `simplify_ptb`, and then lets the (option, value) pairs in `opts`
        override the simplifier and the input/output file type extensions.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string, forwarded unchanged to
        `treat_options_simplest`.

        @raise Exception If an option is not handled by this function.
    """
    global simplify, input_filetype_ext, output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # Default simplifier, used unless -p or -G is given.
    simplify = simplify_ptb

    for flag, value in opts:
        if flag == "-p" or flag == "--palavras":
            simplify = simplify_palavras
            continue
        if flag == "-G" or flag == "--genia":
            simplify = simplify_genia
            continue
        if flag == "--from":
            input_filetype_ext = value
            continue
        if flag == "--to":
            output_filetype_ext = value
            continue
        raise Exception("Bad arg: " + flag)
コード例 #5
0
ファイル: head.py プロジェクト: poethan/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """Callback function that handles the command line options of this script.

    `treat_options_simplest` is called first; afterwards every
    (option, value) pair in `opts` is applied to the module globals.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, forwarded unchanged to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this function.
    """
    global limit
    global input_filetype_ext
    global output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o == "--from":
            input_filetype_ext = a
        elif o == "--to":
            output_filetype_ext = a
        elif o in ("-n", "--number"):
            # Validate into a local first, so that the global `limit` is
            # never overwritten with a rejected (negative) value.
            try:
                requested = int( a )
                if requested < 0:
                    raise ValueError
            except ValueError:
                error("You must provide a positive " \
                         "integer value as argument of -n option.")
            else:
                limit = requested
        else:
            # Name the offending option, as the sibling scripts do.
            raise Exception("Bad arg: " + o)
コード例 #6
0
ファイル: transform.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """Callback that applies the command line options of this script.

    Delegates to `util.treat_options_simplest`, then processes each
    (option, value) pair in `opts`. The values of --begin, --end and
    -w/--each-word are compiled as Python source in "exec" mode and stored
    in the corresponding module globals.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, forwarded unchanged to
    `util.treat_options_simplest`.
    @raise Exception If an option is not handled by this function.
    """
    global executable_w, executable_beg, executable_end
    global input_filetype_ext, output_filetype_ext

    util.treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, value in opts:
        if flag == "--from":
            input_filetype_ext = value
            continue
        if flag == "--to":
            output_filetype_ext = value
            continue
        if flag == "--begin":
            executable_beg = compile(value, "<cmdline:--begin>", "exec")
            continue
        if flag == "--end":
            executable_end = compile(value, "<cmdline:--end>", "exec")
            continue
        if flag == "-w" or flag == "--each-word":
            executable_w = compile(value, "<cmdline:--each-word>", "exec")
            continue
        raise Exception("Bad arg " + flag)
コード例 #7
0
ファイル: lemmatise.py プロジェクト: poethan/mwetoolkit
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback that applies the command line options of this script.

        Delegates to `treat_options_simplest`, then sets the global
        `web_freq` to a `YahooFreq` or `GoogleFreq` instance for each
        -y/--yahoo or -w/--google option found in `opts`. An error is
        reported when more than one of these options was given.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string, forwarded unchanged to
        `treat_options_simplest`.
    """
    global web_freq

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # One entry per search-engine option seen on the command line.
    selected = []
    for flag, _unused in opts:
        if flag == "-y" or flag == "--yahoo":
            web_freq = YahooFreq()
            selected.append("yahoo")
        elif flag == "-w" or flag == "--google":
            web_freq = GoogleFreq()
            selected.append("google")

    if len(selected) >= 2:
        error("At most one option -y or -w, should be provided")
コード例 #8
0
def treat_options(opts, arg, n_arg, usage_string):
    """Callback that applies the command line options of this script.

    After delegating to `util.treat_options_simplest`, each
    (option, value) pair in `opts` either sets a file type extension or
    compiles the given Python source (mode "exec") into one of the
    `executable_*` module globals.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, forwarded unchanged to
    `util.treat_options_simplest`.
    @raise Exception If an option is not handled by this function.
    """
    global executable_w, executable_beg, executable_end
    global input_filetype_ext, output_filetype_ext

    util.treat_options_simplest(opts, arg, n_arg, usage_string)

    for option, argument in opts:
        if option == "--from":
            input_filetype_ext = argument
        elif option == "--to":
            output_filetype_ext = argument
        elif option == "--begin":
            code_beg = compile(argument, "<cmdline:--begin>", "exec")
            executable_beg = code_beg
        elif option == "--end":
            code_end = compile(argument, "<cmdline:--end>", "exec")
            executable_end = code_end
        elif option == "-w" or option == "--each-word":
            code_word = compile(argument, "<cmdline:--each-word>", "exec")
            executable_w = code_word
        else:
            raise Exception("Bad arg " + option)
コード例 #9
0
ファイル: combine_freqs.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.

        `treat_options_simplest` is called first; afterwards the
        -c/--combination and -o/--original options found in `opts` are
        stored in the module globals `combination` and `main_freq`.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string; passed to
        `treat_options_simplest` and to `usage` when the combination
        argument is rejected.
    """
    global combination
    global supported_combination
    global main_freq

    treat_options_simplest( opts, arg, n_arg, usage_string )

    for ( o, a ) in opts:
        if o in ( "-c", "--combination" ) :
            try :
                # Reset first, so a failed interpretation leaves an empty list.
                combination = []
                combination = interpret_combinations( a )
            except ValueError as message :
                # sys.stderr.write works on both Python 2 and 3, unlike the
                # "print >>" chevron form. The message now has the space
                # that was missing between "separated" and "by".
                sys.stderr.write( "%s\n" % ( message, ) )
                sys.stderr.write( "ERROR: argument must be list separated "
                                  "by \":\" and containing the names: " +
                                  str( supported_combination ) + "\n" )
                usage( usage_string )
                sys.exit( 2 )
        elif o in ( "-o", "--original" ) :
            main_freq = a
コード例 #10
0
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        `treat_options_simplest` is called first; afterwards the value of
        -a/--attributes is split on ":" into the global `attributes`, and
        every name not found in `WORD_ATTRIBUTES` is reported through
        `error`. When `attributes` is still None after all options were
        read, the usage is printed and the script exits with status 2.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string for the current script.
    """
    global attributes

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-a", "--attributes"):
            attributes = a.split(":")
            for attr in attributes:
                if attr not in WORD_ATTRIBUTES:
                    error("Unknown attribute '%s'!" % attr)

    if attributes is None:
        # sys.stderr.write works on both Python 2 and 3, unlike the
        # "print >>" chevron form.
        sys.stderr.write("The option -a <attributes> is mandatory.\n")
        usage(usage_string)
        sys.exit(2)
コード例 #11
0
ファイル: measure.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """Callback function that handles the command line options of this script.

    `treat_options_simplest` is called first; afterwards every
    (option, value) pair in `opts` is processed. Finally the selected
    sentence aligner class is instantiated and passed to the selected
    evaluator class, whose instance is stored in the global `mwe_evaluator`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, forwarded unchanged to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this function.
    @raise KeyError If the aligner or evaluator name is not a key of
    `SENTENCE_ALIGNERS` / `MWE_EVALUATORS`.
    """
    global reference_fname
    global mwe_evaluator
    global corpus_filetype_ext
    global reference_filetype_ext

    sentence_aligner_class = NaiveSentenceAligner
    mwe_evaluator_class = ExactMatchMWEEvaluator

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-r", "--reference"):
            reference_fname = a
        elif o == "--sentence-aligner":
            # Equality, not `in ("--sentence-aligner")`: the parenthesised
            # string is not a tuple, so `in` was a substring test that also
            # matched fragments such as "-s" or "-a".
            sentence_aligner_class = SENTENCE_ALIGNERS[a]
        elif o in ("-e", "--evaluator"):
            mwe_evaluator_class = MWE_EVALUATORS[a]
        elif o == "--corpus-from":
            corpus_filetype_ext = a
        elif o == "--reference-from":
            reference_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    if not reference_fname:
        error("No reference file given!")

    sentence_aligner = sentence_aligner_class()
    mwe_evaluator = mwe_evaluator_class(sentence_aligner)
コード例 #12
0
ファイル: content_words.py プロジェクト: poethan/mwetoolkit
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    `treat_options_simplest` is called first; afterwards every
    (option, value) pair in `opts` is applied to the module globals.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, forwarded unchanged to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this function.
    """
    global input_filetype_ext
    global output_filetype_ext
    global append_pos_tag
    global clean_special

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # Indentation below uses spaces only; the original mixed tabs and spaces.
    for (o, a) in opts:
        if o == "--from":
            input_filetype_ext = a
        elif o == "--to":
            output_filetype_ext = a
        elif o == "--append-pos-tag":
            # Only the two granularities below are accepted.
            if a in ("coarse", "fine"):
                append_pos_tag = a
            else:
                error("Expected \"coarse\" or \"fine\", found " + a)
        elif o == "--clean-special":
            clean_special = True
        else:
            raise Exception("Bad arg: " + o)
コード例 #13
0
ファイル: to_ucs.py プロジェクト: poethan/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.

        `treat_options_simplest` is called first; afterwards every
        (option, value) pair in `opts` is applied to the module globals.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string, forwarded unchanged to
        `treat_options_simplest`.

        @raise Exception If an option is not handled by this function.
    """
    global surface_instead_lemmas
    global lemmapos
    global input_filetype_ext
    # Without this declaration the -f value was bound to a function-local
    # name and silently discarded when the function returned.
    global freq_source

    treat_options_simplest( opts, arg, n_arg, usage_string )

    for ( o, a ) in opts:
        if o in ("-s", "--surface") :
            surface_instead_lemmas = True
        elif o in ("-p", "--lemmapos") :
            lemmapos = True
        elif o in ("-f", "--freq-source") :
            freq_source = a
        elif o == "--from":
            input_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)
コード例 #14
0
ファイル: lemmatise.py プロジェクト: KWARC/mwetoolkit
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback that applies the command line options of this script.

        After delegating to `treat_options_simplest`, the global `web_freq`
        is set to a new `YahooFreq` (-y/--yahoo) or `GoogleFreq`
        (-w/--google) instance. Giving more than one of these options is
        reported through `error`.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string, forwarded unchanged to
        `treat_options_simplest`.
    """
    global web_freq

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # Counts how many search-engine options were given.
    engines_requested = 0
    for option, _value in opts:
        if option == "-y" or option == "--yahoo":
            web_freq = YahooFreq()
            engines_requested += 1
        elif option == "-w" or option == "--google":
            web_freq = GoogleFreq()
            engines_requested += 1

    if engines_requested > 1:
        error("At most one option -y or -w, should be provided")
コード例 #15
0
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback that applies the command line options of this script.

    After delegating to `treat_options_simplest`, -m/--morphg splits its
    value into the globals `morphg_folder` and `morphg_file`, and
    -x/--moses sets `generate_text`. If the joined morphg path does not
    exist, a warning is issued and both morphg globals are set to None.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Instructions that appear if you run the program with
    the wrong parameters or options.
    """
    global morphg_folder, morphg_file, generate_text

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, value in opts:
        if flag == "-m" or flag == "--morphg":
            morphg_folder, morphg_file = os.path.split(value)
        elif flag == "-x" or flag == "--moses":
            generate_text = True

    morphg_path = os.path.join(morphg_folder, morphg_file)
    if os.path.exists(morphg_path):
        return
    warn("morphg not found !!! - outputting analysed forms")
    morphg_file = None
    morphg_folder = None
コード例 #16
0
ファイル: xmlcorpus2txt.py プロジェクト: KWARC/mwetoolkit
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        `treat_options_simplest` is called first; afterwards the value of
        -a/--attributes is split on ":" into the global `attributes`, and
        every name not found in `WORD_ATTRIBUTES` is reported through
        `error`. When `attributes` is still None after all options were
        read, the usage is printed and the script exits with status 2.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string for the current script.
    """
    global attributes

    treat_options_simplest( opts, arg, n_arg, usage_string )

    for (o, a) in opts:
        if o in ("-a", "--attributes"):
            attributes = a.split(":")
            for attr in attributes:
                if attr not in WORD_ATTRIBUTES:
                    error("Unknown attribute '%s'!" % attr)

    if attributes is None:
        # sys.stderr.write works on both Python 2 and 3, unlike the
        # "print >>" chevron form.
        sys.stderr.write("The option -a <attributes> is mandatory.\n")
        usage(usage_string)
        sys.exit(2)
コード例 #17
0
ファイル: select.py プロジェクト: poethan/mwetoolkit
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    `treat_options_simplest` is called first; afterwards every
    (option, value) pair in `opts` is applied to the module globals.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, forwarded unchanged to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this function.
    """
    global input_filetype_ext
    global output_filetype_ext
    global regex_word_lemma
    global regex_word_surface
    global regex_word_pos
    global regex_word_syn
    global take_lemma
    # Without this declaration --keep-empty-words only set a function-local
    # name, so the option had no lasting effect.
    global keep_empty_words

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        # Equality, not `in ("--from")`: the parenthesised string is not a
        # tuple, so `in` was a substring test that matched e.g. "-f".
        if o == "--from":
            input_filetype_ext = a
        elif o == "--to":
            output_filetype_ext = a
        elif o == "--keep-empty-words":
            keep_empty_words = True
        elif o == "--word-lemmas":
            take_lemma = True
        elif o == "--word-lemmas-matching":
            regex_word_lemma = a
        elif o == "--word-surfaces-matching":
            regex_word_surface = a
        elif o == "--word-pos-matching":
            regex_word_pos = a
        elif o == "--word-syn-matching":
            regex_word_syn = a
        else:
            # Name the offending option, as the sibling scripts do.
            raise Exception("Bad arg: " + o)
コード例 #18
0
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    `treat_options_simplest` is called first; afterwards every
    (option, value) pair in `opts` is processed. Finally the selected
    sentence aligner class is instantiated and passed to the selected
    evaluator class, whose instance is stored in the global `mwe_evaluator`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, forwarded unchanged to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this function.
    @raise KeyError If the aligner or evaluator name is not a key of
    `SENTENCE_ALIGNERS` / `MWE_EVALUATORS`.
    """
    global reference_fname
    global mwe_evaluator
    global corpus_filetype_ext
    global reference_filetype_ext

    sentence_aligner_class = NaiveSentenceAligner
    mwe_evaluator_class = ExactMatchMWEEvaluator

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-r", "--reference"):
            reference_fname = a
        elif o == "--sentence-aligner":
            # Equality, not `in ("--sentence-aligner")`: the parenthesised
            # string is not a tuple, so `in` was a substring test that also
            # matched fragments such as "-s" or "-a".
            sentence_aligner_class = SENTENCE_ALIGNERS[a]
        elif o in ("-e", "--evaluator"):
            mwe_evaluator_class = MWE_EVALUATORS[a]
        elif o == "--corpus-from":
            corpus_filetype_ext = a
        elif o == "--reference-from":
            reference_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    if not reference_fname:
        error("No reference file given!")

    sentence_aligner = sentence_aligner_class()
    mwe_evaluator = mwe_evaluator_class(sentence_aligner)
コード例 #19
0
ファイル: tail.py プロジェクト: KWARC/mwetoolkit
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    `treat_options_simplest` is called first; afterwards every
    (option, value) pair in `opts` is applied to the module globals.
    -n/--number sets `limit` and resets `entity_buffer` to a list of
    `limit` None entries.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, forwarded unchanged to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this function.
    """
    global limit
    global entity_buffer
    global input_filetype_ext
    global output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for ( o, a ) in opts:
        if o == "--from":
            input_filetype_ext = a
        elif o == "--to":
            output_filetype_ext = a
        elif o in ("-n", "--number"):
            # Validate into a local first, so that neither `limit` nor
            # `entity_buffer` is overwritten with a rejected value.
            try:
                requested = int(a)
                if requested < 0:
                    raise ValueError
            except ValueError:
                error("You must provide a positive " + \
                      "integer value as argument of -n option.")
            else:
                limit = requested
                entity_buffer = [None] * limit
        else:
            raise Exception("Bad arg: " + o)
コード例 #20
0
ファイル: feat_contrast.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback that applies the command line options of this script.

        After delegating to `treat_options_simplest`, the options
        -m/--measures, -o/--original and -a/--all found in `opts` update the
        globals `measures`, `main_freq_name` and `join_all_contrastive`.
        An error is reported when no main frequency name ends up being set.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string, forwarded unchanged to
        `treat_options_simplest`.
    """
    global measures, supported_measures
    global main_freq_name, join_all_contrastive

    treat_options_simplest( opts, arg, n_arg, usage_string )

    for flag, value in opts :
        if flag == "-m" or flag == "--measures" :
            try :
                # Reset first, so a failed interpretation leaves an empty list.
                measures = []
                measures = interpret_measures( value )
            except ValueError as message :
                explanation = str( message )
                explanation += "\nargument must be list separated by "
                explanation += "\":\" and containing the names: "
                explanation += str( supported_measures )
                error( explanation )
            continue
        if flag == "-o" or flag == "--original" :
            main_freq_name = value
            continue
        if flag == "-a" or flag == "--all" :
            join_all_contrastive = True

    if not main_freq_name :
        error( "Option -o is mandatory")
コード例 #21
0
ファイル: rasp2xml.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string):
    """
    Callback that applies the command line options of this script.

    After delegating to `treat_options_simplest`, -m/--morphg splits its
    value into the globals `morphg_folder` and `morphg_file`, and
    -x/--moses sets `generate_text`. If the joined morphg path does not
    exist, a warning is issued and both morphg globals are set to None.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Instructions that appear if you run the program with
    the wrong parameters or options.
    """
    global morphg_folder, morphg_file
    global generate_text

    treat_options_simplest( opts, arg, n_arg, usage_string )

    for option, argument in opts :
        if option == "-m" or option == "--morphg" :
            folder_and_file = os.path.split( argument )
            morphg_folder, morphg_file = folder_and_file
            continue
        if option == "-x" or option == "--moses" :
            generate_text = True

    morphg_found = os.path.exists( os.path.join( morphg_folder, morphg_file ) )
    if not morphg_found :
        warn( "morphg not found !!! - outputting analysed forms" )
        morphg_file = None
        morphg_folder = None
コード例 #22
0
ファイル: combine_freqs.py プロジェクト: poethan/mwetoolkit
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        `treat_options_simplest` is called first; afterwards the
        -c/--combination and -o/--original options found in `opts` are
        stored in the module globals `combination` and `main_freq`.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string; passed to
        `treat_options_simplest` and to `usage` when the combination
        argument is rejected.
    """
    global combination
    global supported_combination
    global main_freq

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-c", "--combination"):
            try:
                # Reset first, so a failed interpretation leaves an empty list.
                combination = []
                combination = interpret_combinations(a)
            except ValueError as message:
                # sys.stderr.write works on both Python 2 and 3, unlike the
                # "print >>" chevron form. The message now has the space
                # that was missing between "separated" and "by".
                sys.stderr.write("%s\n" % (message,))
                sys.stderr.write("ERROR: argument must be list separated "
                                 "by \":\" and containing the names: " +
                                 str(supported_combination) + "\n")
                usage(usage_string)
                sys.exit(2)
        elif o in ("-o", "--original"):
            main_freq = a
コード例 #23
0
ファイル: to_human.py プロジェクト: poethan/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """Callback that applies the command line options of this script.

    This script defines no options of its own here: all the work is
    delegated to `treat_options_simplest`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, forwarded unchanged to
    `treat_options_simplest`.
    """
    # Declared but not assigned in this function.
    global reference_fname, mwe_evaluator

    treat_options_simplest( opts, arg, n_arg, usage_string )
コード例 #24
0
ファイル: annotate_mwe.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """Callback function that handles the command line options of this script.

    `treat_options_simplest` is called first; afterwards every
    (option, value) pair in `opts` is processed. Finally the candidates
    files are parsed into a `CandidatesHandler` and the global `detector`
    is built from the selected detector class.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, forwarded unchanged to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this function.
    """
    global filetype_corpus_ext
    global filetype_candidates_ext
    global output_filetype_ext
    global action_annotate
    global action_filter
    global detector

    treat_options_simplest(opts, arg, n_arg, usage_string)

    detector_class = ContiguousLemmaDetector
    candidates_fnames = []
    n_gaps = None

    for (o, a) in opts:
        if o in ("-c", "--candidates"):
            candidates_fnames.append(a)
        elif o in ("-d", "--detector"):
            detector_class = detectors.get(a, None)
            if detector_class is None :
                error("Unknown detector name: " + a)
        elif o in ("-S", "--source"):
            detector_class = SourceDetector
        elif o in ("-g", "--gaps"):
            # Report a non-integer value through error() instead of letting
            # the ValueError escape as a traceback.
            try:
                n_gaps = int(a)
            except ValueError:
                error("Argument of --gaps must be an integer, found: " + a)
        elif o == "--corpus-from":
            filetype_corpus_ext = a
        elif o == "--candidates-from":
            filetype_candidates_ext = a
        elif o == "--to":
            output_filetype_ext = a
        elif o == "--filter":
            action_annotate = False
            action_filter = True
        elif o == "--filter-and-annot":
            action_filter = True
        else:
            raise Exception("Bad arg: " + o)

    if not candidates_fnames:
        error("No candidates file given!")
    if detector_class == SourceDetector and n_gaps is not None:
        error('Bad arguments: method "Source" with "--gaps"')
    c = CandidatesHandler()
    verbose("Reading MWE list from candidates file")
    filetype.parse(candidates_fnames,
            c, filetype_candidates_ext)
    verbose("MWE list loaded in memory successfully")
    detector = detector_class(c.info, n_gaps)
コード例 #25
0
ファイル: annotate_mwe.py プロジェクト: poethan/mwetoolkit
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    `treat_options_simplest` is called first; afterwards every
    (option, value) pair in `opts` is processed. Finally the candidates
    files are parsed into a `CandidatesHandler` and the global `detector`
    is built from the selected detector class.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, forwarded unchanged to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this function.
    """
    global filetype_corpus_ext
    global filetype_candidates_ext
    global output_filetype_ext
    global action_annotate
    global action_filter
    global detector

    treat_options_simplest(opts, arg, n_arg, usage_string)

    detector_class = ContiguousLemmaDetector
    candidates_fnames = []
    n_gaps = None

    for (o, a) in opts:
        if o in ("-c", "--candidates"):
            candidates_fnames.append(a)
        elif o in ("-d", "--detector"):
            detector_class = detectors.get(a, None)
            if detector_class is None:
                error("Unknown detector name: " + a)
        elif o in ("-S", "--source"):
            detector_class = SourceDetector
        elif o in ("-g", "--gaps"):
            # Report a non-integer value through error() instead of letting
            # the ValueError escape as a traceback.
            try:
                n_gaps = int(a)
            except ValueError:
                error("Argument of --gaps must be an integer, found: " + a)
        elif o == "--corpus-from":
            filetype_corpus_ext = a
        elif o == "--candidates-from":
            filetype_candidates_ext = a
        elif o == "--to":
            output_filetype_ext = a
        elif o == "--filter":
            action_annotate = False
            action_filter = True
        elif o == "--filter-and-annot":
            action_filter = True
        else:
            raise Exception("Bad arg: " + o)

    if not candidates_fnames:
        error("No candidates file given!")
    if detector_class == SourceDetector and n_gaps is not None:
        error('Bad arguments: method "Source" with "--gaps"')
    c = CandidatesHandler()
    verbose("Reading MWE list from candidates file")
    filetype.parse(candidates_fnames, c, filetype_candidates_ext)
    verbose("MWE list loaded in memory successfully")
    detector = detector_class(c.info, n_gaps)
コード例 #26
0
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback that applies the command line options of this script.

        After delegating to `treat_options_simplest`, each (option, value)
        pair in `opts` is logged through `verbose` and stored in the
        matching module global. Options not listed here are skipped.

        @param opts The (option, value) pairs parsed by getopts.
        @param arg The argument list parsed by getopts.
        @param n_arg The number of arguments expected for this script.
        @param usage_string The usage string printed if the arguments are wrong.
    """
    global first_header, first_rater
    global calculate_pairwise, calculate_confusion
    global separator, distances_matrix, unknown

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, value in opts:
        # The option groups are disjoint, so at most one branch runs.
        if flag in ("-r", "--raters"):
            verbose("First row in file ignored -> considered as rater labels")
            first_header = True
        elif flag in ("-i", "--items"):
            verbose(
                "First column in file ignored -> considered as item labels")
            first_rater = 1
        elif flag in ("-p", "--pairwise"):
            verbose("Computing pairwise coefficients")
            calculate_pairwise = True
        elif flag in ("-u", "--unknown"):
            verbose("Unknown value - TODO: implement: " + value)
            unknown = value
        elif flag in ("-s", "--separator"):
            verbose("Field separator: " + value)
            separator = value
            if len(separator) > 1:
                warn("Multi-char field separator!")
        elif flag in ("-d", "--distance"):
            verbose("Calculating weighted coefficients using distance file")
            distances_matrix = read_distances(value)
            if distances_matrix is None:
                warn(
                    "Error in distance matrix! Weighted coefficients will use 1.0 as default distance"
                )
        elif flag in ("-c", "--confusion"):
            verbose("Calculating confusion matrices")
            calculate_confusion = True
コード例 #27
0
ファイル: wc.py プロジェクト: poethan/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """Callback function that handles the command line options of this script.

    `treat_options_simplest` is called first; afterwards the only option
    handled here, --from, sets the global `input_filetype_ext`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, forwarded unchanged to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this function.
    """
    global input_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o == "--from":
            input_filetype_ext = a
        else:
            # Name the offending option, as the sibling scripts do.
            raise Exception("Bad arg: " + o)
コード例 #28
0
ファイル: xml2owl.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """Callback function that handles the command line options of this script.

    `treat_options_simplest` is called first; afterwards the only option
    handled here, -s/--surface, sets the global `surface_instead_lemmas`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, forwarded unchanged to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this function.
    """
    global surface_instead_lemmas

    treat_options_simplest( opts, arg, n_arg, usage_string )

    # The unused local list `mode` of the original has been dropped.
    for ( o, a ) in opts:
        if o in ("-s", "--surface") :
            surface_instead_lemmas = True
        else:
            raise Exception("Bad arg: " + o)
コード例 #29
0
ファイル: eval_automatic.py プロジェクト: poethan/mwetoolkit
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        `treat_options_simplest` is called first; afterwards every
        (option, value) pair in `opts` is applied to the module globals.
        Once all options are read, the reference file (if any) is parsed
        and `gs_name` is derived from its file name.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string, forwarded unchanged to
        `treat_options_simplest`.

        @raise Exception If an option is not handled by this function.
    """
    global pre_gs
    global ignore_pos
    global gs_name
    global ignore_case
    global lemma_or_surface
    global input_filetype_ext
    global reference_filetype_ext
    ref_name = None

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-r", "--reference"):
            ref_name = a
        elif o in ("-g", "--ignore-pos"):
            ignore_pos = True
        elif o in ("-c", "--case"):
            ignore_case = False
        elif o in ("-L", "--lemma-or-surface"):
            lemma_or_surface = True
        elif o == "--input-from":
            input_filetype_ext = a
        elif o == "--reference-from":
            reference_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    # The reference list needs to be opened after all the options are read,
    # since options such as -g and -c modify the way the list is represented
    if ref_name:
        filetype.parse([ref_name], ReferenceReaderHandler(),
                       reference_filetype_ext)
        # Remove every ".xml" occurrence, then everything up to the last
        # "/". The pattern is a raw string so that "\." is a regex escape
        # and not an invalid string escape.
        gs_name = re.sub(".*/", "", re.sub(r"\.xml", "", ref_name))
    # There's no reference list... Oh oh cannot evaluate :-(
    # NOTE(review): pre_gs is presumably filled while the reference is
    # parsed above - confirm against ReferenceReaderHandler.
    if not pre_gs:
        error("You MUST provide a non-empty reference list!")
コード例 #30
0
ファイル: kappa.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.

        Every recognised option is announced through `verbose` and stored in
        the matching module-level setting. Options not listed in the loop
        below are left untouched by it.

        @param opts The (option, value) pairs parsed by getopts.
        @param arg The argument list parsed by getopts.
        @param n_arg The number of arguments expected for this script.
        @param usage_string The usage string forwarded to
        `treat_options_simplest`.
    """
    global first_header, first_rater
    global calculate_pairwise, calculate_confusion
    global separator, distances_matrix, unknown

    treat_options_simplest( opts, arg, n_arg, usage_string )

    for ( flag, value ) in opts :
        if flag in ("-r", "--raters") :
            verbose( "First row in file ignored -> considered as rater labels")
            first_header = True
        elif flag in ("-i", "--items") :
            verbose("First column in file ignored -> considered as item labels")
            first_rater = 1
        elif flag in ("-p", "--pairwise") :
            verbose( "Computing pairwise coefficients" )
            calculate_pairwise = True
        elif flag in ("-u", "--unknown") :
            verbose( "Unknown value - TODO: implement: " + value )
            unknown = value
        elif flag in ("-s", "--separator") :
            verbose( "Field separator: " + value )
            separator = value
            # A separator longer than one character is accepted, but flagged.
            if len( separator ) > 1 :
                warn("Multi-char field separator!")
        elif flag in ("-d", "--distance") :
            verbose("Calculating weighted coefficients using distance file")
            distances_matrix = read_distances( value )
            if distances_matrix is None :
                warn("Error in distance matrix! Weighted coefficients will use 1.0 as default distance")
        elif flag in ("-c", "--confusion") :
            verbose( "Calculating confusion matrices" )
            calculate_confusion = True
コード例 #31
0
ファイル: eval_automatic.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.

        All arguments are first forwarded to `treat_options_simplest`. After
        the option loop, the reference file given with -r/--reference (if any)
        is parsed with a `ReferenceReaderHandler`, and `gs_name` is computed
        from the reference file name.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `treat_options_simplest`.

        Raises Exception for an option this script does not handle.
    """
    global pre_gs
    global ignore_pos
    global gs_name
    global ignore_case
    global lemma_or_surface
    global input_filetype_ext
    global reference_filetype_ext
    ref_name = None

    treat_options_simplest( opts, arg, n_arg, usage_string )

    for ( o, a ) in opts:
        if o in ("-r", "--reference"):
            ref_name = a
        elif o in ("-g", "--ignore-pos"):
            ignore_pos = True
        elif o in ("-c", "--case"):
            ignore_case = False
        elif o in ("-L", "--lemma-or-surface"):
            lemma_or_surface = True
        elif o == "--input-from":
            input_filetype_ext = a
        elif o == "--reference-from":
            reference_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    # The reference list needs to be opened after all the options are read,
    # since options such as -g and -c modify the way the list is represented
    if ref_name :
        filetype.parse([ref_name], ReferenceReaderHandler(), reference_filetype_ext)
        # Raw strings avoid the invalid "\." escape of a plain string literal.
        gs_name = re.sub( r".*/", "", re.sub( r"\.xml", "", ref_name ) )
    # Without a reference list there is nothing to evaluate against.
    if not pre_gs :
        error("You MUST provide a non-empty reference list!")
コード例 #32
0
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.

        The sentence handler defaults to
        `LowercaserHandler.handle_sentence_simple` and may be replaced through
        -a/--algorithm.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `util.treat_options_simplest`.

        Raises Exception for an option this script does not handle.
    """
    global sent_handler, lower_attr
    global input_filetype_ext, output_filetype_ext

    ctxinfo = util.CmdlineContextInfo(opts)
    util.treat_options_simplest(opts, arg, n_arg, usage_string)
    # Default algorithm, used when -a/--algorithm is not given.
    sent_handler = LowercaserHandler.handle_sentence_simple

    for flag, value in ctxinfo.iter(opts):
        if flag in ("-a", "--algorithm"):
            # Algorithm names are matched case-insensitively.
            chosen = value.lower()
            if chosen == "simple":
                # Same as the default; accepted explicitly.
                sent_handler = LowercaserHandler.handle_sentence_simple
            elif chosen == "complex":
                sent_handler = LowercaserHandler.handle_sentence_complex
            elif chosen == "aggressive":
                sent_handler = LowercaserHandler.handle_sentence_aggressive
            else:
                ctxinfo.error("Bad algorithm name `{name}`", name=chosen)
        elif flag in ("-l", "--lemmas"):
            lower_attr = "lemma"
        elif flag == "--from":
            input_filetype_ext = value
        elif flag == "--to":
            output_filetype_ext = value
        elif flag == "-m":
            ctxinfo.error("Deprecated option. Use --from=Moses instead")
        elif flag == "-x":
            ctxinfo.error("Deprecated option. Use --from=PlainCorpus instead")
        else:
            raise Exception("Bad arg: " + flag)
コード例 #33
0
ファイル: histogram.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """Callback function that handles the command line options of this script.

    Only -n/--number is interpreted here: its value is stored in the global
    `limit`. A non-integer or negative value is reported through `error`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string forwarded to
    `treat_options_simplest`.
    """
    global limit

    treat_options_simplest( opts, arg, n_arg, usage_string )

    for ( flag, value ) in opts :
        if flag not in ("-n", "--number") :
            continue
        try :
            limit = int( value )
            # Negative limits share the error path of unparsable values.
            if limit < 0 :
                raise ValueError
        except ValueError :
            error("You must provide a positive integer value as argument "
                  "of -n option.")
コード例 #34
0
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        All arguments are first forwarded to `treat_options_simplest`; each
        option is then stored in the matching module-level setting.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `treat_options_simplest`.

        Raises Exception for an option this script does not handle, and
        ValueError (from `int`) when the -f/--freq value is not an integer.
    """
    global surface_instead_lemmas
    global glue
    global base_attr
    global min_ngram
    global max_ngram
    global min_frequency
    global ngram_counts
    global selected_candidates
    global use_shelve
    global input_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-s", "--surface"):
            # Surface mode also switches the base attribute.
            surface_instead_lemmas = True
            base_attr = 'surface'
        elif o in ("-f", "--freq"):
            min_frequency = int(a)
        elif o in ("-n", "--ngram"):
            (min_ngram, max_ngram) = interpret_ngram(a)
        elif o in ("-G", "--glue"):
            # "scp" is the only glue name accepted here.
            if a == "scp":
                glue = scp_glue
            else:
                error("Unknown glue function '%s'" % a)
        elif o in ("-S", "--shelve"):
            use_shelve = True
        elif o == "--from":
            input_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)
コード例 #35
0
ファイル: localmaxs.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.

        All arguments are first forwarded to `treat_options_simplest`; each
        option is then stored in the matching module-level setting.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `treat_options_simplest`.

        Raises Exception for an option this script does not handle, and
        ValueError (from `int`) when the -f/--freq value is not an integer.
    """
    global surface_instead_lemmas
    global glue
    global base_attr
    global min_ngram
    global max_ngram
    global min_frequency
    global ngram_counts
    global selected_candidates
    global use_shelve
    global input_filetype_ext

    treat_options_simplest( opts, arg, n_arg, usage_string )

    for ( o, a ) in opts:
        if o in ("-s", "--surface") :
            # Surface mode also switches the base attribute.
            surface_instead_lemmas = True
            base_attr = 'surface'
        elif o in ("-f", "--freq") :
            min_frequency = int(a)
        elif o in ("-n", "--ngram") :
            (min_ngram, max_ngram) = interpret_ngram(a)
        elif o in ("-G", "--glue"):
            # "scp" is the only glue name accepted here.
            if a == "scp":
                glue = scp_glue
            else:
                error("Unknown glue function '%s'" % a)
        elif o in ("-S", "--shelve"):
            use_shelve = True
        elif o == "--from":
            input_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)
コード例 #36
0
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    The value of -n/--number is converted to an integer and kept in the
    global `limit`; a value that is negative or not an integer is reported
    through `error`. Other options are not examined by this loop.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string forwarded to
    `treat_options_simplest`.
    """
    global limit

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for name, raw_value in opts:
        if name in ("-n", "--number"):
            try:
                limit = int(raw_value)
                # A negative limit is routed to the same handler as a
                # value that int() cannot parse.
                if limit < 0:
                    raise ValueError
            except ValueError:
                error("You must provide a positive integer value as argument "
                      "of -n option.")
コード例 #37
0
ファイル: grep.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """Callback function that handles the command line options of this script.

    After the option loop, a missing pattern list is reported through
    `util.error`, and combining --only-matching with --annotate triggers a
    warning.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string forwarded to
    `util.treat_options_simplest`.

    Raises Exception for an option this script does not handle.
    """
    global input_patterns, input_filetype_ext, output_filetype_ext
    global match_distance, non_overlapping, id_order
    global annotate, only_the_matching_subpart

    util.treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, value in opts:
        if flag in ("-p", "--patterns"):
            input_patterns = filetype.parse_entities([value])
        elif flag in ("-d", "--match-distance"):
            match_distance = value
        elif flag in ("-N", "--non-overlapping"):
            non_overlapping = True
        elif flag == "--input-from":
            input_filetype_ext = value
        elif flag == "--to":
            output_filetype_ext = value
        elif flag == "--id-order":
            # Colon-separated list.
            id_order = value.split(":")
        elif flag == "--annotate":
            annotate = True
        elif flag == "--only-matching":
            only_the_matching_subpart = True
        else:
            raise Exception("Bad arg " + flag)

    if input_patterns is None:
        util.error("No patterns provided. Option --patterns is mandatory!")

    if annotate and only_the_matching_subpart:
        util.warn("Switch --only-matching disables --annotate")
コード例 #38
0
ファイル: from_treetagger.py プロジェクト: KWARC/mwetoolkit
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    Stores the -s/--sentence value in `sent_split` and the --to value in
    `output_filetype_ext`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string Instructions that appear if you run the program with
    the wrong parameters or options.

    Raises Exception for an option this script does not handle.
    """
    global sent_split, output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, value in opts:
        if flag == "--to":
            output_filetype_ext = value
        elif flag in ("-s", "--sentence"):
            sent_split = value
        else:
            raise Exception("Bad arg: " + flag)
コード例 #39
0
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    Two options are understood: -s/--sentence (kept in `sent_split`) and
    --to (kept in `output_filetype_ext`).

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string Instructions that appear if you run the program with
    the wrong parameters or options.

    Raises Exception for any other option.
    """
    global sent_split
    global output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for option, argument in opts:
        if option in ("-s", "--sentence"):
            sent_split = argument
            continue
        if option == "--to":
            output_filetype_ext = argument
            continue
        raise Exception("Bad arg: " + option)
コード例 #40
0
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        Each use of -a/--asc or -d/--desc is recorded so that a warning can
        be issued when more than one was given; the last one seen determines
        `ascending`.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `treat_options_simplest`.

        Raises Exception for an option this script does not handle.
    """
    global feat_list, ascending
    global input_filetype_ext, output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # One entry per -a / -d occurrence, in command-line order.
    order_flags = []
    for flag, value in opts:
        if flag in ("-a", "--asc"):
            ascending = True
            order_flags.append("a")
        elif flag in ("-d", "--desc"):
            ascending = False
            order_flags.append("d")
        elif flag in ("-f", "--feat"):
            feat_list = treat_feat_list(value)
        elif flag == "--from":
            input_filetype_ext = value
        elif flag == "--to":
            output_filetype_ext = value
        else:
            raise Exception("Bad arg")

    if len(order_flags) > 1:
        warn("You must provide only one option, -a OR -d. "
             "Only the last one will be considered.")
コード例 #41
0
ファイル: sort.py プロジェクト: KWARC/mwetoolkit
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        The sort direction options -a/--asc and -d/--desc overwrite each
        other; when several are given, a warning is issued and the last one
        read is the one left in `ascending`.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `treat_options_simplest`.

        Raises Exception for an option this script does not handle.
    """
    global feat_list
    global ascending
    global input_filetype_ext
    global output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # Collects "a"/"d" markers so duplicates can be detected afterwards.
    directions_seen = []
    for option, argument in opts:
        if option in ("-f", "--feat"):
            feat_list = treat_feat_list(argument)
        elif option == "--from":
            input_filetype_ext = argument
        elif option == "--to":
            output_filetype_ext = argument
        elif option in ("-a", "--asc"):
            ascending = True
            directions_seen.append("a")
        elif option in ("-d", "--desc"):
            ascending = False
            directions_seen.append("d")
        else:
            raise Exception("Bad arg")

    if len(directions_seen) > 1:
        warn("You must provide only one option, -a OR -d. "
             "Only the last one will be considered.")
コード例 #42
0
ファイル: avg_precision.py プロジェクト: poethan/mwetoolkit
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        All arguments are first forwarded to `treat_options_simplest`. After
        the option loop, a warning is issued if more than one of -a/-d was
        given, and an empty feature list is reported through `error`.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `treat_options_simplest`.

        Raises Exception for an option this script does not handle.
    """
    global feat_list
    global ascending
    global print_precs
    # Must be declared global: otherwise the --from assignment below only
    # binds a function-local name and the option has no effect.
    global input_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # One entry per -a / -d occurrence, used to detect conflicting options.
    a_or_d = []
    for (o, a) in opts:
        if o in ("-f", "--feat"):
            feat_list = treat_feat_list(a)
        elif o in ("-a", "--asc"):
            ascending = True
            a_or_d.append("a")
        elif o in ("-d", "--desc"):
            ascending = False
            a_or_d.append("d")
        elif o in ("-p", "--precs"):
            print_precs = True
        elif o == "--from":
            input_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    if len(a_or_d) > 1:
        warn("you should provide only one option, -a OR -d. Only the last one"
             " will be considered.")
    if not feat_list:
        error("You MUST provide at least one feature with -f")
コード例 #43
0
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        The -m/--measures value is decoded with `interpret_measures`; a
        ValueError from that call is reported through `error` together with
        the list of supported measures.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `treat_options_simplest`.

        Raises Exception for an option this script does not handle.
    """
    global measures, supported_measures
    global main_freq, not_normalize_mle
    global input_filetype_ext, output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, value in opts:
        if flag in ("-m", "--measures"):
            try:
                measures = interpret_measures(value)
            except ValueError as message:
                hint = ("\nargument must be list separated by "
                        "\":\" and containing the names: ")
                error(str(message) + hint + str(supported_measures))
        elif flag in ("-u", "--unnorm-mle"):
            not_normalize_mle = True
        elif flag in ("-o", "--original"):
            main_freq = value
        elif flag == "--to":
            output_filetype_ext = value
        elif flag == "--from":
            input_filetype_ext = value
        else:
            raise Exception("Bad arg: " + flag)
コード例 #44
0
ファイル: feat_association.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.

        When `interpret_measures` rejects the -m/--measures value with a
        ValueError, the problem is reported through `error`, followed by the
        names in `supported_measures`.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `treat_options_simplest`.

        Raises Exception for an option this script does not handle.
    """
    global measures
    global supported_measures
    global main_freq
    global not_normalize_mle
    global input_filetype_ext
    global output_filetype_ext

    treat_options_simplest( opts, arg, n_arg, usage_string )

    for option, argument in opts :
        if option == "--from" :
            input_filetype_ext = argument
        elif option == "--to" :
            output_filetype_ext = argument
        elif option in ( "-o", "--original" ) :
            main_freq = argument
        elif option in ( "-u", "--unnorm-mle" ) :
            not_normalize_mle = True
        elif option in ( "-m", "--measures" ) :
            try :
                measures = interpret_measures( argument )
            except ValueError as message :
                explanation = "\nargument must be list separated by " \
                              "\":\" and containing the names: "
                error( str( message ) + explanation + str( supported_measures ) )
        else :
            raise Exception("Bad arg: " + option)
コード例 #45
0
ファイル: avg_precision.py プロジェクト: pombredanne/nlp-3
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        All arguments are first forwarded to `treat_options_simplest`. Once
        the options are read, giving more than one of -a/-d produces a
        warning, and a missing feature list is reported through `error`.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `treat_options_simplest`.

        Raises Exception for an option this script does not handle.
    """
    global feat_list
    global ascending
    global print_precs
    # Without this declaration the --from branch would assign a local
    # variable that is discarded when the function returns.
    global input_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # One entry per -a / -d occurrence, used to detect conflicting options.
    a_or_d = []
    for (o, a) in opts:
        if o in ("-f", "--feat"):
            feat_list = treat_feat_list(a)
        elif o in ("-a", "--asc"):
            ascending = True
            a_or_d.append("a")
        elif o in ("-d", "--desc"):
            ascending = False
            a_or_d.append("d")
        elif o in ("-p", "--precs"):
            print_precs = True
        elif o == "--from":
            input_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    if len(a_or_d) > 1:
        warn("you should provide only one option, -a OR -d. Only the last one"
             " will be considered.")
    if not feat_list:
        error("You MUST provide at least one feature with -f")
コード例 #46
0
ファイル: index.py プロジェクト: poethan/mwetoolkit
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        `used_attributes` is reset to its four default attribute names before
        the options are read. A `basename` that is still None after the loop
        is reported through `error`.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `treat_options_simplest`.
    """
    global used_attributes, basename, build_entry
    global use_text_format, input_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # Defaults, replaced when -a/--attributes is given.
    used_attributes = ["lemma", "pos", "surface", "syn"]
    for flag, value in opts:
        if flag in ("-a", "--attributes"):
            # Colon-separated list of attribute names.
            used_attributes = value.split(":")
        elif flag in ("-i", "--index"):
            basename = value
        elif flag in ("-m", "--moses"):
            use_text_format = "moses"
        elif flag in ("-c", "--conll"):
            use_text_format = "conll"
        elif flag in ("-o", "--old"):
            indexlib.Index.use_c_indexer(False)
        elif flag == "--from":
            input_filetype_ext = value

    if basename is None:
        error("You must provide a filename for the index.\n"
              "Option -i is mandatory.")
コード例 #47
0
ファイル: index.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.

        The attribute list starts from the defaults "lemma", "pos", "surface"
        and "syn" on every call. If no index basename has been set once the
        options are processed, this is reported through `error`.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `treat_options_simplest`.
    """
    global used_attributes
    global basename
    global build_entry
    global use_text_format
    global input_filetype_ext

    treat_options_simplest( opts, arg, n_arg, usage_string )

    used_attributes = ["lemma", "pos", "surface", "syn"]
    for option, argument in opts :
        if option == "--from" :
            input_filetype_ext = argument
        elif option in ("-i", "--index") :
            basename = argument
        elif option in ("-a", "--attributes") :
            # The attribute names arrive as one colon-separated string.
            used_attributes = argument.split(":")
        elif option in ("-o", "--old") :
            indexlib.Index.use_c_indexer(False)
        elif option in ("-c", "--conll") :
            use_text_format = "conll"
        elif option in ("-m", "--moses") :
            use_text_format = "moses"

    if basename is None :
        error("You must provide a filename for the index.\n"
              "Option -i is mandatory.")
コード例 #48
0
ファイル: localmaxs.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.

        All arguments are first forwarded to `treat_options_simplest`; each
        option handled below is then stored in the matching module-level
        setting. Options not listed below are left untouched by this loop.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `treat_options_simplest`.

        Raises ValueError (from `int`) when the -f/--freq value is not an
        integer.
    """
    global surface_instead_lemmas
    global glue
    global corpus_from_index
    global base_attr
    global min_ngram
    global max_ngram
    global min_frequency

    treat_options_simplest( opts, arg, n_arg, usage_string )

    for ( o, a ) in opts:
        if o in ("-s", "--surface") :
            # Surface mode also switches the base attribute.
            surface_instead_lemmas = True
            base_attr = 'surface'
        elif o in ("-f", "--freq") :
            min_frequency = int(a)
        elif o in ("-n", "--ngram") :
            (min_ngram, max_ngram) = interpret_ngram(a)
        elif o in ("-i", "--index") :
            corpus_from_index = True
        elif o in ("-G", "--glue"):
            # "scp" is the only glue name accepted here.
            if a == "scp":
                glue = scp_glue
            else:
                error("Unknown glue function '%s'" % a)
コード例 #49
0
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        All arguments are first forwarded to `treat_options_simplest`; each
        option handled below is then stored in the matching module-level
        setting. Options not listed below are left untouched by this loop.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `treat_options_simplest`.

        Raises ValueError (from `int`) when the -f/--freq value is not an
        integer.
    """
    global surface_instead_lemmas
    global glue
    global corpus_from_index
    global base_attr
    global min_ngram
    global max_ngram
    global min_frequency

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-s", "--surface"):
            # Surface mode also switches the base attribute.
            surface_instead_lemmas = True
            base_attr = 'surface'
        elif o in ("-f", "--freq"):
            min_frequency = int(a)
        elif o in ("-n", "--ngram"):
            (min_ngram, max_ngram) = interpret_ngram(a)
        elif o in ("-i", "--index"):
            corpus_from_index = True
        elif o in ("-G", "--glue"):
            # "scp" is the only glue name accepted here.
            if a == "scp":
                glue = scp_glue
            else:
                error("Unknown glue function '%s'" % a)
コード例 #50
0
ファイル: filter.py プロジェクト: poethan/mwetoolkit
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        Threshold (-t) and equality (-e) arguments are decoded by
        `interpret_threshold` / `interpret_equals`; a falsy result from either
        is reported through `error`. After the loop, inverted min/max pairs
        only produce warnings.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `treat_options_simplest`.

        Raises Exception for an option this script does not handle.
    """
    global thresh_source, thresh_value
    global equals_name, equals_value
    global reverse
    global minlength, maxlength
    global min_mweoccurs, max_mweoccurs
    global input_filetype_ext, output_filetype_ext
    global patterns

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, value in opts:
        if flag in ("-t", "--threshold"):
            parsed_threshold = interpret_threshold(value)
            if not parsed_threshold:
                error("The format of the -t argument must be <source>:"
                      "<value>\n<source> must be a valid corpus name and "
                      "<value> must be a non-negative integer")
            else:
                thresh_source, thresh_value = parsed_threshold
        elif flag in ("-e", "--equals"):
            parsed_equals = interpret_equals(value)
            if not parsed_equals:
                error("The format of the -e argument must be <name>:"
                      "<value>\n<name> must be a valid feat name and "
                      "<value> must be a non-empty string")
            else:
                equals_name, equals_value = parsed_equals
        elif flag in ("-p", "--patterns"):
            verbose("Reading patterns file")
            patterns = filetype.parse_entities([value])
        elif flag in ("-r", "--reverse"):
            reverse = True
            verbose("Option REVERSE active")
        elif flag in ("-i", "--minlength"):
            minlength = interpret_length(value, "minimum")
        elif flag in ("-a", "--maxlength"):
            maxlength = interpret_length(value, "maximum")
        elif flag == "--min-mweoccurs":
            min_mweoccurs = interpret_length(value, "minimum")
        elif flag == "--max-mweoccurs":
            max_mweoccurs = interpret_length(value, "maximum")
        elif flag == "--from":
            input_filetype_ext = value
        elif flag == "--to":
            output_filetype_ext = value
        else:
            raise Exception("Bad arg: " + flag)

    # Inconsistent bounds are only warned about, not rejected.
    if minlength > maxlength:
        warn("minlength should be <= maxlength")
    if min_mweoccurs > max_mweoccurs:
        warn("min-mweoccurs should be <= max-mweoccurs")
コード例 #51
0
ファイル: candidates.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.

        -p/--patterns and -n/--ngram each add an entry to a local mode list;
        anything other than exactly one entry is reported through `error`.
        The patterns file itself is only parsed after all options are read.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `treat_options_simplest`.

        Raises Exception for an option this script does not handle.
    """
    global patterns, ignore_pos, surface_instead_lemmas
    global print_cand_freq, print_source
    global match_distance, non_overlapping
    global input_filetype_ext, output_filetype_ext
    global id_order

    treat_options_simplest( opts, arg, n_arg, usage_string )

    # One entry per pattern-source option encountered (-p or -n).
    mode = []
    patterns_file = None
    for flag, value in opts :
        if flag in ("-p", "--patterns") :
            patterns_file = value
            mode.append( "patterns" )
        elif flag in ("-n", "--ngram") :
            create_patterns_file( value )
            mode.append( "ngram" )
        elif flag in ("-g", "--ignore-pos") :
            ignore_pos = True
        elif flag in ("-s", "--surface") :
            surface_instead_lemmas = True
        elif flag in ("-S", "--source") :
            print_source = True
        elif flag in ("-f", "--freq") :
            print_cand_freq = True
        elif flag in ("-d", "--match-distance") :
            match_distance = value
        elif flag in ("-N", "--non-overlapping") :
            non_overlapping = True
        elif flag in ("-i", "--index") :
            input_filetype_ext = "BinaryIndex"
            warn("Option -i is deprecated; use --from=BinaryIndex")
        elif flag == "--id-order" :
            id_order = value.split(":")
        elif flag == "--from" :
            input_filetype_ext = value
        elif flag == "--to" :
            output_filetype_ext = value
        else :
            raise Exception("Bad flag")

    # Taking all matches requires that matches may overlap.
    if non_overlapping and match_distance == "All" :
        error("Conflicting options: --match-distance=All and --non-overlapping")

    if len(mode) != 1 :
        error("Exactly one option, -p or -n, must be provided")
    if "patterns" in mode :
        patterns = filetype.parse_entities([patterns_file])
コード例 #52
0
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        The pattern source must come from exactly one of -p/--patterns or
        -n/--ngram; the occurrences are counted in a local list and any other
        count is reported through `error`. A patterns file given with -p is
        parsed once the whole option list has been read.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage string forwarded to
        `treat_options_simplest`.

        Raises Exception for an option this script does not handle.
    """
    global patterns
    global ignore_pos
    global surface_instead_lemmas
    global print_cand_freq
    global print_source
    global match_distance
    global non_overlapping
    global input_filetype_ext
    global output_filetype_ext
    global id_order

    treat_options_simplest(opts, arg, n_arg, usage_string)

    patterns_file = None
    # Records which pattern-source options (-p / -n) were seen.
    mode = []
    for option, argument in opts:
        if option == "--from":
            input_filetype_ext = argument
        elif option == "--to":
            output_filetype_ext = argument
        elif option == "--id-order":
            id_order = argument.split(":")
        elif option in ("-p", "--patterns"):
            mode.append("patterns")
            patterns_file = argument
        elif option in ("-n", "--ngram"):
            create_patterns_file(argument)
            mode.append("ngram")
        elif option in ("-g", "--ignore-pos"):
            ignore_pos = True
        elif option in ("-d", "--match-distance"):
            match_distance = argument
        elif option in ("-N", "--non-overlapping"):
            non_overlapping = True
        elif option in ("-s", "--surface"):
            surface_instead_lemmas = True
        elif option in ("-S", "--source"):
            print_source = True
        elif option in ("-f", "--freq"):
            print_cand_freq = True
        elif option in ("-i", "--index"):
            input_filetype_ext = "BinaryIndex"
            warn("Option -i is deprecated; use --from=BinaryIndex")
        else:
            raise Exception("Bad flag")

    # If we are taking all matches, we need to be able to overlap.
    if non_overlapping and match_distance == "All":
        error(
            "Conflicting options: --match-distance=All and --non-overlapping")

    if len(mode) != 1:
        error("Exactly one option, -p or -n, must be provided")
    if "patterns" in mode:
        patterns = filetype.parse_entities([patterns_file])
コード例 #53
0
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        `treat_options_simplest` is called first with the same arguments;
        afterwards every (option, value) pair in `opts` is inspected and the
        corresponding module-level globals are updated. Once all options have
        been read, `build_entry` (and, in index mode, `suffix_array`) is chosen
        according to the --surface / --ignore-pos flags.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string Passed through to `treat_options_simplest`.

        Raises Exception if an option is not handled by any branch below.
    """
    global cache_file, get_freq_function, build_entry, web_freq
    global the_corpus_size, freq_name
    global low_limit, up_limit
    global count_vars
    global language
    global suffix_array
    global count_joint_frequency
    global count_bigrams
    global web1t_data_path
    global filetype_corpus_ext
    global filetype_candidates_ext
    global output_filetype_ext

    surface_flag = False
    ignorepos_flag = False
    # One entry per frequency-source option seen; exactly one is expected.
    mode = []

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-i", "--index"):
            open_index(a)
            get_freq_function = get_freq_index
            mode.append("index")
        elif o in ("-y", "--yahoo"):
            # No mode is registered here, so the final mode check also fails.
            error("THIS OPTION IS DEPRECATED AS YAHOO SHUT DOWN THEIR FREE "
                  "SEARCH API")
        elif o in ("-w", "--google"):
            web_freq = GoogleFreq()
            freq_name = "google"
            ignorepos_flag = True
            the_corpus_size = web_freq.corpus_size()
            get_freq_function = get_freq_web
            mode.append("google")
        elif o in ("-u", "--univ"):
            web_freq = GoogleFreqUniv(a)
            freq_name = "google"
            ignorepos_flag = True
            the_corpus_size = web_freq.corpus_size()
            get_freq_function = get_freq_web
            mode.append("google")
        elif o in ("-T", "--web1t"):
            ignorepos_flag = True
            freq_name = "web1t"
            web1t_data_path = a
            the_corpus_size = int(read_file(web1t_data_path + "/1gms/total"))
            get_freq_function = get_freq_web1t
            mode.append("web1t")
        elif o in ("-s", "--surface"):
            surface_flag = True
        elif o in ("-g", "--ignore-pos"):
            ignorepos_flag = True
        elif o in ("--lower", "--upper"):
            try:
                limit = int(a)
            except ValueError as message:
                error(str(message) + "\nArgument of " + o + " must be integer")
                # Only reached if error() returns: skip this option.
                continue
            # A stored limit of -1 is treated as "not set yet", so the
            # cross-check against the other bound is skipped in that case.
            if limit < 0:
                error("Argument of " + o + " must be positive")
            elif o == "--lower":
                if up_limit == -1 or up_limit >= limit:
                    low_limit = limit
                else:
                    error("Argument of --lower must be <= argument of "
                          "--upper")
            elif low_limit == -1 or low_limit <= limit:
                up_limit = limit
            else:
                error("Argument of --upper must be >= argument of --lower")
        elif o in ("-a", "--vars"):
            count_vars = True
        elif o in ("-l", "--lang"):
            language = a
        elif o in ("-J", "--no-joint"):
            count_joint_frequency = False
        elif o in ("-B", "--bigrams"):
            count_bigrams = True
        elif o in ("-o", "--old"):
            Index.use_c_indexer(False)
        elif o == "--corpus-from":
            filetype_corpus_ext = a
        elif o == "--candidates-from":
            filetype_candidates_ext = a
        elif o == "--to":
            output_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    if mode == ["index"]:
        # NOTE(review): `index` is not defined in this function; presumably
        # open_index() sets it at module level -- confirm.
        if surface_flag and ignorepos_flag:
            build_entry = lambda surface, lemma, pos: surface
            suffix_array = index.load("surface")
        elif surface_flag:
            build_entry = lambda surface, lemma, pos: (
                surface + ATTRIBUTE_SEPARATOR + pos)
            suffix_array = index.load("surface+pos")
        elif ignorepos_flag:
            build_entry = lambda surface, lemma, pos: lemma
            suffix_array = index.load("lemma")
        else:
            build_entry = lambda surface, lemma, pos: (
                lemma + ATTRIBUTE_SEPARATOR + pos)
            suffix_array = index.load("lemma+pos")

    else:  # Web search, entries are single surface or lemma forms
        if surface_flag:
            build_entry = lambda surface, lemma, pos: surface
        else:
            build_entry = lambda surface, lemma, pos: lemma

    if len(mode) != 1:
        error("Exactly one option -u, -w, -T or -i, must be provided")
コード例 #54
0
ファイル: counter.py プロジェクト: KWARC/mwetoolkit
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        After handing the same arguments to `treat_options_simplest`, the
        function walks over the (option, value) pairs in `opts` and records
        each setting in the matching module-level global. When the loop is
        done it selects `build_entry` (plus `suffix_array` when the only mode
        is "index") from the --surface / --ignore-pos flags.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string Passed through to `treat_options_simplest`.

        Raises Exception if an option is not handled by any branch below.
    """
    global cache_file, get_freq_function, build_entry, web_freq
    global the_corpus_size, freq_name
    global low_limit, up_limit
    global count_vars
    global language
    global suffix_array
    global count_joint_frequency
    global count_bigrams
    global web1t_data_path
    global filetype_corpus_ext
    global filetype_candidates_ext
    global output_filetype_ext

    surface_flag = False
    ignorepos_flag = False
    # Collects the frequency sources requested; must end up with one entry.
    mode = []

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-i", "--index"):
            open_index(a)
            get_freq_function = get_freq_index
            mode.append("index")
        elif o in ("-y", "--yahoo"):
            # Nothing is appended to `mode`, so the last check fails as well.
            error("THIS OPTION IS DEPRECATED AS YAHOO SHUT DOWN THEIR FREE "
                  "SEARCH API")
        elif o in ("-w", "--google"):
            web_freq = GoogleFreq()
            freq_name = "google"
            ignorepos_flag = True
            the_corpus_size = web_freq.corpus_size()
            get_freq_function = get_freq_web
            mode.append("google")
        elif o in ("-u", "--univ"):
            web_freq = GoogleFreqUniv(a)
            freq_name = "google"
            ignorepos_flag = True
            the_corpus_size = web_freq.corpus_size()
            get_freq_function = get_freq_web
            mode.append("google")
        elif o in ("-T", "--web1t"):
            ignorepos_flag = True
            freq_name = "web1t"
            web1t_data_path = a
            the_corpus_size = int(read_file(web1t_data_path + "/1gms/total"))
            get_freq_function = get_freq_web1t
            mode.append("web1t")
        elif o in ("-s", "--surface"):
            surface_flag = True
        elif o in ("-g", "--ignore-pos"):
            ignorepos_flag = True
        elif o in ("--lower", "--upper"):
            try:
                limit = int(a)
            except ValueError as message:
                error(str(message) + "\nArgument of " + o + " must be integer")
                # Reached only when error() returns: move to the next option.
                continue
            # -1 in low_limit / up_limit is handled as "no bound given yet",
            # in which case the two bounds are not compared.
            if limit < 0:
                error("Argument of " + o + " must be positive")
            elif o == "--lower":
                if up_limit == -1 or up_limit >= limit:
                    low_limit = limit
                else:
                    error("Argument of --lower must be <= argument of "
                          "--upper")
            elif low_limit == -1 or low_limit <= limit:
                up_limit = limit
            else:
                error("Argument of --upper must be >= argument of --lower")
        elif o in ("-a", "--vars"):
            count_vars = True
        elif o in ("-l", "--lang"):
            language = a
        elif o in ("-J", "--no-joint"):
            count_joint_frequency = False
        elif o in ("-B", "--bigrams"):
            count_bigrams = True
        elif o in ("-o", "--old"):
            Index.use_c_indexer(False)
        elif o == "--corpus-from":
            filetype_corpus_ext = a
        elif o == "--candidates-from":
            filetype_candidates_ext = a
        elif o == "--to":
            output_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    if mode == ["index"]:
        # NOTE(review): `index` comes from outside this function; it looks
        # like open_index() is expected to have set it -- confirm.
        if surface_flag and ignorepos_flag:
            build_entry = lambda surface, lemma, pos: surface
            suffix_array = index.load("surface")
        elif surface_flag:
            build_entry = lambda surface, lemma, pos: (
                surface + ATTRIBUTE_SEPARATOR + pos)
            suffix_array = index.load("surface+pos")
        elif ignorepos_flag:
            build_entry = lambda surface, lemma, pos: lemma
            suffix_array = index.load("lemma")
        else:
            build_entry = lambda surface, lemma, pos: (
                lemma + ATTRIBUTE_SEPARATOR + pos)
            suffix_array = index.load("lemma+pos")

    else:  # Web search, entries are single surface or lemma forms
        if surface_flag:
            build_entry = lambda surface, lemma, pos: surface
        else:
            build_entry = lambda surface, lemma, pos: lemma

    if len(mode) != 1:
        error("Exactly one option -u, -w, -T or -i, must be provided")