def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that applies the command line options of this script
    to its module-level settings.

    Handled options: --from / --to (input and output filetype extensions),
    -g/--ignore-pos and -s/--surface (boolean switches).

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global ignore_pos, surface_instead_lemmas
    global input_filetype_ext, output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for opt, value in opts:
        if opt in ("-s", "--surface"):
            surface_instead_lemmas = True
        elif opt in ("-g", "--ignore-pos"):
            ignore_pos = True
        elif opt == "--to":
            output_filetype_ext = value
        elif opt == "--from":
            input_filetype_ext = value
        else:
            raise Exception("Bad arg: " + opt)
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that applies the command line options of this script
    to its module-level settings.

    Handled options: -m/--measures (parsed by `interpret_measures`),
    -o/--original (mandatory) and -a/--all.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    """
    global measures, supported_measures
    global main_freq_name, join_all_contrastive

    treat_options_simplest(opts, arg, n_arg, usage_string)

    hint = "\nargument must be list separated by \":\" and containing the names: "
    for opt, value in opts:
        if opt in ("-a", "--all"):
            join_all_contrastive = True
        elif opt in ("-o", "--original"):
            main_freq_name = value
        elif opt in ("-m", "--measures"):
            # Reset first so that a failed parse leaves an empty list behind.
            measures = []
            try:
                measures = interpret_measures(value)
            except ValueError as message:
                error(str(message) + hint + str(supported_measures))

    if not main_freq_name:
        error("Option -o is mandatory")
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that applies the command line options of this script
    to its module-level settings.

    Handled options: --from / --to (filetype extensions), -l/--lemmas
    (sets `lower_attr` to "lemma") and -a/--algorithm (stored lowercased).
    The options -m and -x are reported as deprecated through `error`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global algoname, lower_attr
    global input_filetype_ext, output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for opt, value in opts:
        if opt in ("-a", "--algorithm"):
            algoname = value.lower()
        elif opt in ("-l", "--lemmas"):
            lower_attr = "lemma"
        elif opt == "--from":
            input_filetype_ext = value
        elif opt == "--to":
            output_filetype_ext = value
        elif opt in ("-m", "-x"):
            error("Deprecated options -x and -m. Run with -h for details")
        else:
            raise Exception("Bad arg: " + opt)
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that applies the command line options of this script
    to its module-level settings.

    `simplify` is reset to `simplify_ptb` on every call; -p/--palavras and
    -G/--genia replace it with `simplify_palavras` or `simplify_genia`.
    --from / --to set the input and output filetype extensions.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global simplify
    global input_filetype_ext, output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # Default simplifier, used when neither -p nor -G is given.
    simplify = simplify_ptb

    for opt, value in opts:
        if opt == "--from":
            input_filetype_ext = value
        elif opt == "--to":
            output_filetype_ext = value
        elif opt in ("-G", "--genia"):
            simplify = simplify_genia
        elif opt in ("-p", "--palavras"):
            simplify = simplify_palavras
        else:
            raise Exception("Bad arg: " + opt)
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    Handled options: --from / --to (input and output filetype extensions)
    and -n/--number (stored in `limit` as an integer; negative or
    non-integer values are reported through `error`).

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global limit
    global input_filetype_ext
    global output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o == "--from":
            input_filetype_ext = a
        elif o == "--to":
            output_filetype_ext = a
        elif o in ("-n", "--number"):
            try:
                limit = int(a)
                if limit < 0:
                    # Funnel negative values into the same error path as
                    # non-integer input.
                    raise ValueError
            except ValueError:
                error("You must provide a positive "
                      "integer value as argument of -n option.")
        else:
            # Name the offending option so the failure can be diagnosed.
            raise Exception("Bad arg: " + o)
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    --begin, --end and -w/--each-word take a Python source string, which is
    compiled in "exec" mode and stored in `executable_beg`,
    `executable_end` and `executable_w` respectively. --from / --to set the
    input and output filetype extensions.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `util.treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global executable_w, executable_beg, executable_end
    global input_filetype_ext, output_filetype_ext

    util.treat_options_simplest(opts, arg, n_arg, usage_string)

    for opt, value in opts:
        if opt in ("-w", "--each-word"):
            executable_w = compile(value, "<cmdline:--each-word>", "exec")
        elif opt == "--begin":
            executable_beg = compile(value, "<cmdline:--begin>", "exec")
        elif opt == "--end":
            executable_end = compile(value, "<cmdline:--end>", "exec")
        elif opt == "--from":
            input_filetype_ext = value
        elif opt == "--to":
            output_filetype_ext = value
        else:
            raise Exception("Bad arg " + opt)
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that applies the command line options of this script
    to its module-level settings.

    -y/--yahoo stores a new `YahooFreq` in `web_freq`, -w/--google a new
    `GoogleFreq`. Giving more than one of these options is reported through
    `error`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    """
    global web_freq

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # One entry per search-engine option seen, to detect conflicts.
    selected = []
    for opt, _value in opts:
        if opt in ("-w", "--google"):
            web_freq = GoogleFreq()
            selected.append("google")
        elif opt in ("-y", "--yahoo"):
            web_freq = YahooFreq()
            selected.append("yahoo")

    if len(selected) > 1:
        error("At most one option -y or -w, should be provided")
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    The code snippets given to --begin, --end and -w/--each-word are
    compiled in "exec" mode and kept in `executable_beg`, `executable_end`
    and `executable_w`. --from / --to set the input and output filetype
    extensions.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `util.treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global executable_beg, executable_end, executable_w
    global input_filetype_ext, output_filetype_ext

    util.treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, text in opts:
        if flag == "--to":
            output_filetype_ext = text
        elif flag == "--from":
            input_filetype_ext = text
        elif flag == "--end":
            executable_end = compile(text, "<cmdline:--end>", "exec")
        elif flag == "--begin":
            executable_beg = compile(text, "<cmdline:--begin>", "exec")
        elif flag == "-w" or flag == "--each-word":
            executable_w = compile(text, "<cmdline:--each-word>", "exec")
        else:
            raise Exception("Bad arg " + flag)
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Handled options: -c/--combination (parsed by `interpret_combinations`
    into `combination`) and -o/--original (stored in `main_freq`). If the
    combination list cannot be parsed, the problem and the usage string are
    printed and the script exits with status 2.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string; shown on a bad -c argument and
    passed on to `treat_options_simplest`.
    """
    global combination
    global supported_combination
    global main_freq

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-c", "--combination"):
            try:
                combination = []
                combination = interpret_combinations(a)
            except ValueError as message:
                # sys.stderr.write behaves the same on Python 2 and 3,
                # unlike the `print >>` statement.
                sys.stderr.write(str(message) + "\n")
                # The original pieces were joined without a space
                # ("separatedby"); the space is restored here.
                sys.stderr.write("ERROR: argument must be list separated "
                                 "by \":\" and containing the names: " +
                                 str(supported_combination) + "\n")
                usage(usage_string)
                sys.exit(2)
        elif o in ("-o", "--original"):
            main_freq = a
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    -a/--attributes takes a colon-separated list that is stored in
    `attributes`; each name not found in `WORD_ATTRIBUTES` is reported
    through `error`. If `attributes` is still None after all options are
    read, a message and the usage string are printed and the script exits
    with status 2.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string for the current script.
    """
    global attributes

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-a", "--attributes"):
            attributes = a.split(":")
            for attr in attributes:
                if attr not in WORD_ATTRIBUTES:
                    error("Unknown attribute '%s'!" % attr)

    if attributes is None:
        # sys.stderr.write works on Python 2 and 3; the `print >>`
        # statement raises TypeError when run under Python 3.
        sys.stderr.write("The option -a <attributes> is mandatory.\n")
        usage(usage_string)
        sys.exit(2)
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    Handled options: -r/--reference (mandatory reference file name),
    --sentence-aligner (key into `SENTENCE_ALIGNERS`), -e/--evaluator (key
    into `MWE_EVALUATORS`), --corpus-from and --reference-from (filetype
    extensions). After parsing, `mwe_evaluator` is built from the selected
    evaluator class and a fresh instance of the selected aligner class.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global reference_fname
    global mwe_evaluator
    global corpus_filetype_ext
    global reference_filetype_ext

    # Defaults used when the corresponding option is not given.
    sentence_aligner_class = NaiveSentenceAligner
    mwe_evaluator_class = ExactMatchMWEEvaluator

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-r", "--reference"):
            reference_fname = a
        # Equality, not `in`: ("--sentence-aligner") is a plain string, so
        # `in` was a substring test that also matched e.g. "-s" or "-a".
        elif o == "--sentence-aligner":
            sentence_aligner_class = SENTENCE_ALIGNERS[a]
        elif o in ("-e", "--evaluator"):
            mwe_evaluator_class = MWE_EVALUATORS[a]
        elif o == "--corpus-from":
            corpus_filetype_ext = a
        elif o == "--reference-from":
            reference_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    if not reference_fname:
        error("No reference file given!")

    sentence_aligner = sentence_aligner_class()
    mwe_evaluator = mwe_evaluator_class(sentence_aligner)
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    Handled options: --from / --to (filetype extensions), --append-pos-tag
    (must be "coarse" or "fine", otherwise reported through `error`) and
    --clean-special (boolean switch).

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global input_filetype_ext, output_filetype_ext
    global append_pos_tag, clean_special

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for opt, value in opts:
        if opt == "--clean-special":
            clean_special = True
        elif opt == "--append-pos-tag":
            if value not in ("coarse", "fine"):
                error("Expected \"coarse\" or \"fine\", found " + value)
            else:
                append_pos_tag = value
        elif opt == "--to":
            output_filetype_ext = value
        elif opt == "--from":
            input_filetype_ext = value
        else:
            raise Exception("Bad arg: " + opt)
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Handled options: -s/--surface and -p/--lemmapos (boolean switches),
    -f/--freq-source (stored in `freq_source`) and --from (input filetype
    extension).

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global surface_instead_lemmas
    global lemmapos
    global input_filetype_ext
    # Without this declaration the -f value was bound to a local variable
    # and thrown away when the function returned.
    global freq_source

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-s", "--surface"):
            surface_instead_lemmas = True
        elif o in ("-p", "--lemmapos"):
            lemmapos = True
        elif o in ("-f", "--freq-source"):
            freq_source = a
        elif o == "--from":
            input_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that applies the command line options of this script
    to its module-level settings.

    `web_freq` becomes a `YahooFreq` for -y/--yahoo or a `GoogleFreq` for
    -w/--google. If more than one of these options was given, this is
    reported through `error`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    """
    global web_freq

    treat_options_simplest(opts, arg, n_arg, usage_string)

    engines_seen = []
    for flag, _unused in opts:
        if flag == "-y" or flag == "--yahoo":
            web_freq = YahooFreq()
            engines_seen.append("yahoo")
        elif flag == "-w" or flag == "--google":
            web_freq = GoogleFreq()
            engines_seen.append("google")

    # The engine options are mutually exclusive.
    if len(engines_seen) > 1:
        error("At most one option -y or -w, should be provided")
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command options of this script.

    -m/--morphg takes a path that is split into `morphg_folder` and
    `morphg_file`; -x/--moses turns on `generate_text`. If the resulting
    morphg path does not exist, a warning is issued and both morphg
    settings are set to None.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string Instructions that appear if you run the program
    with the wrong parameters or options.
    """
    global morphg_folder, morphg_file
    global generate_text

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for opt, value in opts:
        if opt in ("-x", "--moses"):
            generate_text = True
        elif opt in ("-m", "--morphg"):
            morphg_folder, morphg_file = os.path.split(value)

    morphg_path = os.path.join(morphg_folder, morphg_file)
    if not os.path.exists(morphg_path):
        warn("morphg not found !!! - outputting analysed forms")
        morphg_file = None
        morphg_folder = None
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    -a/--attributes takes a colon-separated list that is stored in
    `attributes`; each name not found in `WORD_ATTRIBUTES` is reported
    through `error`. If `attributes` is still None after all options are
    read, a message and the usage string are printed and the script exits
    with status 2.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string for the current script.
    """
    global attributes

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-a", "--attributes"):
            attributes = a.split(":")
            for attr in attributes:
                if attr not in WORD_ATTRIBUTES:
                    error("Unknown attribute '%s'!" % attr)

    if attributes is None:
        # sys.stderr.write works on Python 2 and 3; the `print >>`
        # statement raises TypeError when run under Python 3.
        sys.stderr.write("The option -a <attributes> is mandatory.\n")
        usage(usage_string)
        sys.exit(2)
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    Handled options: --from / --to (filetype extensions),
    --keep-empty-words and --word-lemmas (boolean switches), and
    --word-lemmas-matching, --word-surfaces-matching, --word-pos-matching,
    --word-syn-matching (values stored in the matching `regex_word_*`
    global).

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global input_filetype_ext
    global output_filetype_ext
    global regex_word_lemma
    global regex_word_surface
    global regex_word_pos
    global regex_word_syn
    global take_lemma
    # Previously missing: the switch was bound to a local and discarded.
    global keep_empty_words

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        # Equality, not `in`: ("--from") is a plain string, so `in` was a
        # substring test that also matched e.g. "-f".
        if o == "--from":
            input_filetype_ext = a
        elif o == "--to":
            output_filetype_ext = a
        elif o == "--keep-empty-words":
            keep_empty_words = True
        elif o == "--word-lemmas":
            take_lemma = True
        elif o == "--word-lemmas-matching":
            regex_word_lemma = a
        elif o == "--word-surfaces-matching":
            regex_word_surface = a
        elif o == "--word-pos-matching":
            regex_word_pos = a
        elif o == "--word-syn-matching":
            regex_word_syn = a
        else:
            # Name the offending option so the failure can be diagnosed.
            raise Exception("Bad arg: " + o)
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    Handled options: -r/--reference (mandatory reference file name),
    --sentence-aligner (key into `SENTENCE_ALIGNERS`), -e/--evaluator (key
    into `MWE_EVALUATORS`), --corpus-from and --reference-from (filetype
    extensions). After parsing, `mwe_evaluator` is built from the selected
    evaluator class and a fresh instance of the selected aligner class.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global reference_fname
    global mwe_evaluator
    global corpus_filetype_ext
    global reference_filetype_ext

    # Defaults used when the corresponding option is not given.
    sentence_aligner_class = NaiveSentenceAligner
    mwe_evaluator_class = ExactMatchMWEEvaluator

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-r", "--reference"):
            reference_fname = a
        # Equality, not `in`: ("--sentence-aligner") is a plain string, so
        # `in` was a substring test that also matched e.g. "-s" or "-a".
        elif o == "--sentence-aligner":
            sentence_aligner_class = SENTENCE_ALIGNERS[a]
        elif o in ("-e", "--evaluator"):
            mwe_evaluator_class = MWE_EVALUATORS[a]
        elif o == "--corpus-from":
            corpus_filetype_ext = a
        elif o == "--reference-from":
            reference_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    if not reference_fname:
        error("No reference file given!")

    sentence_aligner = sentence_aligner_class()
    mwe_evaluator = mwe_evaluator_class(sentence_aligner)
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    Handled options: --from / --to (filetype extensions) and -n/--number,
    which sets `limit` and allocates `entity_buffer` as a list of `limit`
    None entries. Negative or non-integer values are reported through
    `error`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global limit
    global entity_buffer
    global input_filetype_ext
    global output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o == "--from":
            input_filetype_ext = a
        elif o == "--to":
            output_filetype_ext = a
        elif o in ("-n", "--number"):
            try:
                limit = int(a)
                if limit < 0:
                    raise ValueError
                # Allocate only after validation: [None] * negative is
                # silently [], which used to clobber the buffer before the
                # error was reported.
                entity_buffer = [None] * limit
            except ValueError:
                error("You must provide a positive "
                      "integer value as argument of -n option.")
        else:
            raise Exception("Bad arg: " + o)
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that applies the command line options of this script
    to its module-level settings.

    -m/--measures is parsed by `interpret_measures` into `measures`; a
    ValueError from the parser is reported through `error` together with
    the supported measure names. -o/--original sets `main_freq_name`
    (mandatory) and -a/--all switches on `join_all_contrastive`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    """
    global measures
    global supported_measures
    global main_freq_name
    global join_all_contrastive

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, text in opts:
        if flag in ("-m", "--measures"):
            measures = []  # stays empty if parsing fails below
            try:
                measures = interpret_measures(text)
            except ValueError as message:
                details = str(message)
                details += "\nargument must be list separated by "
                details += "\":\" and containing the names: "
                details += str(supported_measures)
                error(details)
        elif flag in ("-o", "--original"):
            main_freq_name = text
        elif flag in ("-a", "--all"):
            join_all_contrastive = True

    if not main_freq_name:
        error("Option -o is mandatory")
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command options of this script.

    The path given to -m/--morphg is split into `morphg_folder` and
    `morphg_file`, and -x/--moses enables `generate_text`. When the joined
    morphg path is not found on disk, a warning is issued and both morphg
    globals are reset to None.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string Instructions that appear if you run the program
    with the wrong parameters or options.
    """
    global morphg_folder
    global morphg_file
    global generate_text

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, text in opts:
        if flag == "-m" or flag == "--morphg":
            pieces = os.path.split(text)
            morphg_folder = pieces[0]
            morphg_file = pieces[1]
        elif flag == "-x" or flag == "--moses":
            generate_text = True

    found = os.path.exists(os.path.join(morphg_folder, morphg_file))
    if not found:
        warn("morphg not found !!! - outputting analysed forms")
        morphg_file = None
        morphg_folder = None
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Handled options: -c/--combination (parsed by `interpret_combinations`
    into `combination`) and -o/--original (stored in `main_freq`). If the
    combination list cannot be parsed, the problem and the usage string are
    printed and the script exits with status 2.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string; shown on a bad -c argument and
    passed on to `treat_options_simplest`.
    """
    global combination
    global supported_combination
    global main_freq

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-c", "--combination"):
            try:
                combination = []
                combination = interpret_combinations(a)
            except ValueError as message:
                # sys.stderr.write behaves the same on Python 2 and 3,
                # unlike the `print >>` statement.
                sys.stderr.write(str(message) + "\n")
                # The original pieces were joined without a space
                # ("separatedby"); the space is restored here.
                sys.stderr.write("ERROR: argument must be list separated "
                                 "by \":\" and containing the names: " +
                                 str(supported_combination) + "\n")
                usage(usage_string)
                sys.exit(2)
        elif o in ("-o", "--original"):
            main_freq = a
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    This callback defines no options of its own; it only delegates to
    `treat_options_simplest`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    """
    # Declared but never assigned here.
    global reference_fname, mwe_evaluator

    treat_options_simplest(opts, arg, n_arg, usage_string)
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    Handled options: -c/--candidates (may be repeated; at least one is
    required), -d/--detector (name looked up in `detectors`), -S/--source
    (selects `SourceDetector`), -g/--gaps (integer), --corpus-from,
    --candidates-from and --to (filetype extensions), --filter and
    --filter-and-annot. Once the options are read, the candidate files are
    parsed into a `CandidatesHandler` and the global `detector` is built
    from its `info` and the gap count.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global filetype_corpus_ext, filetype_candidates_ext, output_filetype_ext
    global action_annotate, action_filter
    global detector

    treat_options_simplest(opts, arg, n_arg, usage_string)

    n_gaps = None
    candidates_fnames = []
    detector_class = ContiguousLemmaDetector

    for opt, value in opts:
        if opt == "--to":
            output_filetype_ext = value
        elif opt == "--corpus-from":
            filetype_corpus_ext = value
        elif opt == "--candidates-from":
            filetype_candidates_ext = value
        elif opt == "--filter-and-annot":
            action_filter = True
        elif opt == "--filter":
            # Filtering alone switches annotation off.
            action_annotate = False
            action_filter = True
        elif opt in ("-g", "--gaps"):
            n_gaps = int(value)
        elif opt in ("-S", "--source"):
            detector_class = SourceDetector
        elif opt in ("-d", "--detector"):
            detector_class = detectors.get(value, None)
            if detector_class is None:
                error("Unkown detector name: " + value)
        elif opt in ("-c", "--candidates"):
            candidates_fnames.append(value)
        else:
            raise Exception("Bad arg: " + opt)

    if not candidates_fnames:
        error("No candidates file given!")
    if n_gaps is not None and detector_class == SourceDetector:
        error('Bad arguments: method "Source" with "--gaps"')

    handler = CandidatesHandler()
    verbose("Reading MWE list from candidates file")
    filetype.parse(candidates_fnames, handler, filetype_candidates_ext)
    verbose("MWE list loaded in memory successfully")
    detector = detector_class(handler.info, n_gaps)
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    Collects the candidate file names (-c/--candidates, at least one
    required), the detector class (-d/--detector via `detectors`, or
    -S/--source for `SourceDetector`), the gap count (-g/--gaps), the
    filetype extensions (--corpus-from, --candidates-from, --to) and the
    action switches (--filter, --filter-and-annot). It then parses the
    candidate files into a `CandidatesHandler` and stores the resulting
    detector instance in the global `detector`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global detector
    global action_annotate, action_filter
    global filetype_corpus_ext
    global filetype_candidates_ext
    global output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    chosen_detector = ContiguousLemmaDetector
    candidate_files = []
    gaps = None

    for flag, text in opts:
        if flag == "-c" or flag == "--candidates":
            candidate_files.append(text)
        elif flag == "-d" or flag == "--detector":
            chosen_detector = detectors.get(text, None)
            if chosen_detector is None:
                error("Unkown detector name: " + text)
        elif flag == "-S" or flag == "--source":
            chosen_detector = SourceDetector
        elif flag == "-g" or flag == "--gaps":
            gaps = int(text)
        elif flag == "--corpus-from":
            filetype_corpus_ext = text
        elif flag == "--candidates-from":
            filetype_candidates_ext = text
        elif flag == "--to":
            output_filetype_ext = text
        elif flag == "--filter":
            action_annotate, action_filter = False, True
        elif flag == "--filter-and-annot":
            action_filter = True
        else:
            raise Exception("Bad arg: " + flag)

    if not candidate_files:
        error("No candidates file given!")

    # The "Source" method cannot be combined with --gaps.
    if chosen_detector == SourceDetector and gaps is not None:
        error('Bad arguments: method "Source" with "--gaps"')

    candidates = CandidatesHandler()
    verbose("Reading MWE list from candidates file")
    filetype.parse(candidate_files, candidates, filetype_candidates_ext)
    verbose("MWE list loaded in memory successfully")

    detector = chosen_detector(candidates.info, gaps)
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that applies the command line options of this script
    to its module-level settings, logging each choice through `verbose`.

    Handled options: -r/--raters, -i/--items, -p/--pairwise, -u/--unknown,
    -s/--separator (warns when longer than one character), -d/--distance
    (matrix loaded by `read_distances`; warns when it returns None) and
    -c/--confusion. Options not listed here are silently skipped.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string printed if the arguments are wrong.
    """
    global first_header, first_rater
    global calculate_pairwise, calculate_confusion
    global separator, distances_matrix, unknown

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # The option sets are disjoint, so at most one branch fires per option.
    for opt, value in opts:
        if opt in ("-r", "--raters"):
            verbose("First row in file ignored -> considered as rater labels")
            first_header = True
        elif opt in ("-i", "--items"):
            verbose(
                "First column in file ignored -> considered as item labels")
            first_rater = 1
        elif opt in ("-p", "--pairwise"):
            verbose("Computing pairwise coefficients")
            calculate_pairwise = True
        elif opt in ("-u", "--unknown"):
            verbose("Unknown value - TODO: implement: " + value)
            unknown = value
        elif opt in ("-s", "--separator"):
            verbose("Field separator: " + value)
            separator = value
            if len(separator) > 1:
                warn("Multi-char field separator!")
        elif opt in ("-d", "--distance"):
            verbose("Calculating weighted coefficients using distance file")
            distances_matrix = read_distances(value)
            if distances_matrix is None:
                warn("Error in distance matrix! Weighted coefficients "
                     "will use 1.0 as default distance")
        elif opt in ("-c", "--confusion"):
            verbose("Calculating confusion matrices")
            calculate_confusion = True
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    The only handled option is --from, which sets the input filetype
    extension.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global input_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o == "--from":
            input_filetype_ext = a
        else:
            # Name the offending option so the failure can be diagnosed.
            raise Exception("Bad arg: " + o)
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    The only handled option is -s/--surface, which switches on
    `surface_instead_lemmas`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global surface_instead_lemmas

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # The unused local `mode` list from the original has been dropped.
    for (o, a) in opts:
        if o in ("-s", "--surface"):
            surface_instead_lemmas = True
        else:
            raise Exception("Bad arg: " + o)
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Handled options: -r/--reference (reference file name),
    -g/--ignore-pos, -c/--case (sets `ignore_case` to False),
    -L/--lemma-or-surface, --input-from and --reference-from (filetype
    extensions). When a reference file is given, it is parsed with a
    `ReferenceReaderHandler` and `gs_name` is derived from its name. An
    empty `pre_gs` afterwards is reported through `error`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global pre_gs
    global ignore_pos
    global gs_name
    global ignore_case
    global lemma_or_surface
    global input_filetype_ext
    global reference_filetype_ext

    ref_name = None

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-r", "--reference"):
            ref_name = a
        elif o in ("-g", "--ignore-pos"):
            ignore_pos = True
        elif o in ("-c", "--case"):
            ignore_case = False
        elif o in ("-L", "--lemma-or-surface"):
            lemma_or_surface = True
        elif o == "--input-from":
            input_filetype_ext = a
        elif o == "--reference-from":
            reference_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    # The reference list needs to be opened after all the options are read,
    # since options such as -g and -c modify the way the list is represented
    if ref_name:
        filetype.parse([ref_name], ReferenceReaderHandler(),
                       reference_filetype_ext)
        # Drop ".xml" and any leading directories. Raw strings keep the same
        # patterns while avoiding the invalid "\." escape sequence.
        gs_name = re.sub(r".*/", "", re.sub(r"\.xml", "", ref_name))

    # There's no reference list... Oh oh cannot evaluate :-(
    if not pre_gs:
        error("You MUST provide a non-empty reference list!")
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script,
    announcing each recognised option through `verbose`.

    -r/--raters sets `first_header`, -i/--items sets `first_rater` to 1,
    -p/--pairwise and -c/--confusion enable the matching calculations,
    -u/--unknown and -s/--separator store their value (a separator longer
    than one character triggers a warning), and -d/--distance loads the
    distance matrix with `read_distances` (a None result triggers a
    warning). Any other option is skipped without error.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string printed if the arguments are wrong.
    """
    global first_header
    global first_rater
    global calculate_pairwise
    global calculate_confusion
    global separator
    global distances_matrix
    global unknown

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, text in opts:
        # Each flag belongs to at most one of the groups below.
        if flag == "-r" or flag == "--raters":
            verbose("First row in file ignored -> considered as rater labels")
            first_header = True
        elif flag == "-i" or flag == "--items":
            verbose("First column in file ignored -> considered as item labels")
            first_rater = 1
        elif flag == "-p" or flag == "--pairwise":
            verbose("Computing pairwise coefficients")
            calculate_pairwise = True
        elif flag == "-u" or flag == "--unknown":
            verbose("Unknown value - TODO: implement: " + text)
            unknown = text
        elif flag == "-s" or flag == "--separator":
            verbose("Field separator: " + text)
            separator = text
            if len(separator) > 1:
                warn("Multi-char field separator!")
        elif flag == "-d" or flag == "--distance":
            verbose("Calculating weighted coefficients using distance file")
            distances_matrix = read_distances(text)
            if distances_matrix is None:
                warn("Error in distance matrix! Weighted coefficients will use 1.0 as default distance")
        elif flag == "-c" or flag == "--confusion":
            verbose("Calculating confusion matrices")
            calculate_confusion = True
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Handled options: -r/--reference (reference file name),
    -g/--ignore-pos, -c/--case (sets `ignore_case` to False),
    -L/--lemma-or-surface, --input-from and --reference-from (filetype
    extensions). When a reference file is given, it is parsed with a
    `ReferenceReaderHandler` and `gs_name` is derived from its name. An
    empty `pre_gs` afterwards is reported through `error`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global pre_gs
    global ignore_pos
    global gs_name
    global ignore_case
    global lemma_or_surface
    global input_filetype_ext
    global reference_filetype_ext

    ref_name = None

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-r", "--reference"):
            ref_name = a
        elif o in ("-g", "--ignore-pos"):
            ignore_pos = True
        elif o in ("-c", "--case"):
            ignore_case = False
        elif o in ("-L", "--lemma-or-surface"):
            lemma_or_surface = True
        elif o == "--input-from":
            input_filetype_ext = a
        elif o == "--reference-from":
            reference_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    # The reference list needs to be opened after all the options are read,
    # since options such as -g and -c modify the way the list is represented
    if ref_name:
        filetype.parse([ref_name], ReferenceReaderHandler(),
                       reference_filetype_ext)
        # Drop ".xml" and any leading directories. Raw strings keep the same
        # patterns while avoiding the invalid "\." escape sequence.
        gs_name = re.sub(r".*/", "", re.sub(r"\.xml", "", ref_name))

    # There's no reference list... Oh oh cannot evaluate :-(
    if not pre_gs:
        error("You MUST provide a non-empty reference list!")
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that applies the command line options of this script
    to its module-level settings.

    `sent_handler` defaults to `LowercaserHandler.handle_sentence_simple`;
    -a/--algorithm accepts "simple", "complex" or "aggressive"
    (case-insensitive) and selects the matching
    `LowercaserHandler.handle_sentence_*` function. -l/--lemmas sets
    `lower_attr` to "lemma", --from / --to set the filetype extensions, and
    the deprecated -m and -x are reported through `ctxinfo.error`.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `util.treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global sent_handler, lower_attr
    global input_filetype_ext, output_filetype_ext

    ctxinfo = util.CmdlineContextInfo(opts)
    util.treat_options_simplest(opts, arg, n_arg, usage_string)

    # Default algorithm when -a is not given.
    sent_handler = LowercaserHandler.handle_sentence_simple

    for opt, value in ctxinfo.iter(opts):
        if opt in ("-a", "--algorithm"):
            algoname = value.lower()
            if algoname in ("simple", "complex", "aggressive"):
                # Handlers are named after the algorithm they implement.
                sent_handler = getattr(LowercaserHandler,
                                       "handle_sentence_" + algoname)
            else:
                ctxinfo.error("Bad algorithm name `{name}`", name=algoname)
        elif opt in ("-l", "--lemmas"):
            lower_attr = "lemma"
        elif opt == "--from":
            input_filetype_ext = value
        elif opt == "--to":
            output_filetype_ext = value
        elif opt == "-x":
            ctxinfo.error("Deprecated option. "
                          "Use --from=PlainCorpus instead")
        elif opt == "-m":
            ctxinfo.error("Deprecated option. Use --from=Moses instead")
        else:
            raise Exception("Bad arg: " + opt)
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    The only handled option is -n/--number, whose value is stored in
    `limit` as an integer. Negative or non-integer values are reported
    through `error`. Other options are skipped.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    """
    global limit

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for opt, value in opts:
        if opt not in ("-n", "--number"):
            continue
        try:
            limit = int(value)
            if limit < 0:
                raise ValueError
        except ValueError:
            error("You must provide a positive integer value as argument "
                  "of -n option.")
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Handled options: -s/--surface (also sets `base_attr` to 'surface'),
    -f/--freq (integer minimum frequency), -n/--ngram (parsed by
    `interpret_ngram` into the minimum and maximum n-gram size), -G/--glue
    (only "scp" is accepted), -S/--shelve and --from (input filetype
    extension).

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global surface_instead_lemmas
    global glue
    global base_attr
    global min_ngram
    global max_ngram
    global min_frequency
    global ngram_counts
    global selected_candidates
    global use_shelve
    global input_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # The unused local `mode` list from the original has been dropped.
    for (o, a) in opts:
        if o in ("-s", "--surface"):
            surface_instead_lemmas = True
            base_attr = 'surface'
        elif o in ("-f", "--freq"):
            min_frequency = int(a)
        elif o in ("-n", "--ngram"):
            (min_ngram, max_ngram) = interpret_ngram(a)
        elif o in ("-G", "--glue"):
            if a == "scp":
                glue = scp_glue
            else:
                error("Unknown glue function '%s'" % a)
        elif o in ("-S", "--shelve"):
            use_shelve = True
        elif o == "--from":
            input_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Handled options: -s/--surface (also sets `base_attr` to 'surface'),
    -f/--freq (integer minimum frequency), -n/--ngram (parsed by
    `interpret_ngram` into the minimum and maximum n-gram size), -G/--glue
    (only "scp" is accepted), -S/--shelve and --from (input filetype
    extension).

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    @raise Exception If an option is not handled by this callback.
    """
    global surface_instead_lemmas
    global glue
    global base_attr
    global min_ngram
    global max_ngram
    global min_frequency
    global ngram_counts
    global selected_candidates
    global use_shelve
    global input_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # The unused local `mode` list from the original has been dropped.
    for (o, a) in opts:
        if o in ("-s", "--surface"):
            surface_instead_lemmas = True
            base_attr = 'surface'
        elif o in ("-f", "--freq"):
            min_frequency = int(a)
        elif o in ("-n", "--ngram"):
            (min_ngram, max_ngram) = interpret_ngram(a)
        elif o in ("-G", "--glue"):
            if a == "scp":
                glue = scp_glue
            else:
                error("Unknown glue function '%s'" % a)
        elif o in ("-S", "--shelve"):
            use_shelve = True
        elif o == "--from":
            input_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    Stores the integer given to -n/--number in `limit`. A value that is
    negative or not an integer is reported through `error`. All other
    options are left untouched.

    @param opts The (option, value) pairs parsed by getopts.
    @param arg The argument list parsed by getopts.
    @param n_arg The number of arguments expected for this script.
    @param usage_string The usage string, passed on to
    `treat_options_simplest`.
    """
    global limit

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, text in opts:
        if flag == "-n" or flag == "--number":
            try:
                limit = int(text)
                if not limit >= 0:
                    # Same error path as a non-integer value.
                    raise ValueError
            except ValueError:
                error("You must provide a positive integer value as "
                      "argument of -n option.")
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    Delegates to `util.treat_options_simplest`, stores each recognised
    option in a module-level global, and finally validates the combination.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Usage message, forwarded to
    `util.treat_options_simplest`.

    Raises Exception ("Bad arg <option>") for an option not handled here.
    """
    global input_patterns, input_filetype_ext, output_filetype_ext
    global match_distance, non_overlapping, id_order
    global annotate, only_the_matching_subpart

    util.treat_options_simplest(opts, arg, n_arg, usage_string)

    for option, value in opts:
        if option == "--input-from":
            input_filetype_ext = value
        elif option == "--to":
            output_filetype_ext = value
        elif option in ("-p", "--patterns"):
            input_patterns = filetype.parse_entities([value])
        elif option in ("-d", "--match-distance"):
            match_distance = value
        elif option in ("-N", "--non-overlapping"):
            non_overlapping = True
        elif option == "--id-order":
            # Colon-separated list on the command line.
            id_order = value.split(":")
        elif option == "--annotate":
            annotate = True
        elif option == "--only-matching":
            only_the_matching_subpart = True
        else:
            raise Exception("Bad arg " + option)

    # Post-parse checks on the collected settings.
    if input_patterns is None:
        util.error("No patterns provided. Option --patterns is mandatory!")
    if only_the_matching_subpart and annotate:
        util.warn("Switch --only-matching disables --annotate")
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    Forwards to `treat_options_simplest`, then records -s/--sentence and
    --to in module-level globals.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Instructions that appear if you run the program with
    the wrong parameters or options.

    Raises Exception ("Bad arg: <option>") for any other option.
    """
    global sent_split
    global output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, value in opts:
        if flag in ("-s", "--sentence"):
            sent_split = value
            continue
        if flag == "--to":
            output_filetype_ext = value
            continue
        raise Exception("Bad arg: " + flag)
def treat_options(opts, arg, n_arg, usage_string):
    """Callback function that handles the command line options of this script.

    `treat_options_simplest` is invoked first; the remaining loop copies the
    values of -s/--sentence and --to into module-level globals.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Instructions that appear if you run the program with
    the wrong parameters or options.

    Raises Exception ("Bad arg: <option>") on an option not listed above.
    """
    global sent_split, output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for option, value in opts:
        if option == "-s" or option == "--sentence":
            sent_split = value
        elif option == "--to":
            output_filetype_ext = value
        else:
            raise Exception("Bad arg: " + option)
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Forwards to `treat_options_simplest`, then stores the feature list, the
    sort direction and the input/output file types in module-level globals.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Usage message, forwarded to `treat_options_simplest`.

    Raises Exception ("Bad arg: <option>") for an option not handled here.
    """
    global feat_list
    global ascending
    global input_filetype_ext
    global output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # Records every -a/-d seen so a conflicting combination can be reported.
    a_or_d = []
    for (o, a) in opts:
        if o in ("-f", "--feat"):
            feat_list = treat_feat_list(a)
        elif o in ("-a", "--asc"):
            ascending = True
            a_or_d.append("a")
        elif o in ("-d", "--desc"):
            ascending = False
            a_or_d.append("d")
        elif o == "--from":
            input_filetype_ext = a
        elif o == "--to":
            output_filetype_ext = a
        else:
            # Name the offending option, as the sibling callbacks do.
            raise Exception("Bad arg: " + o)

    if len(a_or_d) > 1:
        warn("You must provide only one option, -a OR -d. "
             "Only the last one will be considered.")
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    After delegating to `treat_options_simplest`, each (option, value) pair
    updates one of the module-level globals declared below.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Usage message, passed on to `treat_options_simplest`.

    Raises Exception ("Bad arg: <option>") when an option is not recognised.
    """
    global feat_list
    global ascending
    global input_filetype_ext
    global output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    a_or_d = []  # history of sort-direction switches, in command-line order
    for (o, a) in opts:
        if o in ("-f", "--feat"):
            feat_list = treat_feat_list(a)
        elif o in ("-a", "--asc"):
            ascending = True
            a_or_d.append("a")
        elif o in ("-d", "--desc"):
            ascending = False
            a_or_d.append("d")
        elif o == "--from":
            input_filetype_ext = a
        elif o == "--to":
            output_filetype_ext = a
        else:
            # Include the option so the failure is diagnosable.
            raise Exception("Bad arg: " + o)

    # The last switch wins because `ascending` is simply overwritten.
    if len(a_or_d) > 1:
        warn("You must provide only one option, -a OR -d. "
             "Only the last one will be considered.")
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Forwards to `treat_options_simplest`, then stores the feature list, the
    sort direction, the precision switch and the input file type in
    module-level globals.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Usage message, forwarded to `treat_options_simplest`.

    Raises Exception ("Bad arg: <option>") for an option not handled here.
    """
    global feat_list
    global ascending
    global print_precs
    # Without this declaration the --from value was bound to a throwaway
    # local and the option had no effect.
    global input_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    a_or_d = []
    for (o, a) in opts:
        if o in ("-f", "--feat"):
            feat_list = treat_feat_list(a)
        elif o in ("-a", "--asc"):
            ascending = True
            a_or_d.append("a")
        elif o in ("-d", "--desc"):
            ascending = False
            a_or_d.append("d")
        elif o in ("-p", "--precs"):
            print_precs = True
        elif o == "--from":
            input_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    if len(a_or_d) > 1:
        warn("you should provide only one option, -a OR -d. Only the last one"
             " will be considered.")
    if not feat_list:
        error("You MUST provide at least one feature with -f")
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Delegates to `treat_options_simplest`, then stores the measures, the
    main frequency name, the MLE switch and the file types in module-level
    globals.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Usage message, forwarded to `treat_options_simplest`.

    Raises Exception ("Bad arg: <option>") for an option not handled here.
    """
    global measures, supported_measures
    global main_freq, not_normalize_mle
    global input_filetype_ext, output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for option, value in opts:
        if option in ("-m", "--measures"):
            try:
                measures = interpret_measures(value)
            except ValueError as message:
                # Append the accepted names to the parser's own message.
                details = str(message)
                error(details + "\nargument must be list separated by "
                      "\":\" and containing the names: "
                      + str(supported_measures))
        elif option in ("-o", "--original"):
            main_freq = value
        elif option in ("-u", "--unnorm-mle"):
            not_normalize_mle = True
        elif option == "--from":
            input_filetype_ext = value
        elif option == "--to":
            output_filetype_ext = value
        else:
            raise Exception("Bad arg: " + option)
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    `treat_options_simplest` runs first; each remaining (option, value) pair
    is then written to the matching module-level global.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Usage message, passed on to `treat_options_simplest`.

    Raises Exception ("Bad arg: <option>") when an option is not recognised.
    """
    global measures
    global supported_measures
    global main_freq
    global not_normalize_mle
    global input_filetype_ext
    global output_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for flag, value in opts:
        if flag in ("-m", "--measures"):
            try:
                measures = interpret_measures(value)
            except ValueError as message:
                error(str(message) + "\nargument must be list separated by "
                      "\":\" and containing the names: " +
                      str(supported_measures))
            continue
        if flag in ("-o", "--original"):
            main_freq = value
            continue
        if flag in ("-u", "--unnorm-mle"):
            not_normalize_mle = True
            continue
        if flag == "--from":
            input_filetype_ext = value
            continue
        if flag == "--to":
            output_filetype_ext = value
            continue
        raise Exception("Bad arg: " + flag)
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    After delegating to `treat_options_simplest`, each (option, value) pair
    updates one of the module-level globals declared below; the collected
    settings are then checked.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Usage message, passed on to `treat_options_simplest`.

    Raises Exception ("Bad arg: <option>") when an option is not recognised.
    """
    global feat_list
    global ascending
    global print_precs
    # --from assigns this name; it must be declared global or the
    # assignment only creates a local that is discarded on return.
    global input_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    a_or_d = []  # every -a/-d seen, used only for the conflict warning
    for (o, a) in opts:
        if o in ("-f", "--feat"):
            feat_list = treat_feat_list(a)
        elif o in ("-a", "--asc"):
            ascending = True
            a_or_d.append("a")
        elif o in ("-d", "--desc"):
            ascending = False
            a_or_d.append("d")
        elif o in ("-p", "--precs"):
            print_precs = True
        elif o == "--from":
            input_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    if len(a_or_d) > 1:
        warn("you should provide only one option, -a OR -d. Only the last one"
             " will be considered.")
    if not feat_list:
        error("You MUST provide at least one feature with -f")
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Delegates to `treat_options_simplest`, resets `used_attributes` to its
    four-element default, then applies the options. Options not listed in
    the loop are skipped without complaint.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Usage message, forwarded to `treat_options_simplest`.
    """
    global used_attributes, basename, build_entry
    global use_text_format, input_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    used_attributes = ["lemma", "pos", "surface", "syn"]
    for option, value in opts:
        if option in ("-i", "--index"):
            basename = value
        elif option == "--from":
            input_filetype_ext = value
        elif option in ("-a", "--attributes"):
            # Colon-separated list replaces the default attribute set.
            used_attributes = value.split(":")
        elif option in ("-m", "--moses"):
            use_text_format = "moses"
        elif option in ("-c", "--conll"):
            use_text_format = "conll"
        elif option in ("-o", "--old"):
            indexlib.Index.use_c_indexer(False)

    if basename is None:
        error("You must provide a filename for the index.\n"
              "Option -i is mandatory.")
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    `treat_options_simplest` runs first. `used_attributes` is then set to
    its default list and every recognised option is applied; unrecognised
    options fall through silently.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Usage message, passed on to `treat_options_simplest`.
    """
    global used_attributes
    global basename
    global build_entry
    global use_text_format
    global input_filetype_ext

    treat_options_simplest(opts, arg, n_arg, usage_string)

    used_attributes = ["lemma", "pos", "surface", "syn"]
    for flag, value in opts:
        if flag in ("-i", "--index"):
            basename = value
            continue
        if flag == "--from":
            input_filetype_ext = value
            continue
        if flag in ("-a", "--attributes"):
            used_attributes = value.split(":")
            continue
        if flag in ("-m", "--moses"):
            use_text_format = "moses"
            continue
        if flag in ("-c", "--conll"):
            use_text_format = "conll"
            continue
        if flag in ("-o", "--old"):
            indexlib.Index.use_c_indexer(False)

    # The index file name is the only mandatory setting.
    if basename is None:
        error("You must provide a filename for the index.\n"
              "Option -i is mandatory.")
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Forwards to `treat_options_simplest`, then stores each recognised
    option in a module-level global. Options not listed in the loop are
    skipped.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Usage message, forwarded to `treat_options_simplest`.
    """
    global surface_instead_lemmas
    global glue
    global corpus_from_index
    global base_attr
    global min_ngram
    global max_ngram
    global min_frequency

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-s", "--surface"):
            surface_instead_lemmas = True
            base_attr = 'surface'
        elif o in ("-f", "--freq"):
            # Report a malformed number through error() instead of letting
            # a bare ValueError traceback escape from option parsing.
            try:
                min_frequency = int(a)
            except ValueError:
                error("Argument of -f must be an integer, got '%s'" % a)
        elif o in ("-n", "--ngram"):
            (min_ngram, max_ngram) = interpret_ngram(a)
        elif o in ("-i", "--index"):
            corpus_from_index = True
        elif o in ("-G", "--glue"):
            # "scp" is the only glue function name accepted here.
            if a == "scp":
                glue = scp_glue
            else:
                error("Unknown glue function '%s'" % a)
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    After delegating to `treat_options_simplest`, each recognised
    (option, value) pair updates a module-level global. Unrecognised
    options are ignored by this loop.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Usage message, passed on to `treat_options_simplest`.
    """
    global surface_instead_lemmas
    global glue
    global corpus_from_index
    global base_attr
    global min_ngram
    global max_ngram
    global min_frequency

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-s", "--surface"):
            # Surface mode also switches the base attribute.
            surface_instead_lemmas = True
            base_attr = 'surface'
        elif o in ("-f", "--freq"):
            try:
                min_frequency = int(a)
            except ValueError:
                # A non-numeric value is a usage error, not a crash.
                error("Argument of -f must be an integer, got '%s'" % a)
        elif o in ("-n", "--ngram"):
            (min_ngram, max_ngram) = interpret_ngram(a)
        elif o in ("-i", "--index"):
            corpus_from_index = True
        elif o in ("-G", "--glue"):
            if a == "scp":
                glue = scp_glue
            else:
                error("Unknown glue function '%s'" % a)
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Delegates to `treat_options_simplest`, stores each recognised option in
    a module-level global, and warns when a minimum exceeds its maximum.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Usage message, forwarded to `treat_options_simplest`.

    Raises Exception ("Bad arg: <option>") for an option not handled here.
    """
    global thresh_source, thresh_value
    global equals_name, equals_value
    global reverse
    global minlength, maxlength
    global min_mweoccurs, max_mweoccurs
    global input_filetype_ext, output_filetype_ext
    global patterns

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for option, value in opts:
        if option in ("-t", "--threshold"):
            threshold = interpret_threshold(value)
            # A falsy result means the argument could not be interpreted.
            if not threshold:
                error("The format of the -t argument must be <source>:"
                      "<value>\n<source> must be a valid corpus name and "
                      "<value> must be a non-negative integer")
            else:
                (thresh_source, thresh_value) = threshold
        elif option in ("-e", "--equals"):
            equals = interpret_equals(value)
            if not equals:
                error("The format of the -e argument must be <name>:"
                      "<value>\n<name> must be a valid feat name and "
                      "<value> must be a non-empty string")
            else:
                (equals_name, equals_value) = equals
        elif option in ("-p", "--patterns"):
            verbose("Reading patterns file")
            patterns = filetype.parse_entities([value])
        elif option in ("-r", "--reverse"):
            reverse = True
            verbose("Option REVERSE active")
        elif option in ("-i", "--minlength"):
            minlength = interpret_length(value, "minimum")
        elif option in ("-a", "--maxlength"):
            maxlength = interpret_length(value, "maximum")
        elif option == "--min-mweoccurs":
            min_mweoccurs = interpret_length(value, "minimum")
        elif option == "--max-mweoccurs":
            max_mweoccurs = interpret_length(value, "maximum")
        elif option == "--from":
            input_filetype_ext = value
        elif option == "--to":
            output_filetype_ext = value
        else:
            raise Exception("Bad arg: " + option)

    # Inconsistent bounds only produce warnings.
    if minlength > maxlength:
        warn("minlength should be <= maxlength")
    if min_mweoccurs > max_mweoccurs:
        warn("min-mweoccurs should be <= max-mweoccurs")
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Forwards to `treat_options_simplest`, stores each recognised option in
    a module-level global, validates the combination, and only then parses
    the patterns file given with -p.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Usage message, forwarded to `treat_options_simplest`.

    Raises Exception ("Bad flag: <option>") for an option not handled here.
    """
    global patterns
    global ignore_pos
    global surface_instead_lemmas
    global print_cand_freq
    global print_source
    global match_distance
    global non_overlapping
    global input_filetype_ext
    global output_filetype_ext
    global id_order

    treat_options_simplest(opts, arg, n_arg, usage_string)

    # Tracks which pattern source (-p or -n) was requested.
    mode = []
    patterns_file = None
    for (o, a) in opts:
        if o in ("-p", "--patterns"):
            mode.append("patterns")
            patterns_file = a
        elif o in ("-n", "--ngram"):
            create_patterns_file(a)
            mode.append("ngram")
        elif o in ("-g", "--ignore-pos"):
            ignore_pos = True
        elif o in ("-d", "--match-distance"):
            match_distance = a
        elif o in ("-N", "--non-overlapping"):
            non_overlapping = True
        elif o in ("-s", "--surface"):
            surface_instead_lemmas = True
        elif o in ("-S", "--source"):
            print_source = True
        elif o in ("-f", "--freq"):
            print_cand_freq = True
        elif o in ("-i", "--index"):
            input_filetype_ext = "BinaryIndex"
            warn("Option -i is deprecated; use --from=BinaryIndex")
        elif o == "--id-order":
            id_order = a.split(":")
        elif o == "--from":
            input_filetype_ext = a
        elif o == "--to":
            output_filetype_ext = a
        else:
            # Name the offending option so the failure is diagnosable.
            raise Exception("Bad flag: " + o)

    if non_overlapping and match_distance == "All":
        # If we are taking all matches, we need to be able to overlap...
        error("Conflicting options: --match-distance=All and --non-overlapping")

    if len(mode) != 1:
        error("Exactly one option, -p or -n, must be provided")

    # Parsing is deferred until the option set has been validated.
    if "patterns" in mode:
        patterns = filetype.parse_entities([patterns_file])
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    After delegating to `treat_options_simplest`, each (option, value) pair
    updates a module-level global. The combination is then checked and, if
    -p was given, the patterns file is parsed into `patterns`.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Usage message, passed on to `treat_options_simplest`.

    Raises Exception ("Bad flag: <option>") when an option is not recognised.
    """
    global patterns
    global ignore_pos
    global surface_instead_lemmas
    global print_cand_freq
    global print_source
    global match_distance
    global non_overlapping
    global input_filetype_ext
    global output_filetype_ext
    global id_order

    treat_options_simplest(opts, arg, n_arg, usage_string)

    mode = []  # one entry per -p / -n occurrence
    patterns_file = None
    for (o, a) in opts:
        if o in ("-p", "--patterns"):
            mode.append("patterns")
            patterns_file = a
        elif o in ("-n", "--ngram"):
            create_patterns_file(a)
            mode.append("ngram")
        elif o in ("-g", "--ignore-pos"):
            ignore_pos = True
        elif o in ("-d", "--match-distance"):
            match_distance = a
        elif o in ("-N", "--non-overlapping"):
            non_overlapping = True
        elif o in ("-s", "--surface"):
            surface_instead_lemmas = True
        elif o in ("-S", "--source"):
            print_source = True
        elif o in ("-f", "--freq"):
            print_cand_freq = True
        elif o in ("-i", "--index"):
            # Deprecated alias for --from=BinaryIndex.
            input_filetype_ext = "BinaryIndex"
            warn("Option -i is deprecated; use --from=BinaryIndex")
        elif o == "--id-order":
            id_order = a.split(":")
        elif o == "--from":
            input_filetype_ext = a
        elif o == "--to":
            output_filetype_ext = a
        else:
            # Report which option was rejected.
            raise Exception("Bad flag: " + o)

    if non_overlapping and match_distance == "All":
        # If we are taking all matches, we need to be able to overlap...
        error(
            "Conflicting options: --match-distance=All and --non-overlapping")

    if len(mode) != 1:
        error("Exactly one option, -p or -n, must be provided")

    if "patterns" in mode:
        # `patterns` is already declared global at the top of the function.
        patterns = filetype.parse_entities([patterns_file])
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    Forwards to `treat_options_simplest`, then configures the frequency
    source and the remaining settings through module-level globals.
    Finally it picks the `build_entry` function that matches the selected
    mode and flags.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Usage message, forwarded to `treat_options_simplest`.

    Raises Exception ("Bad arg: <option>") for an option not handled here.
    """
    global cache_file, get_freq_function, build_entry, web_freq
    global the_corpus_size, freq_name
    global low_limit, up_limit
    global count_vars
    global language
    global suffix_array
    global count_joint_frequency
    global count_bigrams
    global web1t_data_path
    global filetype_corpus_ext
    global filetype_candidates_ext
    global output_filetype_ext

    surface_flag = False
    ignorepos_flag = False
    # One entry per frequency-source option seen on the command line.
    mode = []

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-i", "--index"):
            open_index(a)
            get_freq_function = get_freq_index
            mode.append("index")
        elif o in ("-y", "--yahoo"):
            error("THIS OPTION IS DEPRECATED AS YAHOO SHUT DOWN THEIR FREE "
                  "SEARCH API")
        elif o in ("-w", "--google"):
            web_freq = GoogleFreq()
            freq_name = "google"
            ignorepos_flag = True
            the_corpus_size = web_freq.corpus_size()
            get_freq_function = get_freq_web
            mode.append("google")
        elif o in ("-u", "--univ"):
            web_freq = GoogleFreqUniv(a)
            freq_name = "google"
            ignorepos_flag = True
            the_corpus_size = web_freq.corpus_size()
            get_freq_function = get_freq_web
            mode.append("google")
        elif o in ("-T", "--web1t"):
            ignorepos_flag = True
            freq_name = "web1t"
            web1t_data_path = a
            the_corpus_size = int(read_file(web1t_data_path + "/1gms/total"))
            get_freq_function = get_freq_web1t
            mode.append("web1t")
        elif o in ("-s", "--surface"):
            surface_flag = True
        elif o in ("-g", "--ignore-pos"):
            ignorepos_flag = True
        elif o in ("--lower", "--upper"):
            try:
                limit = int(a)
                if limit < 0:
                    raise ValueError("Argument of " + o + " must be positive")
                # -1 marks a limit that has not been set yet.
                if o == "--lower":
                    if up_limit == -1 or up_limit >= limit:
                        low_limit = limit
                    else:
                        raise ValueError("Argument of --lower must be <= "
                                         "argument of --upper")
                else:
                    if low_limit == -1 or low_limit <= limit:
                        up_limit = limit
                    else:
                        raise ValueError("Argument of --upper must be >= "
                                         "argument of --lower")
            except ValueError as message:
                # Also catches the ValueError raised by int() itself.
                error(str(message) + "\nArgument of " + o +
                      " must be integer")
        elif o in ("-a", "--vars"):
            count_vars = True
        elif o in ("-l", "--lang"):
            language = a
        elif o in ("-J", "--no-joint"):
            count_joint_frequency = False
        elif o in ("-B", "--bigrams"):
            count_bigrams = True
        elif o in ("-o", "--old"):
            Index.use_c_indexer(False)
        elif o == "--corpus-from":
            filetype_corpus_ext = a
        elif o == "--candidates-from":
            filetype_candidates_ext = a
        elif o == "--to":
            output_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    if mode == ["index"]:
        # The entry format and the loaded suffix array must use the same
        # attribute combination.
        if surface_flag and ignorepos_flag:
            build_entry = lambda surface, lemma, pos: surface
            suffix_array = index.load("surface")
        elif surface_flag:
            build_entry = lambda surface, lemma, pos: (
                surface + ATTRIBUTE_SEPARATOR + pos)
            suffix_array = index.load("surface+pos")
        elif ignorepos_flag:
            build_entry = lambda surface, lemma, pos: lemma
            suffix_array = index.load("lemma")
        else:
            build_entry = lambda surface, lemma, pos: (
                lemma + ATTRIBUTE_SEPARATOR + pos)
            suffix_array = index.load("lemma+pos")
    else:
        # Web search, entries are single surface or lemma forms
        if surface_flag:
            build_entry = lambda surface, lemma, pos: surface
        else:
            build_entry = lambda surface, lemma, pos: lemma

    if len(mode) != 1:
        error("Exactly one option -u, -w, -T or -i, must be provided")
def treat_options(opts, arg, n_arg, usage_string):
    """
    Callback function that handles the command line options of this script.

    After delegating to `treat_options_simplest`, each (option, value) pair
    selects a frequency source or updates a module-level setting. Once all
    options are read, `build_entry` (and, for index mode, `suffix_array`)
    are chosen from the surface / ignore-pos flags.

    @param opts The (option, value) pairs parsed by getopts.

    @param arg The argument list parsed by getopts.

    @param n_arg The number of arguments expected for this script.

    @param usage_string Usage message, passed on to `treat_options_simplest`.

    Raises Exception ("Bad arg: <option>") when an option is not recognised.
    """
    global cache_file, get_freq_function, build_entry, web_freq
    global the_corpus_size, freq_name
    global low_limit, up_limit
    global count_vars
    global language
    global suffix_array
    global count_joint_frequency
    global count_bigrams
    global web1t_data_path
    global filetype_corpus_ext
    global filetype_candidates_ext
    global output_filetype_ext

    surface_flag = False
    ignorepos_flag = False
    mode = []  # frequency sources requested; exactly one is expected

    treat_options_simplest(opts, arg, n_arg, usage_string)

    for (o, a) in opts:
        if o in ("-i", "--index"):
            open_index(a)
            get_freq_function = get_freq_index
            mode.append("index")
        elif o in ("-y", "--yahoo"):
            # Kept only to report that the option no longer works.
            error("THIS OPTION IS DEPRECATED AS YAHOO SHUT DOWN THEIR FREE "
                  "SEARCH API")
        elif o in ("-w", "--google"):
            web_freq = GoogleFreq()
            freq_name = "google"
            ignorepos_flag = True
            the_corpus_size = web_freq.corpus_size()
            get_freq_function = get_freq_web
            mode.append("google")
        elif o in ("-u", "--univ"):
            web_freq = GoogleFreqUniv(a)
            freq_name = "google"
            ignorepos_flag = True
            the_corpus_size = web_freq.corpus_size()
            get_freq_function = get_freq_web
            mode.append("google")
        elif o in ("-T", "--web1t"):
            ignorepos_flag = True
            freq_name = "web1t"
            web1t_data_path = a
            the_corpus_size = int(read_file(web1t_data_path + "/1gms/total"))
            get_freq_function = get_freq_web1t
            mode.append("web1t")
        elif o in ("-s", "--surface"):
            surface_flag = True
        elif o in ("-g", "--ignore-pos"):
            ignorepos_flag = True
        elif o in ("--lower", "--upper"):
            try:
                limit = int(a)
                if limit < 0:
                    raise ValueError("Argument of " + o + " must be positive")
                if o == "--lower":
                    # An unset upper limit is stored as -1.
                    if up_limit == -1 or up_limit >= limit:
                        low_limit = limit
                    else:
                        raise ValueError("Argument of --lower must be <= "
                                         "argument of --upper")
                else:
                    # An unset lower limit is stored as -1.
                    if low_limit == -1 or low_limit <= limit:
                        up_limit = limit
                    else:
                        raise ValueError("Argument of --upper must be >= "
                                         "argument of --lower")
            except ValueError as message:
                error(str(message) + "\nArgument of " + o +
                      " must be integer")
        elif o in ("-a", "--vars"):
            count_vars = True
        elif o in ("-l", "--lang"):
            language = a
        elif o in ("-J", "--no-joint"):
            count_joint_frequency = False
        elif o in ("-B", "--bigrams"):
            count_bigrams = True
        elif o in ("-o", "--old"):
            Index.use_c_indexer(False)
        elif o == "--corpus-from":
            filetype_corpus_ext = a
        elif o == "--candidates-from":
            filetype_candidates_ext = a
        elif o == "--to":
            output_filetype_ext = a
        else:
            raise Exception("Bad arg: " + o)

    if mode == ["index"]:
        if surface_flag and ignorepos_flag:
            build_entry = lambda surface, lemma, pos: surface
            suffix_array = index.load("surface")
        elif surface_flag:
            build_entry = lambda surface, lemma, pos: (
                surface + ATTRIBUTE_SEPARATOR + pos)
            suffix_array = index.load("surface+pos")
        elif ignorepos_flag:
            build_entry = lambda surface, lemma, pos: lemma
            suffix_array = index.load("lemma")
        else:
            build_entry = lambda surface, lemma, pos: (
                lemma + ATTRIBUTE_SEPARATOR + pos)
            suffix_array = index.load("lemma+pos")
    else:
        # Web search, entries are single surface or lemma forms
        if surface_flag:
            build_entry = lambda surface, lemma, pos: surface
        else:
            build_entry = lambda surface, lemma, pos: lemma

    # Checked last, after build_entry has been assigned.
    if len(mode) != 1:
        error("Exactly one option -u, -w, -T or -i, must be provided")