def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback that applies the command line options of this script.

        The pattern source is selected by -p/--patterns (handled by
        `read_patterns_file`) or -n/--ngram (handled by
        `create_patterns_file`). If these two options do not occur exactly
        once in total, an error message is printed on stderr, the usage is
        shown and the script exits with status 2. The remaining options only
        switch module-level flags on.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage message, handed to `usage` on error and
        to `treat_options_simplest` afterwards.
    """
    global patterns, ignore_pos, surface_instead_lemmas
    global print_cand_freq, corpus_from_index
    # Number of times a mode option (-p or -n) was seen on the command line.
    modes_seen = 0
    for (name, value) in opts:
        if name == "-p" or name == "--patterns":
            read_patterns_file(value)
            modes_seen += 1
        elif name == "-n" or name == "--ngram":
            create_patterns_file(value)
            modes_seen += 1
        elif name == "-g" or name == "--ignore-pos":
            ignore_pos = True
        elif name == "-s" or name == "--surface":
            surface_instead_lemmas = True
        elif name == "-f" or name == "--freq":
            print_cand_freq = True
        elif name == "-i" or name == "--index":
            corpus_from_index = True

    if modes_seen != 1:
        print >> sys.stderr, "Exactly one option, -p or -n, must be provided"
        usage(usage_string)
        sys.exit(2)

    # Options shared by all scripts are delegated to the common handler.
    treat_options_simplest(opts, arg, n_arg, usage_string)
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this
        script: it loads or generates the patterns and sets the global flags.

        @param opts The list of ( option, value ) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage message; shown through `usage` when the
        options are invalid and forwarded to `treat_options_simplest`.
    """
    global patterns, ignore_pos, surface_instead_lemmas, print_cand_freq, corpus_from_index
    # One entry per mode option met; exactly one entry is required in the end.
    selected_modes = []
    for ( opt, val ) in opts :
        # Mode options: they define where the patterns come from.
        if opt in ( "-p", "--patterns" ) :
            read_patterns_file( val )
            selected_modes.append( "patterns" )
            continue
        if opt in ( "-n", "--ngram" ) :
            create_patterns_file( val )
            selected_modes.append( "ngram" )
            continue
        # Plain switches: each one only raises a global flag.
        if opt in ( "-g", "--ignore-pos" ) :
            ignore_pos = True
        elif opt in ( "-s", "--surface" ) :
            surface_instead_lemmas = True
        elif opt in ( "-f", "--freq" ) :
            print_cand_freq = True
        elif opt in ( "-i", "--index" ) :
            corpus_from_index = True

    if len( selected_modes ) != 1 :
        print >> sys.stderr, "Exactly one option, -p or -n, must be provided"
        usage( usage_string )
        sys.exit( 2 )

    treat_options_simplest( opts, arg, n_arg, usage_string )
def execute_command(args=sys.argv[1:]):
    """Run the sub-command named by the first element of ``args``.

    With no arguments the usage is shown and the process exits with status 1.
    A leading option (anything starting with ``-``) is only accepted for help
    and version requests; any other option shows the usage and exits with
    status 1. Otherwise the first argument is looked up in the command table
    and called with the remaining arguments, unless a help flag appears among
    the arguments, in which case the help for that command is shown instead.

    :param args: command line arguments without the program name.
    """
    if not args:
        usage()
        sys.exit(1)

    command = args[0]

    # A leading option instead of a sub-command: only help/version are known.
    if command.startswith('-'):
        if command in ('-h', '--help'):
            usage(bddown_help.show_help())
            sys.exit(0)
        if command in ('-V', '-v', '--version'):
            print('V1.54')
            sys.exit(0)
        usage()
        sys.exit(1)

    commands = {
        'help': bd_help,
        'login': login,
        'download': download,
        'd': download,  # alias download
        'export': export,
        'show': show,
        'config': config
    }

    if command not in commands:
        usage()
        sys.exit(1)

    # A help flag anywhere on the line wins over running the command.
    if '-h' in args or '--help' in args:
        bd_help([command])
        sys.exit(0)

    commands[command](args[1:])
def bd_help(args):
    """Show help through ``usage`` for the topic named in ``args``.

    * no argument: ``bddown_help.show_help`` is passed to ``usage``;
    * one argument: the attribute of ``bddown_help`` whose name is the
      lower-cased argument, falling back to ``bddown_help.help`` when there
      is no such attribute;
    * more than one argument: ``bddown_help.help``.

    :param args: list of arguments following the ``help`` command.
    """
    n_topics = len(args)
    if n_topics == 0:
        topic = bddown_help.show_help
    elif n_topics == 1:
        topic = getattr(bddown_help, args[0].lower(), bddown_help.help)
    else:
        topic = bddown_help.help
    usage(topic)
def create_patterns_file( ngram_range ) :
    """
        Builds an artificial MWE pattern made only of wildcards and appends it
        to the global pattern list. Such a pattern matches every ngram of the
        requested sizes, so the -n option can reuse the same candidate
        extraction function (treat_sentence) as real patterns do.

        When `interpret_ngram` returns a false value for the argument, an
        error is printed on stderr, the usage is shown and the script exits
        with status 2.

        @param ngram_range String argument of the -n option, has the form
        "<min>:<max>"

        FIXME: marker carried over from the original documentation; the
        pending issue is not described there.
    """
    global patterns, usage_string, shortest_pattern, longest_pattern
    bounds = interpret_ngram( ngram_range )
    if not bounds :
        print >> sys.stderr, "The format of the argument must be <min>:<max>"
        print >> sys.stderr, "<min> must be at least 1 and <max> is at most 10"
        usage( usage_string )
        sys.exit( 2 )
    else :
        # The bounds are kept in globals besides being used for the pattern.
        ( shortest_pattern, longest_pattern ) = bounds
        wildcard_pattern = build_generic_pattern( shortest_pattern,
                                                  longest_pattern )
        patterns.append( wildcard_pattern )
def create_patterns_file(ngram_range):
    """
        Register a generic, all-wildcard MWE pattern for the -n option.

        The pattern is built by `build_generic_pattern` from the minimum and
        maximum sizes that `interpret_ngram` extracts from the argument, and
        is appended to the global `patterns` list. Because it matches every
        ngram in that size range, ngram extraction goes through the same
        function (treat_sentence) as pattern-based extraction.

        If `interpret_ngram` returns a false value, an error is printed on
        stderr, the usage is shown and the script exits with status 2.

        @param ngram_range String argument of the -n option, has the form
        "<min>:<max>"

        FIXME: marker carried over from the original documentation; the
        pending issue is not described there.
    """
    global patterns, usage_string, shortest_pattern, longest_pattern
    size_range = interpret_ngram(ngram_range)

    # Invalid argument: report it and leave; sys.exit does not return.
    if not size_range:
        print >> sys.stderr, "The format of the argument must be <min>:<max>"
        print >> sys.stderr, "<min> must be at least 1 and <max> is at most 10"
        usage(usage_string)
        sys.exit(2)

    shortest_pattern, longest_pattern = size_range
    generic = build_generic_pattern(shortest_pattern, longest_pattern)
    patterns.append(generic)
'value': key } figure = process(result, source_dir, slug, key, figure) language = source_dir.lstrip('_') json.dump(result, sys.stdout) def process(result, source_dir, slug, base, figure_start): filename = os.path.join(source_dir, '{}.md'.format(slug)) with open(filename, 'r') as reader: content = reader.read() headings = SECTION_PAT.findall(content) for (h, i) in zip(headings, range(1, len(headings) + 1)): result[h] = { 'slug': slug, 'text': 'Section', 'value': '{}.{}'.format(base, i) } figures = FIGURE_PAT.findall(content) for (f, i) in zip(figures, range(figure_start, len(figures) + figure_start)): result[f] = {'slug': slug, 'text': 'Figure', 'value': '{}'.format(i)} return len(figures) + figure_start if __name__ == '__main__': if len(sys.argv) != 3: usage('make_toc.py config_file source_dir') main(sys.argv[1], sys.argv[2])
print('==frontmatter==\n') with open(os.path.join(source_dir, 'index.html')) as reader: get_main_div(reader) print('==mainmatter==\n') for filename in make_filenames(source_dir, toc['lessons']): with open(filename, 'r') as reader: get_main_div(reader) print('==midpoint==\n') for filename in make_filenames(source_dir, toc['extras']): with open(filename, 'r') as reader: get_main_div(reader) def make_filenames(source_dir, slugs): '''Turn slugs into filenames.''' return [os.path.join(source_dir, '{}.html'.format(s)) for s in slugs] #------------------------------------------------------------------------------- if __name__ == '__main__': if len(sys.argv) == 1: get_main_div(sys.stdin) elif len(sys.argv) == 2: get_all(sys.argv[1]) else: usage('get_body.py [source_dir]')
for (l, r) in zip_longest(left, right, fillvalue='') ] if __name__ == '__main__': single, multi = None, None options = { 'collapse_comments': True, 'language': None, 'names_only': False, 'rejoin_lines': True, 'verbose': False } choices, extras = getopt.getopt(sys.argv[1:], 'aCd:f:Jnv') if len(extras) != 1: usage( 'mismatch.py [-a | -d dir | -f file] [-C] [-J] [-n] [-v] language') options['language'] = extras[0] for (opt, arg) in choices: if opt == '-a': pass elif opt == '-C': options['collapse_comments'] = False elif opt == '-d': multi = arg elif opt == '-f': single = arg elif opt == '-J': options['rejoin_lines'] = False elif opt == '-n': options['names_only'] = True elif opt == '-v':
for op, value in opts: if op == "-c": spider_mode = True suit = 'paladin-s' logger.info('spider mode open') elif op == '-w': with_server = True logger.info('start working with server') elif op == '-s': serial = value logger.info('read serial setting: ' + serial) elif op == '-t': suit = value logger.info('read suit setting: ' + suit) elif op == "-h": usage() sys.exit() else: usage() sys.exit() # start web retriever logger.info("start web retriever, lestining on port: " + str(web_retriever_port)) web_retriever_log = open("web.log", "a") web_retriever = RunCmd([ 'node', 'web retriever/ui/main.js', '--serial', serial, '--forward-port', str(web_forward_port), '--server-port', str(web_retriever_port), '--output', './output/' ])
def treat_options( opts, arg, n_arg, usage_string ) : """ Callback function that handles the command line options of this script. @param opts The options parsed by getopts. Ignored. @param arg The argument list parsed by getopts. @param n_arg The number of arguments expected for this script. """ global cache_file, get_freq_function, build_entry, web_freq global the_corpus_size, freq_name global low_limit, up_limit global text_input, count_vars global language global suffix_array global count_joint_frequency surface_flag = False ignorepos_flag = False mode = [] for ( o, a ) in opts: if o in ( "-i", "--index" ) : open_index( a ) get_freq_function = get_freq_index mode.append( "index" ) elif o in ( "-y", "--yahoo" ) : print >> sys.stderr, "THIS OPTION IS DEPRECATED AS YAHOO " + \ "SHUT DOWN THEIR FREE SEARCH API" sys.exit( 3 ) #web_freq = YahooFreq() #freq_name = "yahoo" #ignorepos_flag = True #the_corpus_size = web_freq.corpus_size() #get_freq_function = get_freq_web #mode.append( "yahoo" ) elif o in ( "-w", "--google" ) : web_freq = GoogleFreq() freq_name = "google" ignorepos_flag = True the_corpus_size = web_freq.corpus_size() get_freq_function = get_freq_web mode.append( "google" ) elif o in ( "-u", "--univ" ) : web_freq = GoogleFreqUniv( a ) freq_name = "google" ignorepos_flag = True the_corpus_size = web_freq.corpus_size() get_freq_function = get_freq_web mode.append( "google" ) elif o in ("-s", "--surface" ) : surface_flag = True elif o in ("-g", "--ignore-pos"): ignorepos_flag = True elif o in ("-f", "--from", "-t", "--to" ) : try : limit = int(a) if limit < 0 : raise ValueError, "Argument of " + o + " must be positive" if o in ( "-f", "--from" ) : if up_limit == -1 or up_limit >= limit : low_limit = limit else : raise ValueError, "Argument of -f >= argument of -t" else : if low_limit == -1 or low_limit <= limit : up_limit = limit else : raise ValueError, "Argument of -t <= argument of -t" except ValueError, message : print >> sys.stderr, message print >> 
sys.stderr, "Argument of " + o + " must be integer" usage( usage_string ) sys.exit( 2 ) elif o in ("-x", "--text" ) : text_input = True
#build_entry = lambda s, l, p: (l + SEPARATOR + WILDCARD).encode('utf-8') build_entry = lambda s, l, p: (l).encode('utf-8') suffix_array = index.load("lemma") else : build_entry = lambda s, l, p: (l + ATTRIBUTE_SEPARATOR + p).encode('utf-8') suffix_array = index.load("lemma+pos") else : # Web search, entries are single surface or lemma forms if surface_flag : build_entry = lambda s, l, p: s.encode('utf-8') else : build_entry = lambda s, l, p: l.encode('utf-8') if len(mode) != 1 : print >> sys.stderr, "Exactly one option -u, -w or -i, must be provided" usage( usage_string ) sys.exit( 2 ) elif text_input and web_freq is None : print >> sys.stderr, "-x option MUST be used with either -u or -w" usage( usage_string ) sys.exit( 2 ) treat_options_simplest( opts, arg, n_arg, usage_string ) ################################################################################ # MAIN SCRIPT longopts = ["yahoo", "google", "index=", "verbose", "ignore-pos", "surface",\ "from=", "to=", "text", "vars", "lang=", "no-joint", "univ=" ] arg = read_options( "ywi:vgsf:t:xal:Ju:", longopts, treat_options, -1, usage_string )
filename = os.path.join(source_dir, '{}.md'.format(slug)) with open(filename, 'r') as reader: content = reader.read() headings = SECTION_PAT.findall(content) fill(result, headings, slug, 1, 'Section', '{base}.{i}', {'base': base}) figures = FIGURE_PAT.findall(content) fill(result, figures, slug, counters['figure'], 'Figure', '{i}', {}) counters['figure'] += len(figures) tables = TABLE_PAT.findall(content) fill(result, tables, slug, counters['table'], 'Table', '{i}', {}) counters['table'] += len(tables) def fill(result, items, slug, start, text, fmt, values): for (k, i) in zip(items, range(start, start + len(items))): values['i'] = i result[k] = { 'slug': slug, 'text': text, 'value': fmt.format(**values) } if __name__ == '__main__': if len(sys.argv) != 2: usage('make_toc.py language') main(sys.argv[1])
print('==frontmatter==\n') with open(os.path.join(source_dir, 'index.html')) as reader: get_main_div(reader) print('==mainmatter==\n') for filename in make_filenames(source_dir, toc['lessons']): with open(filename, 'r') as reader: get_main_div(reader) print('==midpoint==\n') for filename in make_filenames(source_dir, toc['extras']): with open(filename, 'r') as reader: get_main_div(reader) def make_filenames(source_dir, slugs): '''Turn slugs into filenames.''' return [os.path.join(source_dir, s, 'index.html') for s in slugs] #------------------------------------------------------------------------------- if __name__ == '__main__': if len(sys.argv) == 1: get_main_div(sys.stdin) elif len(sys.argv) == 3: get_all(sys.argv[1], sys.argv[2]) else: usage('get_body.py [config_file source_dir]')
# All handlers, listed in the order in which main() applies them.
HANDLERS = [
    ExerciseAndSolution,
    ReplaceInclusion,
    GlossaryEntry,
    CrossRef,
    Figure,
    FigureRmd,
    FigureRef,
    Table,
    TableRef,
    Noindent,
    CodeBlock,
    Citation,
    Newline,
    PdfToSvg,
    GifToPng,
    Quote,
    Section,
    Subsection,
    Subsubsection,
    BibliographyTitle,
    FrontMatter,
    MainMatter,
    Midpoint,
    SpecialCharacters
]


def main(which, language, include_dir):
    '''
    Filter standard input through every handler and write the result to
    standard output.

    Each class in HANDLERS is instantiated with the cross-reference data for
    `language` and with `include_dir`; the method named by `which` is then
    called on the current list of lines and its return value becomes the
    input of the next handler.

    which: name of the handler method to call (the launcher below passes
        'pre' or 'post').
    language: passed to get_crossref to obtain the cross-reference data.
    include_dir: passed unchanged to every handler constructor.
    '''
    text = sys.stdin.readlines()
    refs = get_crossref(language)
    for handler_class in HANDLERS:
        transform = getattr(handler_class(refs, include_dir), which)
        text = transform(text)
    sys.stdout.writelines(text)


if __name__ == '__main__':
    USAGE = 'transform.py [--pre | --post] language include_dir'
    # The length test comes first so sys.argv[1] is only read when it exists.
    if len(sys.argv) != 4 or sys.argv[1] not in ['--pre', '--post']:
        usage(USAGE)
    else:
        main(sys.argv[1].lstrip('-'), sys.argv[2], sys.argv[3])
build_entry = lambda s, l, p: (s + SEPARATOR + WILDCARD).encode('utf-8') elif surface_flag : build_entry = lambda s, l, p: (s + SEPARATOR + p).encode('utf-8') elif pos_flag : build_entry = lambda s, l, p: (l + SEPARATOR + WILDCARD).encode('utf-8') else : build_entry = lambda s, l, p: (l + SEPARATOR + p).encode('utf-8') else : # Web search, entries are single surface or lemma forms if surface_flag : build_entry = lambda s, l, p: s.encode('utf-8') else : build_entry = lambda s, l, p: l.encode('utf-8') if len(mode) != 1 : print >> sys.stderr, "Exactly one option -y, -w or -i, must be provided" usage( usage_string ) sys.exit( 2 ) elif text_input and web_freq is None : print >> sys.stderr, "-x option MUST be used with either -y or -w" usage( usage_string ) sys.exit( 2 ) treat_options_simplest( opts, arg, n_arg, usage_string ) ################################################################################ # MAIN SCRIPT longopts = ["yahoo", "google", "index=", "verbose", "ignore-pos", "surface",\ "from=", "to=", "text", "vars", "lang=" ] arg = read_options( "ywi:vgsf:t:xal:", longopts, treat_options, -1, usage_string )
def treat_options( opts, arg, n_arg, usage_string ) : """ Callback function that handles the command line options of this script. @param opts The options parsed by getopts. Ignored. @param arg The argument list parsed by getopts. @param n_arg The number of arguments expected for this script. """ global cache_file, get_freq_function, build_entry, web_freq global the_corpus_size, freq_name global low_limit, up_limit global text_input, count_vars global language surface_flag = False pos_flag = False mode = [] for ( o, a ) in opts: if o in ( "-i", "--index" ) : open_index( a ) get_freq_function = get_freq_index mode.append( "index" ) elif o in ( "-y", "--yahoo" ) : web_freq = YahooFreq() freq_name = "yahoo" pos_flag = True the_corpus_size = web_freq.corpus_size() get_freq_function = get_freq_web mode.append( "yahoo" ) elif o in ( "-w", "--google" ) : web_freq = GoogleFreq() freq_name = "google" pos_flag = True the_corpus_size = web_freq.corpus_size() get_freq_function = get_freq_web mode.append( "google" ) elif o in ("-s", "--surface" ) : surface_flag = True elif o in ("-g", "--ignore-pos"): pos_flag = True elif o in ("-f", "--from", "-t", "--to" ) : try : limit = int(a) if limit < 0 : raise ValueError, "Argument of " + o + " must be positive" if o in ( "-f", "--from" ) : if up_limit == -1 or up_limit >= limit : low_limit = limit else : raise ValueError, "Argument of -f >= argument of -t" else : if low_limit == -1 or low_limit <= limit : up_limit = limit else : raise ValueError, "Argument of -t <= argument of -t" except ValueError, message : print >> sys.stderr, message print >> sys.stderr, "Argument of " + o + " must be integer" usage( usage_string ) sys.exit( 2 ) elif o in ("-x", "--text" ) : text_input = True
''' Main driver: read bibliography from stdin, format, and print. ''' print(HEADER.format(language)) text = sys.stdin.read() if text: try: source = bibtexparser.loads(text).entries for entry in source: for h in HANDLERS[entry['ENTRYTYPE']]: if type(h) is tuple: prefix, func = h text = func(entry) if text: sys.stdout.write(prefix + text) elif callable(h): sys.stdout.write(h(entry)) else: sys.stdout.write(h) sys.stdout.write('\n\n') except Exception as e: sys.stderr.write('\nERROR {}:: {}\n'.format(str(e), str(entry))) print(FOOTER) # Command-line launch. if __name__ == '__main__': if len(sys.argv) != 2: usage('bib2m language < input > output') main(sys.argv[1])