コード例 #1
0
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        Exactly one of -p/--patterns or -n/--ngram must be present; otherwise
        an error message and the usage text are emitted and the script exits
        with status 2. After the script-specific options have been handled,
        all four parameters are handed over to treat_options_simplest.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage text given to usage() on error.
    """
    global patterns, ignore_pos, surface_instead_lemmas, print_cand_freq, corpus_from_index
    selected_modes = []
    for (flag, value) in opts:
        # The two extraction modes consume the option value.
        if flag in ("-p", "--patterns"):
            read_patterns_file(value)
            selected_modes.append("patterns")
            continue
        if flag in ("-n", "--ngram"):
            create_patterns_file(value)
            selected_modes.append("ngram")
            continue
        # Everything else handled here is a plain switch raising a global flag.
        if flag in ("-g", "--ignore-pos"):
            ignore_pos = True
        elif flag in ("-s", "--surface"):
            surface_instead_lemmas = True
        elif flag in ("-f", "--freq"):
            print_cand_freq = True
        elif flag in ("-i", "--index"):
            corpus_from_index = True

    # Zero modes and repeated/combined modes are both rejected.
    if len(selected_modes) != 1:
        print >> sys.stderr, "Exactly one option, -p or -n, must be provided"
        usage(usage_string)
        sys.exit(2)

    treat_options_simplest(opts, arg, n_arg, usage_string)
コード例 #2
0
ファイル: candidates_old2.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.

        The options -p/--patterns and -n/--ngram select how patterns are
        obtained and exactly one of them must be given. When that is not the
        case, an error and the usage text are emitted and the script exits with
        status 2. Otherwise the call is completed by treat_options_simplest.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage text given to usage() on error.
    """
    global patterns, ignore_pos, surface_instead_lemmas, print_cand_freq, corpus_from_index
    # Number of mode-selecting options (-p / -n) encountered so far.
    mode_count = 0
    for ( opt_name, opt_value ) in opts :
        if opt_name in ( "-p", "--patterns" ) :
            read_patterns_file( opt_value )
            mode_count += 1
        elif opt_name in ( "-n", "--ngram" ) :
            create_patterns_file( opt_value )
            mode_count += 1
        elif opt_name in ( "-g", "--ignore-pos" ) :
            ignore_pos = True
        elif opt_name in ( "-s", "--surface" ) :
            surface_instead_lemmas = True
        elif opt_name in ( "-f", "--freq" ) :
            print_cand_freq = True
        elif opt_name in ( "-i", "--index" ) :
            corpus_from_index = True

    if mode_count != 1 :
        print >> sys.stderr, "Exactly one option, -p or -n, must be provided"
        usage( usage_string )
        sys.exit( 2 )

    treat_options_simplest( opts, arg, n_arg, usage_string )
コード例 #3
0
ファイル: allPythonContent.py プロジェクト: Mondego/pyreco
def execute_command(args=sys.argv[1:]):
    """Dispatch a command line to the matching sub-command handler.

    ``args`` is the argument list without the program name. Its default is
    ``sys.argv[1:]`` as it was when this function was defined.

    * No arguments: show usage and exit with status 1.
    * Leading flag: ``-h``/``--help`` shows the help text, and
      ``-V``/``-v``/``--version`` prints the version; both exit with status 0.
      Any other flag shows usage and exits with status 1.
    * Unknown command name: show usage and exit with status 1.
    * Known command with ``-h``/``--help`` anywhere in ``args``: show that
      command's help and exit with status 0.
    * Otherwise: call the command's handler with the remaining arguments.
    """
    if not args:
        usage()
        sys.exit(1)

    command = args[0]
    if command.startswith('-'):
        if command in ('-h', '--help'):
            usage(bddown_help.show_help())
            sys.exit(0)
        if command in ('-V', '-v', '--version'):
            print('V1.54')
            sys.exit(0)
        # Unrecognised flag.
        usage()
        sys.exit(1)

    commands = {
        'help': bd_help,
        'login': login,
        'download': download,
        'd': download,  # alias download
        'export': export,
        'show': show,
        'config': config,
    }

    if command not in commands:
        usage()
        sys.exit(1)

    if '-h' in args or '--help' in args:
        bd_help([command])
        sys.exit(0)

    commands[command](args[1:])
コード例 #4
0
def execute_command(args=sys.argv[1:]):
    """Run the sub-command named by the first element of ``args``.

    ``args`` excludes the program name; the default value is the slice of
    ``sys.argv`` taken once, when the function is defined.

    The process exits with status 1 after showing usage when ``args`` is
    empty, when the first argument is an unrecognised flag, or when it names
    no known command. It exits with status 0 after serving a help or version
    request. In every other case the selected handler is called with
    ``args[1:]``.
    """
    if not args:
        usage()
        sys.exit(1)

    command = args[0]
    if command.startswith('-'):
        # A leading flag is answered directly; no sub-command is run.
        status = 0
        if command in ('-h', '--help'):
            usage(bddown_help.show_help())
        elif command in ('-V', '-v', '--version'):
            print('V1.54')
        else:
            usage()
            status = 1
        sys.exit(status)

    commands = {
        'help':         bd_help,
        'login':        login,
        'download':     download,
        'd':            download,   # alias download
        'export':       export,
        'show':         show,
        'config':       config
    }

    try:
        handler = commands[command]
    except KeyError:
        usage()
        sys.exit(1)

    # A help switch anywhere on the line wins over running the command.
    if '-h' in args or '--help' in args:
        bd_help([command])
        sys.exit(0)

    handler(args[1:])
コード例 #5
0
def bd_help(args):
    """Pass the help entry selected by ``args`` to ``usage``.

    * no argument: ``bddown_help.show_help``
    * one argument: the ``bddown_help`` attribute named after the lower-cased
      argument, or ``bddown_help.help`` when no such attribute exists
    * more than one argument: ``bddown_help.help``
    """
    count = len(args)
    if count == 1:
        topic = getattr(bddown_help, args[0].lower(), bddown_help.help)
    elif count == 0:
        topic = bddown_help.show_help
    else:
        topic = bddown_help.help
    usage(topic)
コード例 #6
0
ファイル: allPythonContent.py プロジェクト: Mondego/pyreco
def bd_help(args):
    """Show help through ``usage`` for the topic named in ``args``.

    With exactly one argument, the attribute of ``bddown_help`` whose name is
    the lower-cased argument is shown, falling back to ``bddown_help.help``.
    With no argument, ``bddown_help.show_help`` is shown. With several
    arguments, ``bddown_help.help`` is shown.
    """
    if len(args) == 1:
        usage(getattr(bddown_help, args[0].lower(), bddown_help.help))
        return
    if len(args) == 0:
        usage(bddown_help.show_help)
        return
    usage(bddown_help.help)
コード例 #7
0
ファイル: candidates_old2.py プロジェクト: KWARC/mwetoolkit
def create_patterns_file( ngram_range ) :
    """
        Handle the argument of the -n option: interpret it as an ngram size
        range and append one generic pattern for that range to the global
        pattern list. The original authors describe the generic pattern as
        made of wildcards only, so that it matches every ngram of the
        requested sizes and ngram extraction can reuse the same candidate
        extraction function (treat_sentence) as pattern-based extraction.

        When the argument cannot be interpreted, an explanation and the usage
        text are emitted and the script exits with status 2.

        @param ngram_range String argument of the -n option, has the form
        "<min>:<max>"

        FIXME: marker inherited from the original source; what remains to be
        fixed is not recorded.
    """
    global patterns, usage_string, shortest_pattern, longest_pattern
    bounds = interpret_ngram( ngram_range )
    if not bounds :
        print >> sys.stderr, "The format of the argument must be <min>:<max>"
        print >> sys.stderr, "<min> must be at least 1 and <max> is at most 10"
        usage( usage_string )
        sys.exit( 2 )
    else :
        # Remember the bounds globally, then register the generic pattern.
        shortest_pattern, longest_pattern = bounds
        patterns.append( build_generic_pattern( shortest_pattern,
                                                longest_pattern ) )
コード例 #8
0
def create_patterns_file(ngram_range):
    """
        Turn the argument of the -n option into one generic pattern and add it
        to the global list of patterns. According to the original authors, the
        generic pattern consists of wildcards only and therefore matches every
        ngram in the requested size range, which lets ngram extraction share
        the candidate extraction function treat_sentence.

        An argument that interpret_ngram rejects makes the script emit an
        explanation plus the usage text and exit with status 2.

        @param ngram_range String argument of the -n option, has the form
        "<min>:<max>"

        FIXME: marker inherited from the original source; what remains to be
        fixed is not recorded.
    """
    global patterns, usage_string, shortest_pattern, longest_pattern
    size_range = interpret_ngram(ngram_range)
    if not size_range:
        print >> sys.stderr, "The format of the argument must be <min>:<max>"
        print >> sys.stderr, "<min> must be at least 1 and <max> is at most 10"
        usage(usage_string)
        sys.exit(2)
    else:
        # The bounds are kept in globals as well as being used right away.
        (shortest_pattern, longest_pattern) = size_range
        patterns.append(
            build_generic_pattern(shortest_pattern, longest_pattern))
コード例 #9
0
            'value': key
        }
        figure = process(result, source_dir, slug, key, figure)

    language = source_dir.lstrip('_')
    json.dump(result, sys.stdout)


def process(result, source_dir, slug, base, figure_start):
    """Record the sections and figures found in one Markdown file.

    Reads ``<source_dir>/<slug>.md`` and adds one entry to ``result`` per
    match of ``SECTION_PAT`` and per match of ``FIGURE_PAT``, keyed by the
    matched text.

    Sections are numbered ``<base>.1``, ``<base>.2``, ... in order of
    appearance. Figures are numbered consecutively starting at
    ``figure_start``.

    Returns the number the next figure should receive, i.e. ``figure_start``
    plus the number of figures found in this file.
    """
    path = os.path.join(source_dir, '{}.md'.format(slug))
    with open(path, 'r') as reader:
        content = reader.read()

    for (number, heading) in enumerate(SECTION_PAT.findall(content), 1):
        result[heading] = {
            'slug': slug,
            'text': 'Section',
            'value': '{}.{}'.format(base, number)
        }

    figures = FIGURE_PAT.findall(content)
    for (number, figure) in enumerate(figures, figure_start):
        result[figure] = {
            'slug': slug,
            'text': 'Figure',
            'value': '{}'.format(number)
        }
    return figure_start + len(figures)


# Script entry point: requires exactly two command-line arguments.
if __name__ == '__main__':
    if len(sys.argv) != 3:
        # NOTE(review): usage() is defined elsewhere; presumably it reports the
        # message and terminates. If it returns, the main() call below still
        # runs and indexes sys.argv with arguments missing - confirm.
        usage('make_toc.py config_file source_dir')
    main(sys.argv[1], sys.argv[2])
コード例 #10
0
    print('==frontmatter==\n')
    with open(os.path.join(source_dir, 'index.html')) as reader:
        get_main_div(reader)

    print('==mainmatter==\n')
    for filename in make_filenames(source_dir, toc['lessons']):
        with open(filename, 'r') as reader:
            get_main_div(reader)

    print('==midpoint==\n')
    for filename in make_filenames(source_dir, toc['extras']):
        with open(filename, 'r') as reader:
            get_main_div(reader)


def make_filenames(source_dir, slugs):
    '''Return the path "<source_dir>/<slug>.html" for each slug, in order.'''

    filenames = []
    for slug in slugs:
        filenames.append(os.path.join(source_dir, '{}.html'.format(slug)))
    return filenames


#-------------------------------------------------------------------------------

# Script entry point. With one argument it is passed to get_all(); with no
# argument standard input is passed to get_main_div(); anything else is a
# usage error.
if __name__ == '__main__':
    if len(sys.argv) == 2:
        get_all(sys.argv[1])
    elif len(sys.argv) == 1:
        get_main_div(sys.stdin)
    else:
        usage('get_body.py [source_dir]')
コード例 #11
0
ファイル: mismatch.py プロジェクト: shwina/still-magic
        for (l, r) in zip_longest(left, right, fillvalue='')
    ]


if __name__ == '__main__':
    single, multi = None, None
    options = {
        'collapse_comments': True,
        'language': None,
        'names_only': False,
        'rejoin_lines': True,
        'verbose': False
    }
    choices, extras = getopt.getopt(sys.argv[1:], 'aCd:f:Jnv')
    if len(extras) != 1:
        usage(
            'mismatch.py [-a | -d dir | -f file] [-C] [-J] [-n] [-v] language')
    options['language'] = extras[0]
    for (opt, arg) in choices:
        if opt == '-a':
            pass
        elif opt == '-C':
            options['collapse_comments'] = False
        elif opt == '-d':
            multi = arg
        elif opt == '-f':
            single = arg
        elif opt == '-J':
            options['rejoin_lines'] = False
        elif opt == '-n':
            options['names_only'] = True
        elif opt == '-v':
コード例 #12
0
    for op, value in opts:
        if op == "-c":
            spider_mode = True
            suit = 'paladin-s'
            logger.info('spider mode open')
        elif op == '-w':
            with_server = True
            logger.info('start working with server')
        elif op == '-s':
            serial = value
            logger.info('read serial setting: ' + serial)
        elif op == '-t':
            suit = value
            logger.info('read suit setting: ' + suit)
        elif op == "-h":
            usage()
            sys.exit()
        else:
            usage()
            sys.exit()

    # start web retriever
    logger.info("start web retriever, lestining on port: " +
                str(web_retriever_port))
    web_retriever_log = open("web.log", "a")
    web_retriever = RunCmd([
        'node', 'web retriever/ui/main.js', '--serial', serial,
        '--forward-port',
        str(web_forward_port), '--server-port',
        str(web_retriever_port), '--output', './output/'
    ])
コード例 #13
0
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.
        
        @param opts The options parsed by getopts. Ignored.
        
        @param arg The argument list parsed by getopts.
        
        @param n_arg The number of arguments expected for this script.    
    """
    global cache_file, get_freq_function, build_entry, web_freq
    global the_corpus_size, freq_name
    global low_limit, up_limit
    global text_input, count_vars
    global language
    global suffix_array
    global count_joint_frequency
    surface_flag = False
    ignorepos_flag = False
    mode = []
    for ( o, a ) in opts:
        if o in ( "-i", "--index" ) : 
            open_index( a )
            get_freq_function = get_freq_index
            mode.append( "index" )              
        elif o in ( "-y", "--yahoo" ) :
            print >> sys.stderr, "THIS OPTION IS DEPRECATED AS YAHOO " + \
                                 "SHUT DOWN THEIR FREE SEARCH API"
            sys.exit( 3 )
            #web_freq = YahooFreq()          
            #freq_name = "yahoo"
            #ignorepos_flag = True 
            #the_corpus_size = web_freq.corpus_size()         
            #get_freq_function = get_freq_web
            #mode.append( "yahoo" )   
        elif o in ( "-w", "--google" ) :
            web_freq = GoogleFreq()          
            freq_name = "google"
            ignorepos_flag = True 
            the_corpus_size = web_freq.corpus_size()         
            get_freq_function = get_freq_web
            mode.append( "google" ) 
        elif o in ( "-u", "--univ" ) :
            web_freq = GoogleFreqUniv( a )          
            freq_name = "google"
            ignorepos_flag = True 
            the_corpus_size = web_freq.corpus_size()         
            get_freq_function = get_freq_web
            mode.append( "google" )             
        elif o in ("-s", "--surface" ) :
            surface_flag = True
        elif o in ("-g", "--ignore-pos"): 
            ignorepos_flag = True
        elif o in ("-f", "--from", "-t", "--to" ) :
            try :
                limit = int(a)
                if limit < 0 :
                    raise ValueError, "Argument of " + o + " must be positive"
                if o in ( "-f", "--from" ) :
                    if up_limit == -1 or up_limit >= limit :
                        low_limit = limit
                    else :
                        raise ValueError, "Argument of -f >= argument of -t"
                else :
                    if low_limit == -1 or low_limit <= limit :
                        up_limit = limit
                    else :
                        raise ValueError, "Argument of -t <= argument of -t"
            except ValueError, message :
                print >> sys.stderr, message
                print >> sys.stderr, "Argument of " + o + " must be integer"
                usage( usage_string )
                sys.exit( 2 )
        elif o in ("-x", "--text" ) : 
            text_input = True
コード例 #14
0
            #build_entry = lambda s, l, p: (l + SEPARATOR + WILDCARD).encode('utf-8')
            build_entry = lambda s, l, p: (l).encode('utf-8')
            suffix_array = index.load("lemma")
        else :      
            build_entry = lambda s, l, p: (l + ATTRIBUTE_SEPARATOR + p).encode('utf-8')
            suffix_array = index.load("lemma+pos")

    else : # Web search, entries are single surface or lemma forms         
        if surface_flag :
            build_entry = lambda s, l, p: s.encode('utf-8')
        else :
            build_entry = lambda s, l, p: l.encode('utf-8')
        
    if len(mode) != 1 :
        print >> sys.stderr, "Exactly one option -u, -w or -i, must be provided"
        usage( usage_string )
        sys.exit( 2 )
    elif text_input and web_freq is None :
        print >> sys.stderr, "-x option MUST be used with either -u or -w"
        usage( usage_string )
        sys.exit( 2 )
        
                
    treat_options_simplest( opts, arg, n_arg, usage_string )

################################################################################
# MAIN SCRIPT

# Long option names handed to read_options together with the short option
# string; treat_options is the callback that interprets the parsed options.
longopts = [
    "yahoo",
    "google",
    "index=",
    "verbose",
    "ignore-pos",
    "surface",
    "from=",
    "to=",
    "text",
    "vars",
    "lang=",
    "no-joint",
    "univ=",
]
arg = read_options(
    "ywi:vgsf:t:xal:Ju:", longopts, treat_options, -1, usage_string )
コード例 #15
0
    filename = os.path.join(source_dir, '{}.md'.format(slug))
    with open(filename, 'r') as reader:
        content = reader.read()

    headings = SECTION_PAT.findall(content)
    fill(result, headings, slug, 1, 'Section', '{base}.{i}', {'base': base})

    figures = FIGURE_PAT.findall(content)
    fill(result, figures, slug, counters['figure'], 'Figure', '{i}', {})
    counters['figure'] += len(figures)

    tables = TABLE_PAT.findall(content)
    fill(result, tables, slug, counters['table'], 'Table', '{i}', {})
    counters['table'] += len(tables)


def fill(result, items, slug, start, text, fmt, values):
    """Add one numbered cross-reference entry to ``result`` per item.

    Items are numbered consecutively from ``start`` in iteration order. Each
    entry is stored under the item itself and has the keys ``'slug'``,
    ``'text'`` and ``'value'``. ``'value'`` is ``fmt`` formatted with the
    fields of ``values`` plus ``i``, the item's number.

    ``values`` is only read. The running number is merged into a per-item
    copy, so the caller's dict is not modified.

    Returns ``None``; ``result`` is updated in place.
    """
    for (number, key) in enumerate(items, start):
        # 'i' overrides any 'i' already present in values.
        fields = dict(values, i=number)
        result[key] = {
            'slug': slug,
            'text': text,
            'value': fmt.format(**fields)
        }


# Script entry point: requires exactly one command-line argument.
if __name__ == '__main__':
    if len(sys.argv) != 2:
        # NOTE(review): usage() is defined elsewhere; presumably it reports the
        # message and terminates. If it returns, the main() call below still
        # runs and may index sys.argv with the argument missing - confirm.
        usage('make_toc.py language')
    main(sys.argv[1])
コード例 #16
0
    print('==frontmatter==\n')
    with open(os.path.join(source_dir, 'index.html')) as reader:
        get_main_div(reader)

    print('==mainmatter==\n')
    for filename in make_filenames(source_dir, toc['lessons']):
        with open(filename, 'r') as reader:
            get_main_div(reader)

    print('==midpoint==\n')
    for filename in make_filenames(source_dir, toc['extras']):
        with open(filename, 'r') as reader:
            get_main_div(reader)


def make_filenames(source_dir, slugs):
    '''Return "<source_dir>/<slug>/index.html" for each slug, in order.'''

    filenames = []
    for slug in slugs:
        filenames.append(os.path.join(source_dir, slug, 'index.html'))
    return filenames


#-------------------------------------------------------------------------------

# Script entry point. With two arguments they are passed to get_all(); with
# no argument standard input is passed to get_main_div(); anything else is a
# usage error.
if __name__ == '__main__':
    if len(sys.argv) == 3:
        get_all(sys.argv[1], sys.argv[2])
    elif len(sys.argv) == 1:
        get_main_div(sys.stdin)
    else:
        usage('get_body.py [config_file source_dir]')
コード例 #17
0
ファイル: transform.py プロジェクト: shwina/still-magic
# All handlers, listed in the order in which main() applies them.
HANDLERS = [
    ExerciseAndSolution,
    ReplaceInclusion,
    GlossaryEntry,
    CrossRef,
    Figure,
    FigureRmd,
    FigureRef,
    Table,
    TableRef,
    Noindent,
    CodeBlock,
    Citation,
    Newline,
    PdfToSvg,
    GifToPng,
    Quote,
    Section,
    Subsection,
    Subsubsection,
    BibliographyTitle,
    FrontMatter,
    MainMatter,
    Midpoint,
    SpecialCharacters,
]


def main(which, language, include_dir):
    '''
    Run every handler in HANDLERS over the lines read from standard input
    and write the resulting lines to standard output.

    `which` names the method called on each handler instance; the launcher
    in this file passes 'pre' or 'post'. Each handler is constructed with
    the cross-reference data for `language` and with `include_dir`, and
    receives the lines produced by the handler before it.
    '''
    lines = sys.stdin.readlines()
    crossref = get_crossref(language)
    for handler_type in HANDLERS:
        stage = getattr(handler_type(crossref, include_dir), which)
        lines = stage(lines)
    sys.stdout.writelines(lines)


# Script entry point: requires a mode flag followed by two further arguments.
if __name__ == '__main__':
    USAGE = 'transform.py [--pre | --post] language include_dir'
    # The flag is only inspected once the argument count is known to be right.
    if len(sys.argv) != 4 or sys.argv[1] not in ['--pre', '--post']:
        usage(USAGE)
    else:
        # The leading dashes are stripped, so main() receives 'pre' or 'post'.
        main(sys.argv[1].lstrip('-'), sys.argv[2], sys.argv[3])
コード例 #18
0
ファイル: counter.py プロジェクト: KWARC/mwetoolkit
            build_entry = lambda s, l, p: (s + SEPARATOR + WILDCARD).encode('utf-8')
        elif surface_flag :
            build_entry = lambda s, l, p: (s + SEPARATOR + p).encode('utf-8')
        elif pos_flag :
            build_entry = lambda s, l, p: (l + SEPARATOR + WILDCARD).encode('utf-8')
        else :      
            build_entry = lambda s, l, p: (l + SEPARATOR + p).encode('utf-8')
    else : # Web search, entries are single surface or lemma forms         
        if surface_flag :
            build_entry = lambda s, l, p: s.encode('utf-8')
        else :
            build_entry = lambda s, l, p: l.encode('utf-8')
        
    if len(mode) != 1 :
        print >> sys.stderr, "Exactly one option -y, -w or -i, must be provided"
        usage( usage_string )
        sys.exit( 2 )
    elif text_input and web_freq is None :
        print >> sys.stderr, "-x option MUST be used with either -y or -w"
        usage( usage_string )
        sys.exit( 2 )
        
                
    treat_options_simplest( opts, arg, n_arg, usage_string )

################################################################################
# MAIN SCRIPT

# Long option names handed to read_options together with the short option
# string; treat_options is the callback that interprets the parsed options.
longopts = [
    "yahoo",
    "google",
    "index=",
    "verbose",
    "ignore-pos",
    "surface",
    "from=",
    "to=",
    "text",
    "vars",
    "lang=",
]
arg = read_options(
    "ywi:vgsf:t:xal:", longopts, treat_options, -1, usage_string )
コード例 #19
0
ファイル: counter.py プロジェクト: KWARC/mwetoolkit
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.
        
        @param opts The options parsed by getopts. Ignored.
        
        @param arg The argument list parsed by getopts.
        
        @param n_arg The number of arguments expected for this script.    
    """
    global cache_file, get_freq_function, build_entry, web_freq
    global the_corpus_size, freq_name
    global low_limit, up_limit
    global text_input, count_vars
    global language
    surface_flag = False
    pos_flag = False
    mode = []
    for ( o, a ) in opts:
        if o in ( "-i", "--index" ) : 
            open_index( a )
            get_freq_function = get_freq_index
            mode.append( "index" )              
        elif o in ( "-y", "--yahoo" ) :
            web_freq = YahooFreq()          
            freq_name = "yahoo"
            pos_flag = True 
            the_corpus_size = web_freq.corpus_size()         
            get_freq_function = get_freq_web
            mode.append( "yahoo" )   
        elif o in ( "-w", "--google" ) :
            web_freq = GoogleFreq()          
            freq_name = "google"
            pos_flag = True 
            the_corpus_size = web_freq.corpus_size()         
            get_freq_function = get_freq_web
            mode.append( "google" ) 
        elif o in ("-s", "--surface" ) :
            surface_flag = True
        elif o in ("-g", "--ignore-pos"): 
            pos_flag = True
        elif o in ("-f", "--from", "-t", "--to" ) :
            try :
                limit = int(a)
                if limit < 0 :
                    raise ValueError, "Argument of " + o + " must be positive"
                if o in ( "-f", "--from" ) :
                    if up_limit == -1 or up_limit >= limit :
                        low_limit = limit
                    else :
                        raise ValueError, "Argument of -f >= argument of -t"
                else :
                    if low_limit == -1 or low_limit <= limit :
                        up_limit = limit
                    else :
                        raise ValueError, "Argument of -t <= argument of -t"
            except ValueError, message :
                print >> sys.stderr, message
                print >> sys.stderr, "Argument of " + o + " must be integer"
                usage( usage_string )
                sys.exit( 2 )
        elif o in ("-x", "--text" ) : 
            text_input = True
コード例 #20
0
    '''
    Main driver: read bibliography from stdin, format, and print.
    '''
    print(HEADER.format(language))
    text = sys.stdin.read()
    if text:
        try:
            source = bibtexparser.loads(text).entries
            for entry in source:
                for h in HANDLERS[entry['ENTRYTYPE']]:
                    if type(h) is tuple:
                        prefix, func = h
                        text = func(entry)
                        if text:
                            sys.stdout.write(prefix + text)
                    elif callable(h):
                        sys.stdout.write(h(entry))
                    else:
                        sys.stdout.write(h)
                sys.stdout.write('\n\n')
        except Exception as e:
            sys.stderr.write('\nERROR {}:: {}\n'.format(str(e), str(entry)))
    print(FOOTER)


# Command-line launch.
if __name__ == '__main__':
    # Exactly one command-line argument is required.
    if len(sys.argv) != 2:
        # NOTE(review): usage() is defined elsewhere; presumably it reports the
        # message and terminates. If it returns, the main() call below still
        # runs and may index sys.argv with the argument missing - confirm.
        usage('bib2m language < input > output')
    main(sys.argv[1])