Ejemplo n.º 1
0
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        Exactly one of -p/--patterns or -n/--ngram must be given; otherwise an
        error message is printed on stderr, the usage is shown and the script
        exits with status 2. The other switches only set module-level flags.
        After validation, all four parameters are forwarded unchanged to
        treat_options_simplest.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage message handed to usage() on error.
    """
    global patterns, ignore_pos, surface_instead_lemmas, print_cand_freq, corpus_from_index
    # One entry is recorded per mode-selecting option seen.
    selected_modes = []
    for (flag, value) in opts:
        if flag in ("-p", "--patterns"):
            read_patterns_file(value)
            selected_modes.append("patterns")
        elif flag in ("-n", "--ngram"):
            create_patterns_file(value)
            selected_modes.append("ngram")
        elif flag in ("-g", "--ignore-pos"):
            ignore_pos = True
        elif flag in ("-s", "--surface"):
            surface_instead_lemmas = True
        elif flag in ("-f", "--freq"):
            print_cand_freq = True
        elif flag in ("-i", "--index"):
            corpus_from_index = True

    # Zero modes and more than one mode are both rejected.
    if len(selected_modes) != 1:
        print >> sys.stderr, "Exactly one option, -p or -n, must be provided"
        usage(usage_string)
        sys.exit(2)

    treat_options_simplest(opts, arg, n_arg, usage_string)
Ejemplo n.º 2
0
def treat_options(opts, arg, n_arg, usage_string):
    """
        Callback function that handles the command line options of this script.

        The pattern source is chosen by -p/--patterns (read a patterns file) or
        -n/--ngram (build a generic n-gram pattern); exactly one of the two is
        required. If that is not the case, an error is printed on stderr, the
        usage is shown and the script exits with status 2. The switches -g, -s,
        -f and -i only raise module-level flags. Finally the four parameters
        are passed on to treat_options_simplest.

        @param opts The (option, value) pairs parsed by getopts.

        @param arg The argument list parsed by getopts.

        @param n_arg The number of arguments expected for this script.

        @param usage_string The usage message handed to usage() on error.
    """
    global patterns, ignore_pos, surface_instead_lemmas, print_cand_freq, corpus_from_index
    chosen_modes = []
    for (option, option_arg) in opts:
        if option in ("-p", "--patterns"):
            read_patterns_file(option_arg)
            chosen_modes.append("patterns")
        elif option in ("-n", "--ngram"):
            create_patterns_file(option_arg)
            chosen_modes.append("ngram")
        elif option in ("-g", "--ignore-pos"):
            ignore_pos = True
        elif option in ("-s", "--surface"):
            surface_instead_lemmas = True
        elif option in ("-f", "--freq"):
            print_cand_freq = True
        elif option in ("-i", "--index"):
            corpus_from_index = True

    # Both "no mode at all" and "several modes" are usage errors.
    if len(chosen_modes) != 1:
        print >> sys.stderr, "Exactly one option, -p or -n, must be provided"
        usage(usage_string)
        sys.exit(2)

    treat_options_simplest(opts, arg, n_arg, usage_string)
Ejemplo n.º 3
0
def execute_command(args=None):
    """
    Dispatch a command line to the matching sub-command handler.

    args is the argument list without the program name; when omitted (None)
    the current ``sys.argv[1:]`` is used. The first element is either a
    global switch (-h/--help, -V/-v/--version) or a sub-command name.

    The function does not return a value. It exits with status 1 when no
    argument is given or when the switch / sub-command is unknown, and with
    status 0 after handling a global switch or a per-command -h/--help.
    Otherwise the selected handler is called with the remaining arguments.
    """
    # Resolve the default at call time: a ``sys.argv[1:]`` default value is
    # evaluated only once, when the function is defined, and would therefore
    # miss any later change to sys.argv.
    if args is None:
        args = sys.argv[1:]
    if not args:
        usage()
        sys.exit(1)

    command = args[0]
    if command.startswith('-'):
        if command in ('-h', '--help'):
            # NOTE(review): show_help is *called* here, whereas bd_help passes
            # bddown_help.show_help uncalled -- confirm which form usage()
            # expects.
            usage(bddown_help.show_help())
        elif command in ('-V', '-v', '--version'):
            # Parenthesised form prints the same text under Python 2 and 3.
            print('V1.54')
        else:
            usage()
            sys.exit(1)
        sys.exit(0)

    commands = {
        'help':         bd_help,
        'login':        login,
        'download':     download,
        'd':            download,   # alias download
        'export':       export,
        'show':         show,
        'config':       config
    }

    if command not in commands:
        usage()
        sys.exit(1)
    elif '-h' in args or '--help' in args:
        # A help switch anywhere after the sub-command shows that command's
        # help instead of running it.
        bd_help([command])
        sys.exit(0)
    else:
        commands[command](args[1:])
Ejemplo n.º 4
0
def execute_command(args=None):
    """
    Dispatch a command line to the matching sub-command handler.

    args is the argument list without the program name; when omitted (None)
    the current ``sys.argv[1:]`` is used. The first element is either a
    global switch (-h/--help, -V/-v/--version) or a sub-command name.

    The function does not return a value. It exits with status 1 when no
    argument is given or when the switch / sub-command is unknown, and with
    status 0 after handling a global switch or a per-command -h/--help.
    Otherwise the selected handler is called with the remaining arguments.
    """
    # Resolve the default at call time: a ``sys.argv[1:]`` default value is
    # evaluated only once, when the function is defined, and would therefore
    # miss any later change to sys.argv.
    if args is None:
        args = sys.argv[1:]
    if not args:
        usage()
        sys.exit(1)

    command = args[0]
    if command.startswith('-'):
        if command in ('-h', '--help'):
            # NOTE(review): show_help is *called* here, whereas bd_help passes
            # bddown_help.show_help uncalled -- confirm which form usage()
            # expects.
            usage(bddown_help.show_help())
        elif command in ('-V', '-v', '--version'):
            # Parenthesised form prints the same text under Python 2 and 3.
            print('V1.54')
        else:
            usage()
            sys.exit(1)
        sys.exit(0)

    commands = {
        'help': bd_help,
        'login': login,
        'download': download,
        'd': download,  # alias download
        'export': export,
        'show': show,
        'config': config
    }

    if command not in commands:
        usage()
        sys.exit(1)
    elif '-h' in args or '--help' in args:
        # A help switch anywhere after the sub-command shows that command's
        # help instead of running it.
        bd_help([command])
        sys.exit(0)
    else:
        commands[command](args[1:])
Ejemplo n.º 5
0
def bd_help(args):
    """
    Show the help entry selected by ``args`` through ``usage``.

    No argument selects ``bddown_help.show_help``. A single argument selects
    the ``bddown_help`` attribute named after it (lower-cased), falling back
    to ``bddown_help.help`` when no such attribute exists. Two or more
    arguments select ``bddown_help.help``.
    """
    arg_count = len(args)
    if arg_count == 0:
        topic = bddown_help.show_help
    elif arg_count == 1:
        topic = getattr(bddown_help, args[0].lower(), bddown_help.help)
    else:
        topic = bddown_help.help
    usage(topic)
Ejemplo n.º 6
0
def bd_help(args):
    """
    Pick a help entry from ``bddown_help`` and hand it to ``usage``.

    - no argument: ``bddown_help.show_help``
    - one argument: the attribute of ``bddown_help`` whose name is that
      argument in lower case, or ``bddown_help.help`` if there is none
    - more than one argument: ``bddown_help.help``
    """
    how_many = len(args)
    if how_many == 0:
        entry = bddown_help.show_help
    elif how_many == 1:
        entry = getattr(bddown_help, args[0].lower(), bddown_help.help)
    else:
        entry = bddown_help.help
    usage(entry)
Ejemplo n.º 7
0
def create_patterns_file(ngram_range):
    """
        Build one generic MWE pattern from the argument of the -n option and
        append it to the global pattern list.

        The pattern is obtained from build_generic_pattern for the size range
        returned by interpret_ngram; the range bounds are also stored in the
        globals shortest_pattern and longest_pattern. Extracting n-grams through
        such an artificial pattern lets a single candidate extraction function,
        treat_sentence, serve both modes.

        If interpret_ngram returns a false value, an error is printed on stderr,
        the usage is shown and the script exits with status 2.

        @param ngram_range String argument of the -n option, has the form
        "<min>:<max>"

        FIXME (marker carried over from the original docstring, which does not
        say what needs fixing)
    """
    global patterns, usage_string, shortest_pattern, longest_pattern
    bounds = interpret_ngram(ngram_range)
    if not bounds:
        print >> sys.stderr, "The format of the argument must be <min>:<max>"
        print >> sys.stderr, "<min> must be at least 1 and <max> is at most 10"
        usage(usage_string)
        sys.exit(2)
    else:
        shortest_pattern, longest_pattern = bounds
        generic = build_generic_pattern(shortest_pattern, longest_pattern)
        patterns.append(generic)
Ejemplo n.º 8
0
def create_patterns_file(ngram_range):
    """
        Register an artificial, fully generic MWE pattern for the -n option.

        interpret_ngram turns the option argument into a (min, max) pair, which
        is stored in the globals shortest_pattern and longest_pattern and passed
        to build_generic_pattern; the resulting pattern is appended to the
        global patterns list. Going through a pattern, odd as it may look for
        plain n-gram extraction, keeps treat_sentence as the single candidate
        extraction function.

        When interpret_ngram yields a false value the function reports the
        expected format on stderr, shows the usage and exits with status 2.

        @param ngram_range String argument of the -n option, has the form
        "<min>:<max>"

        FIXME (marker carried over from the original docstring, which does not
        say what needs fixing)
    """
    global patterns, usage_string, shortest_pattern, longest_pattern
    size_range = interpret_ngram(ngram_range)
    if not size_range:
        print >> sys.stderr, "The format of the argument must be <min>:<max>"
        print >> sys.stderr, "<min> must be at least 1 and <max> is at most 10"
        usage(usage_string)
        sys.exit(2)
    else:
        shortest_pattern, longest_pattern = size_range
        wildcard_pattern = build_generic_pattern(shortest_pattern,
                                                 longest_pattern)
        patterns.append(wildcard_pattern)
Ejemplo n.º 9
0
            'value': key
        }
        figure = process(result, source_dir, slug, key, figure)

    language = source_dir.lstrip('_')
    json.dump(result, sys.stdout)


def process(result, source_dir, slug, base, figure_start):
    '''
    Record the sections and figures of one Markdown file in result.

    Reads <source_dir>/<slug>.md. Every SECTION_PAT match becomes a 'Section'
    entry numbered '<base>.<n>' with n counting from 1; every FIGURE_PAT match
    becomes a 'Figure' entry numbered consecutively from figure_start.

    Returns the figure number that follows the last one used here, i.e.
    figure_start plus the number of figures found.
    '''
    filename = os.path.join(source_dir, '{}.md'.format(slug))
    with open(filename, 'r') as reader:
        content = reader.read()

    for number, heading in enumerate(SECTION_PAT.findall(content), 1):
        result[heading] = {
            'slug': slug,
            'text': 'Section',
            'value': '{}.{}'.format(base, number)
        }

    next_figure = figure_start
    for figure in FIGURE_PAT.findall(content):
        result[figure] = {
            'slug': slug,
            'text': 'Figure',
            'value': '{}'.format(next_figure)
        }
        next_figure += 1
    return next_figure


if __name__ == '__main__':
    # Script entry point: exactly two command-line arguments are expected
    # (config_file and source_dir, per the usage text below).
    if len(sys.argv) != 3:
        # NOTE(review): presumably usage() terminates the script; if it
        # returned, the main() call below would still run -- confirm.
        usage('make_toc.py config_file source_dir')
    main(sys.argv[1], sys.argv[2])
Ejemplo n.º 10
0
    print('==frontmatter==\n')
    with open(os.path.join(source_dir, 'index.html')) as reader:
        get_main_div(reader)

    print('==mainmatter==\n')
    for filename in make_filenames(source_dir, toc['lessons']):
        with open(filename, 'r') as reader:
            get_main_div(reader)

    print('==midpoint==\n')
    for filename in make_filenames(source_dir, toc['extras']):
        with open(filename, 'r') as reader:
            get_main_div(reader)


def make_filenames(source_dir, slugs):
    '''
    Turn slugs into filenames.

    Each slug maps to <source_dir>/<slug>.html; the returned list keeps the
    order of slugs.
    '''
    filenames = []
    for slug in slugs:
        page = '{}.html'.format(slug)
        filenames.append(os.path.join(source_dir, page))
    return filenames


#-------------------------------------------------------------------------------

if __name__ == '__main__':
    # Dispatch on how many arguments follow the script name: none reads a
    # single page from stdin, one processes a whole source directory, anything
    # else is a usage error.
    extra_args = len(sys.argv) - 1
    if extra_args == 0:
        get_main_div(sys.stdin)
    elif extra_args == 1:
        get_all(sys.argv[1])
    else:
        usage('get_body.py [source_dir]')
Ejemplo n.º 11
0
        for (l, r) in zip_longest(left, right, fillvalue='')
    ]


if __name__ == '__main__':
    single, multi = None, None
    options = {
        'collapse_comments': True,
        'language': None,
        'names_only': False,
        'rejoin_lines': True,
        'verbose': False
    }
    choices, extras = getopt.getopt(sys.argv[1:], 'aCd:f:Jnv')
    if len(extras) != 1:
        usage(
            'mismatch.py [-a | -d dir | -f file] [-C] [-J] [-n] [-v] language')
    options['language'] = extras[0]
    for (opt, arg) in choices:
        if opt == '-a':
            pass
        elif opt == '-C':
            options['collapse_comments'] = False
        elif opt == '-d':
            multi = arg
        elif opt == '-f':
            single = arg
        elif opt == '-J':
            options['rejoin_lines'] = False
        elif opt == '-n':
            options['names_only'] = True
        elif opt == '-v':
Ejemplo n.º 12
0
    for op, value in opts:
        if op == "-c":
            spider_mode = True
            suit = 'paladin-s'
            logger.info('spider mode open')
        elif op == '-w':
            with_server = True
            logger.info('start working with server')
        elif op == '-s':
            serial = value
            logger.info('read serial setting: ' + serial)
        elif op == '-t':
            suit = value
            logger.info('read suit setting: ' + suit)
        elif op == "-h":
            usage()
            sys.exit()
        else:
            usage()
            sys.exit()

    # start web retriever
    logger.info("start web retriever, lestining on port: " +
                str(web_retriever_port))
    web_retriever_log = open("web.log", "a")
    web_retriever = RunCmd([
        'node', 'web retriever/ui/main.js', '--serial', serial,
        '--forward-port',
        str(web_forward_port), '--server-port',
        str(web_retriever_port), '--output', './output/'
    ])
Ejemplo n.º 13
0
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.
        
        @param opts The options parsed by getopts. Ignored.
        
        @param arg The argument list parsed by getopts.
        
        @param n_arg The number of arguments expected for this script.    
    """
    global cache_file, get_freq_function, build_entry, web_freq
    global the_corpus_size, freq_name
    global low_limit, up_limit
    global text_input, count_vars
    global language
    global suffix_array
    global count_joint_frequency
    surface_flag = False
    ignorepos_flag = False
    mode = []
    for ( o, a ) in opts:
        if o in ( "-i", "--index" ) : 
            open_index( a )
            get_freq_function = get_freq_index
            mode.append( "index" )              
        elif o in ( "-y", "--yahoo" ) :
            print >> sys.stderr, "THIS OPTION IS DEPRECATED AS YAHOO " + \
                                 "SHUT DOWN THEIR FREE SEARCH API"
            sys.exit( 3 )
            #web_freq = YahooFreq()          
            #freq_name = "yahoo"
            #ignorepos_flag = True 
            #the_corpus_size = web_freq.corpus_size()         
            #get_freq_function = get_freq_web
            #mode.append( "yahoo" )   
        elif o in ( "-w", "--google" ) :
            web_freq = GoogleFreq()          
            freq_name = "google"
            ignorepos_flag = True 
            the_corpus_size = web_freq.corpus_size()         
            get_freq_function = get_freq_web
            mode.append( "google" ) 
        elif o in ( "-u", "--univ" ) :
            web_freq = GoogleFreqUniv( a )          
            freq_name = "google"
            ignorepos_flag = True 
            the_corpus_size = web_freq.corpus_size()         
            get_freq_function = get_freq_web
            mode.append( "google" )             
        elif o in ("-s", "--surface" ) :
            surface_flag = True
        elif o in ("-g", "--ignore-pos"): 
            ignorepos_flag = True
        elif o in ("-f", "--from", "-t", "--to" ) :
            try :
                limit = int(a)
                if limit < 0 :
                    raise ValueError, "Argument of " + o + " must be positive"
                if o in ( "-f", "--from" ) :
                    if up_limit == -1 or up_limit >= limit :
                        low_limit = limit
                    else :
                        raise ValueError, "Argument of -f >= argument of -t"
                else :
                    if low_limit == -1 or low_limit <= limit :
                        up_limit = limit
                    else :
                        raise ValueError, "Argument of -t <= argument of -t"
            except ValueError, message :
                print >> sys.stderr, message
                print >> sys.stderr, "Argument of " + o + " must be integer"
                usage( usage_string )
                sys.exit( 2 )
        elif o in ("-x", "--text" ) : 
            text_input = True
Ejemplo n.º 14
0
            #build_entry = lambda s, l, p: (l + SEPARATOR + WILDCARD).encode('utf-8')
            build_entry = lambda s, l, p: (l).encode('utf-8')
            suffix_array = index.load("lemma")
        else :      
            build_entry = lambda s, l, p: (l + ATTRIBUTE_SEPARATOR + p).encode('utf-8')
            suffix_array = index.load("lemma+pos")

    else : # Web search, entries are single surface or lemma forms         
        if surface_flag :
            build_entry = lambda s, l, p: s.encode('utf-8')
        else :
            build_entry = lambda s, l, p: l.encode('utf-8')
        
    if len(mode) != 1 :
        print >> sys.stderr, "Exactly one option -u, -w or -i, must be provided"
        usage( usage_string )
        sys.exit( 2 )
    elif text_input and web_freq is None :
        print >> sys.stderr, "-x option MUST be used with either -u or -w"
        usage( usage_string )
        sys.exit( 2 )
        
                
    treat_options_simplest( opts, arg, n_arg, usage_string )

################################################################################
# MAIN SCRIPT

# Long option names; a trailing "=" marks an option that takes a value
# (presumably getopt conventions, forwarded by read_options -- confirm).
longopts = [
    "yahoo",
    "google",
    "index=",
    "verbose",
    "ignore-pos",
    "surface",
    "from=",
    "to=",
    "text",
    "vars",
    "lang=",
    "no-joint",
    "univ=",
]
# treat_options is the callback that interprets the parsed options.
arg = read_options("ywi:vgsf:t:xal:Ju:", longopts, treat_options, -1,
                   usage_string)
Ejemplo n.º 15
0
    filename = os.path.join(source_dir, '{}.md'.format(slug))
    with open(filename, 'r') as reader:
        content = reader.read()

    headings = SECTION_PAT.findall(content)
    fill(result, headings, slug, 1, 'Section', '{base}.{i}', {'base': base})

    figures = FIGURE_PAT.findall(content)
    fill(result, figures, slug, counters['figure'], 'Figure', '{i}', {})
    counters['figure'] += len(figures)

    tables = TABLE_PAT.findall(content)
    fill(result, tables, slug, counters['table'], 'Table', '{i}', {})
    counters['table'] += len(tables)


def fill(result, items, slug, start, text, fmt, values):
    '''
    Add one numbered cross-reference entry to result for every item.

    result: dict that receives the entries, keyed by item.
    items:  keys to register, in numbering order.
    slug:   stored as the 'slug' of every entry.
    start:  number given to the first item; later items count up from it.
    text:   stored as the 'text' of every entry (e.g. 'Section', 'Figure').
    fmt:    str.format template for the 'value' field; it may refer to {i}
            (the item's number) and to any key of values.
    values: extra fields for fmt. The dict is only read: the running number
            is merged into a per-item copy, so the caller's dict is never
            left with a stray 'i' key.
    '''
    for i, key in enumerate(items, start):
        # A 'i' key already present in values is overridden by the counter.
        fields = dict(values, i=i)
        result[key] = {
            'slug': slug,
            'text': text,
            'value': fmt.format(**fields)
        }


if __name__ == '__main__':
    # Script entry point: exactly one command-line argument is expected
    # (the language, per the usage text below).
    if len(sys.argv) != 2:
        # NOTE(review): presumably usage() terminates the script; if it
        # returned, the main() call below would still run -- confirm.
        usage('make_toc.py language')
    main(sys.argv[1])
Ejemplo n.º 16
0
    print('==frontmatter==\n')
    with open(os.path.join(source_dir, 'index.html')) as reader:
        get_main_div(reader)

    print('==mainmatter==\n')
    for filename in make_filenames(source_dir, toc['lessons']):
        with open(filename, 'r') as reader:
            get_main_div(reader)

    print('==midpoint==\n')
    for filename in make_filenames(source_dir, toc['extras']):
        with open(filename, 'r') as reader:
            get_main_div(reader)


def make_filenames(source_dir, slugs):
    '''
    Turn slugs into filenames.

    Each slug maps to <source_dir>/<slug>/index.html; the returned list keeps
    the order of slugs.
    '''
    filenames = []
    for slug in slugs:
        filenames.append(os.path.join(source_dir, slug, 'index.html'))
    return filenames


#-------------------------------------------------------------------------------

if __name__ == '__main__':
    # Dispatch on how many arguments follow the script name: none reads a
    # single page from stdin, two (config file and source directory, per the
    # usage text) process everything, anything else is a usage error.
    extra_args = len(sys.argv) - 1
    if extra_args == 0:
        get_main_div(sys.stdin)
    elif extra_args == 2:
        get_all(sys.argv[1], sys.argv[2])
    else:
        usage('get_body.py [config_file source_dir]')
Ejemplo n.º 17
0
# All handlers, in the order in which main() applies them.
HANDLERS = [
    ExerciseAndSolution,
    ReplaceInclusion,
    GlossaryEntry,
    CrossRef,
    Figure,
    FigureRmd,
    FigureRef,
    Table,
    TableRef,
    Noindent,
    CodeBlock,
    Citation,
    Newline,
    PdfToSvg,
    GifToPng,
    Quote,
    Section,
    Subsection,
    Subsubsection,
    BibliographyTitle,
    FrontMatter,
    MainMatter,
    Midpoint,
    SpecialCharacters,
]


def main(which, language, include_dir):
    '''
    Run every handler in HANDLERS over the lines read from stdin.

    which names the handler method to invoke (the script passes 'pre' or
    'post'). Each handler class is instantiated with the cross-reference data
    for language and with include_dir; its method receives the current lines
    and returns the lines for the next handler. The final lines are written
    to stdout.
    '''
    lines = sys.stdin.readlines()
    crossref = get_crossref(language)
    for handler_class in HANDLERS:
        transform = getattr(handler_class(crossref, include_dir), which)
        lines = transform(lines)
    sys.stdout.writelines(lines)


if __name__ == '__main__':
    USAGE = 'transform.py [--pre | --post] language include_dir'
    # Run only with exactly three arguments whose first one is a known mode
    # switch; every other command line gets the usage message.
    if len(sys.argv) == 4 and sys.argv[1] in ['--pre', '--post']:
        # The leading dashes are stripped so that the mode doubles as the
        # handler method name.
        main(sys.argv[1].lstrip('-'), sys.argv[2], sys.argv[3])
    else:
        usage(USAGE)
Ejemplo n.º 18
0
            build_entry = lambda s, l, p: (s + SEPARATOR + WILDCARD).encode('utf-8')
        elif surface_flag :
            build_entry = lambda s, l, p: (s + SEPARATOR + p).encode('utf-8')
        elif pos_flag :
            build_entry = lambda s, l, p: (l + SEPARATOR + WILDCARD).encode('utf-8')
        else :      
            build_entry = lambda s, l, p: (l + SEPARATOR + p).encode('utf-8')
    else : # Web search, entries are single surface or lemma forms         
        if surface_flag :
            build_entry = lambda s, l, p: s.encode('utf-8')
        else :
            build_entry = lambda s, l, p: l.encode('utf-8')
        
    if len(mode) != 1 :
        print >> sys.stderr, "Exactly one option -y, -w or -i, must be provided"
        usage( usage_string )
        sys.exit( 2 )
    elif text_input and web_freq is None :
        print >> sys.stderr, "-x option MUST be used with either -y or -w"
        usage( usage_string )
        sys.exit( 2 )
        
                
    treat_options_simplest( opts, arg, n_arg, usage_string )

################################################################################
# MAIN SCRIPT

# Long option names; a trailing "=" marks an option that takes a value
# (presumably getopt conventions, forwarded by read_options -- confirm).
longopts = [
    "yahoo",
    "google",
    "index=",
    "verbose",
    "ignore-pos",
    "surface",
    "from=",
    "to=",
    "text",
    "vars",
    "lang=",
]
# treat_options is the callback that interprets the parsed options.
arg = read_options("ywi:vgsf:t:xal:", longopts, treat_options, -1,
                   usage_string)
Ejemplo n.º 19
0
def treat_options( opts, arg, n_arg, usage_string ) :
    """
        Callback function that handles the command line options of this script.
        
        @param opts The options parsed by getopts. Ignored.
        
        @param arg The argument list parsed by getopts.
        
        @param n_arg The number of arguments expected for this script.    
    """
    global cache_file, get_freq_function, build_entry, web_freq
    global the_corpus_size, freq_name
    global low_limit, up_limit
    global text_input, count_vars
    global language
    surface_flag = False
    pos_flag = False
    mode = []
    for ( o, a ) in opts:
        if o in ( "-i", "--index" ) : 
            open_index( a )
            get_freq_function = get_freq_index
            mode.append( "index" )              
        elif o in ( "-y", "--yahoo" ) :
            web_freq = YahooFreq()          
            freq_name = "yahoo"
            pos_flag = True 
            the_corpus_size = web_freq.corpus_size()         
            get_freq_function = get_freq_web
            mode.append( "yahoo" )   
        elif o in ( "-w", "--google" ) :
            web_freq = GoogleFreq()          
            freq_name = "google"
            pos_flag = True 
            the_corpus_size = web_freq.corpus_size()         
            get_freq_function = get_freq_web
            mode.append( "google" ) 
        elif o in ("-s", "--surface" ) :
            surface_flag = True
        elif o in ("-g", "--ignore-pos"): 
            pos_flag = True
        elif o in ("-f", "--from", "-t", "--to" ) :
            try :
                limit = int(a)
                if limit < 0 :
                    raise ValueError, "Argument of " + o + " must be positive"
                if o in ( "-f", "--from" ) :
                    if up_limit == -1 or up_limit >= limit :
                        low_limit = limit
                    else :
                        raise ValueError, "Argument of -f >= argument of -t"
                else :
                    if low_limit == -1 or low_limit <= limit :
                        up_limit = limit
                    else :
                        raise ValueError, "Argument of -t <= argument of -t"
            except ValueError, message :
                print >> sys.stderr, message
                print >> sys.stderr, "Argument of " + o + " must be integer"
                usage( usage_string )
                sys.exit( 2 )
        elif o in ("-x", "--text" ) : 
            text_input = True
Ejemplo n.º 20
0
    '''
    Main driver: read bibliography from stdin, format, and print.
    '''
    print(HEADER.format(language))
    text = sys.stdin.read()
    if text:
        try:
            source = bibtexparser.loads(text).entries
            for entry in source:
                for h in HANDLERS[entry['ENTRYTYPE']]:
                    if type(h) is tuple:
                        prefix, func = h
                        text = func(entry)
                        if text:
                            sys.stdout.write(prefix + text)
                    elif callable(h):
                        sys.stdout.write(h(entry))
                    else:
                        sys.stdout.write(h)
                sys.stdout.write('\n\n')
        except Exception as e:
            sys.stderr.write('\nERROR {}:: {}\n'.format(str(e), str(entry)))
    print(FOOTER)


# Command-line launch.
if __name__ == '__main__':
    # Script entry point: exactly one command-line argument is expected
    # (the language, per the usage text below).
    if len(sys.argv) != 2:
        # NOTE(review): presumably usage() terminates the script; if it
        # returned, the main() call below would still run -- confirm.
        usage('bib2m language < input > output')
    main(sys.argv[1])