Example #1
def parse(uri, start, followers, null):
    k0 = time.time()
    lexer = sparql_tokens.Lexer()            # build the SPARQL tokenizer
    k1 = time.time()                         # lexer construction finished
    ip = webAccess.urlopenForRDF(uri, None)  # fetch the document to parse
    lexer.input(ip)
    parser = Earley(start, followers, null)
    print 'ready to parse\n\n'
    k2 = time.time()
    # Return the productions recognised for the start symbol, the time spent
    # building the lexer, and the timestamp taken just before the fetch.
    return (parser.parse(lexer.token)[-1].productions[start], k1 - k0, k1)
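The helper above returns a tuple: the productions recognised for the start symbol, the time spent building the lexer, and the timestamp taken just before the fetch. A minimal, hypothetical call site might look like this; the URI and the grammar tables start, followers and null are placeholders standing in for values built elsewhere in the module, not part of the original project:

# Hypothetical call site; the URI and grammar tables are placeholders.
productions, lexer_setup_time, fetch_started_at = parse(
    "http://example.org/query.rq", start, followers, null)
print "lexer built in %fs" % lexer_setup_time
print "productions for the start symbol:", productions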
Example #2
def main():
    global already, agenda, errors
    parseAs = None
    grammarFile = None
    parseFile = None
    yaccFile = None
    global verbose
    global g
    verbose = 0
    lumped = 1

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "ha:v:p:g:y:",
            ["help", "as=", "verbose=", "parse=", "grammar=", "yacc="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    output = None
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        if o in ("-v", "--verbose"):
            verbose = int(a)
            diag.chatty_flag = int(a)
        if o in ("-a", "--as"):
            parseAs = uripath.join(uripath.base(), a)
        if o in ("-p", "--parse"):
            parseFile = uripath.join(uripath.base(), a)
        if o in ("-g", "--grammar"):
            grammarFile = uripath.join(uripath.base(), a)
        if o in ("-y", "--yacc"):
            yaccFile = uripath.join(uripath.base(), a)[5:]  # strip off file:

#    if testFiles == []: testFiles = [ "/dev/stdin" ]
    if not parseAs:
        usage()
        sys.exit(2)
    parseAs = uripath.join(uripath.base(), parseAs)
    if not grammarFile:
        grammarFile = parseAs.split("#")[0]  # strip off fragid
    else:
        grammarFile = uripath.join(uripath.base(), grammarFile)

    # The Grammar formula
    progress("Loading " + grammarFile)
    start = clock()
    g = load(grammarFile)
    taken = clock() - start + 1
    progress("Loaded %i statements in %fs, ie %f/s." %
             (len(g), taken, len(g) / taken))

    document = g.newSymbol(parseAs)

    already = []
    agenda = []
    errors = []
    doProduction(document)
    while agenda:
        x = agenda[0]
        agenda = agenda[1:]
        already.append(x)
        doProduction(x)

    if errors != []:
        progress("###### FAILED with %i errors." % len(errors))
        for s in errors:
            progress("\t%s" % s)
        exit(-2)
    else:
        progress("Ok for predictive parsing")

    #if parser.verb: progress "Branch table:", branchTable
    if verbose:
        progress("Literal terminals: %s" % literalTerminals.keys())
        progress("Token regular expressions:")
        for r in tokenRegexps:
            progress("\t%s matches %s" % (r, tokenRegexps[r].pattern))

    if yaccFile:
        yacc = open(yaccFile, "w")
        yaccConvert(yacc, document, tokenRegexps)
        yacc.close()

    if parseFile == None: exit(0)

    ip = webAccess.urlopenForRDF(parseFile, None)

    lexer = sparql_tokens.Lexer()
    lexer.input(ip)
    #str = ip.read().decode('utf_8')
    sink = g.newFormula()
    keywords = g.each(pred=BNF.keywords, subj=document)
    keywords = [a.value() for a in keywords]
    p = PredictiveParser(sink=sink,
                         top=document,
                         branchTable=branchTable,
                         tokenSet=tokenSet,
                         keywords=keywords)
    p.verb = 1
    start = clock()
    #print lexer.token()
    print p.parse(lexer.token)
    taken = clock() - start + 1
    #    progress("Loaded %i chars in %fs, ie %f/s." %
    #       (len(str), taken, len(str)/taken))
    progress("Parsed <%s> OK" % parseFile)
    sys.exit(0)  # didn't crash
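The agenda loop in main() above is a standard worklist traversal: doProduction() pushes newly reachable productions onto the global agenda, and the already list keeps each production from being processed twice. A self-contained sketch of the same idiom, with the globals replaced by explicit parameters (hypothetical names, not the original API), is:

from collections import deque

def process_all(start_symbol, do_production):
    # Worklist traversal: do_production(x) is assumed to return the symbols
    # it discovers, so they can be queued; 'seen' prevents reprocessing.
    agenda = deque([start_symbol])
    seen = set()
    while agenda:
        x = agenda.popleft()
        if x in seen:
            continue
        seen.add(x)
        agenda.extend(do_production(x))
    return seen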
Example #3
def main():
    global already, agenda, errors
    parseAs = None
    grammarFile = None
    parseFile = None
    yaccFile = None
    global verbose
    global g
    verbose = 0
    lumped = 1

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "ha:v:p:g:y:",
            ["help", "as=", "verbose=", "parse=", "grammar=", "yacc="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    output = None
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        if o in ("-v", "--verbose"):
            verbose = int(a)
            diag.chatty_flag = int(a)
        if o in ("-a", "--as"):
            parseAs = uripath.join(uripath.base(), a)
        if o in ("-p", "--parse"):
            parseFile = uripath.join(uripath.base(), a)
        if o in ("-g", "--grammar"):
            grammarFile = uripath.join(uripath.base(), a)
        if o in ("-y", "--yacc"):
            yaccFile = uripath.join(uripath.base(), a)[5:]  # strip off file:

#    if testFiles == []: testFiles = [ "/dev/stdin" ]
    if not parseAs:
        usage()
        sys.exit(2)
    parseAs = uripath.join(uripath.base(), parseAs)
    if not grammarFile:
        grammarFile = parseAs.split("#")[0]   # strip off fragid
    else:
        grammarFile = uripath.join(uripath.base(), grammarFile)

    # The Grammar formula
    progress("Loading " + grammarFile)
    start = clock()
    g = load(grammarFile)
    taken = clock() - start + 1
    progress("Loaded %i statements in %fs, ie %f/s." %
        (len(g), taken, len(g)/taken))
    
    document = g.newSymbol(parseAs)
    
    already = []
    agenda = []
    errors = []
    doProduction(document)
    while agenda:
        x = agenda[0]
        agenda = agenda[1:]
        already.append(x)
        doProduction(x)
        
    if errors != []:
        progress("###### FAILED with %i errors." % len(errors))
        for s in errors:
            progress("\t%s" % s)
        exit(-2)
    else:
        progress("Ok for predictive parsing")
    
    #if parser.verb: progress "Branch table:", branchTable
    if verbose:
        progress( "Literal terminals: %s" %  literalTerminals.keys())
        progress("Token regular expressions:")
        for r in tokenRegexps:
            progress( "\t%s matches %s" %(r, tokenRegexps[r].pattern) )
    
    if yaccFile:
        yacc = open(yaccFile, "w")
        yaccConvert(yacc, document, tokenRegexps)
        yacc.close()

    if parseFile == None: exit(0)

    
    ip = webAccess.urlopenForRDF(parseFile, None)
    
    str = ip.read().decode('utf_8')
    sink = g.newFormula()
    keywords = g.each(pred=BNF.keywords, subj=document)
    keywords = [a.value() for a in keywords]
    p = PredictiveParser(sink=sink,
                         top=document,
                         branchTable=branchTable,
                         tokenRegexps=tokenRegexps,
                         keywords=keywords)
    p.verb = verbose
    start = clock()
    p.parse(str)
    taken = clock() - start + 1
    progress("Loaded %i chars in %fs, ie %f/s." %
        (len(str), taken, len(str)/taken))
    progress("Parsed <%s> OK" % parseFile)
    sys.exit(0)   # didn't crash
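The two main() variants differ in how input reaches the parser: the previous example hands it a token-producing callable (p.parse(lexer.token)) together with a prebuilt tokenSet, while this one decodes the whole document and passes the string (p.parse(str)) together with tokenRegexps, leaving tokenisation to the parser. A sketch of the two call shapes, with placeholder arguments rather than values from the original projects:

# Variant A (previous example): the parser pulls tokens from a lexer callable.
p = PredictiveParser(sink=sink, top=document, branchTable=branchTable,
                     tokenSet=tokenSet, keywords=keywords)
p.parse(lexer.token)

# Variant B (this example): the parser tokenises the decoded source text itself.
p = PredictiveParser(sink=sink, top=document, branchTable=branchTable,
                     tokenRegexps=tokenRegexps, keywords=keywords)
p.parse(source_text)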
Example #4
        print "No server root URL path given (--webroot)",
        webroot = "/archive"
        print "Assuming " + webroot
    if uri is None:
        print "No URI given\n\n" + __doc__
        sys.exit(2)
    if uri[:7] != 'http://' and uri[:8] != 'https://':
        print "URI is not an http[s]:// URI\n" + __doc__
        sys.exit(2)

    #try:
    if filename != None:
        datafile = open(filename, "r")

    else:
        datafile = urlopenForRDF(uri)
        headers = datafile.headers
        print "Headers: " + ` headers `

    #except:
    #    print "Cannot access file \n" + filename + "\n"
    #    sys.exit(3);

    data = datafile.read()
    print "Data length: ", len(data)

    if data.find("rdf") < 0:
        print "File does not look like a data file:\n" + data[:100] + "\n"
        sys.exit(3)

    path = root + '/' + uri.split('://', 1)[1]  # strip off the scheme prefix