Esempio n. 1
0
def runNamespace():
    """Return the URI used as the namespace for objects local to this run.

    The result is worked out once and kept in the module-level
    ``runNamespaceValue``.  The ``CWM_RUN_NS`` environment entry is used
    when present; otherwise a name is built from ``time()`` and
    ``getpid()`` under ``uripath.base()``.
    """
    # @@@ include hostname (privacy?) (hash it?)
    global runNamespaceValue
    if runNamespaceValue is not None:
        return runNamespaceValue    # already computed on an earlier call
    try:
        runNamespaceValue = environ["CWM_RUN_NS"]
    except KeyError:
        root = uripath.base()
        stamp = ".run-%sp%s#" % (repr(time()), repr(getpid()))
        runNamespaceValue = uripath.join(root, stamp)
    # absolutize: the environment value may be a relative reference
    runNamespaceValue = join(base(), runNamespaceValue)
    return runNamespaceValue
Esempio n. 2
0
def runNamespace():
    """Give back a URI to serve as namespace for run-local objects.

    Computed lazily on first use and remembered in the global
    ``runNamespaceValue``.  ``CWM_RUN_NS`` from the environment wins if
    set; failing that, a ".run-<time>p<pid>#" reference is generated
    relative to ``uripath.base()``.
    """
    # @@@ include hostname (privacy?) (hash it?)
    global runNamespaceValue
    if runNamespaceValue is not None:
        return runNamespaceValue
    try:
        runNamespaceValue = environ["CWM_RUN_NS"]
    except KeyError:
        here = uripath.base()
        local = ".run-" + repr(time()) + "p" + repr(getpid()) + "#"
        runNamespaceValue = uripath.join(here, local)
    # Resolve against the base so the stored value is always absolute.
    runNamespaceValue = join(base(), runNamespaceValue)
    return runNamespaceValue
Esempio n. 3
0
 def __init__(self, name, store=None):
     """Remember the name and the store this object belongs to.

     A name containing no ':' is taken to be relative and is resolved
     against uripath.base(); any other name is kept unchanged.
     """
     isRelative = ':' not in name
     if isRelative:
         name = uripath.join(uripath.base(), name)
     self._name = name
     self.store = store
     self._seen = dict()
Esempio n. 4
0
 def __init__(self, name, store=None):
     """Set up the instance with its name and (optional) store.

     Names without a ':' are joined onto uripath.base() so that the
     stored name always carries a scheme-like prefix.
     """
     if ':' in name:
         absolute = name
     else:
         absolute = uripath.join(uripath.base(), name)
     self._name = absolute
     self.store = store
     self._seen = dict()
Esempio n. 5
0
def main(argv):
    """Crawl from the site named on the command line and dump RDF to stdout.

    argv[1] is the starting site and argv[2] an integer handed to
    Crawler.crawlFrom as its third argument (presumably a limit on the
    crawl -- confirm against Crawler).
    """
    import sys

    site, rawLimit = argv[1:3]
    limit = int(rawLimit)
    kb = formula()
    baseURI = uripath.base()
    Crawler(kb, baseURI).crawlFrom(site, site, limit)
    kb.close()
    # Serialise what was collected to standard output.
    sink = toXML.ToRDF(sys.stdout, baseURI)
    bind('dc', DC(''))
    bind('s', RDFS(''))
    myStore.store.dumpNested(kb, sink)
Esempio n. 6
0
def main(argv):
    """Run a crawl starting at argv[1] and write the result as RDF.

    argv[2] is converted to int and passed as the last argument of
    Crawler.crawlFrom; the collected formula is then dumped to
    sys.stdout through toXML.ToRDF.
    """
    import sys

    startSite, countText = argv[1:3]
    count = int(countText)
    collected = formula()
    baseURI = uripath.base()
    crawler = Crawler(collected, baseURI)
    crawler.crawlFrom(startSite, startSite, count)
    collected.close()
    out = toXML.ToRDF(sys.stdout, baseURI)
    # Prefix bindings are registered before the dump.
    bind('dc', DC(''))
    bind('s', RDFS(''))
    myStore.store.dumpNested(collected, out)
Esempio n. 7
0
def doCommand():
    """Command line RDF/N3 crawler
        
 crawl <uriref>

options:
 
See http://www.w3.org/2000/10/swap/doc/cwm  for more documentation.
"""
    global agenda
    global already
    uriref = sys.argv[1]
    uri = join(base(), uriref)
    r = symbol(uri)
    diag.setVerbosity(0)
    print "@prefix : <http://www.w3.org/2000/10/swap/util/semweb#>."
    print "# Generated by crawl.py ", cvsRevision[1:-1]
    agenda = [r]
    while agenda != []:
        r = agenda[0]
        agenda = agenda[1:]
        already.append(r)
        crawl(r)
    print "# ", len(already), "attempts,", successes, "successes."
Esempio n. 8
0
def doCommand():
    """Command line RDF/N3 crawler
        
 crawl <uriref>

options:
 
See http://www.w3.org/2000/10/swap/doc/cwm  for more documentation.
"""
    global agenda
    global already
    uriref = sys.argv[1]
    uri = join(base(), uriref)
    r = symbol(uri)
    diag.setVerbosity(0)
    print "@prefix : <http://www.w3.org/2000/10/swap/util/semweb#>."
    print "# Generated by crawl.py ", cvsRevision[1:-1]
    agenda = [r]
    while agenda != []:
	r = agenda[0]
	agenda = agenda[1:]
	already.append(r)
	crawl(r)
    print "# ", len(already), "attempts,", successes, "successes."
Esempio n. 9
0
File: cwm_os.py Progetto: weyls/swap
 def evaluateObject(self, subj_py):
     """Return subj_py expressed relative to uripath.base().

     Only string inputs are handled; for anything else a warning is
     reported through progress() and None is returned.
     """
     if verbosity() > 80:
         progress("os:baseRelative input:" + repr(subj_py))
     if not isString(subj_py):
         progress("Warning: os:baseRelative input is not a string: " + repr(subj_py))
         return None
     return uripath.refTo(uripath.base(), subj_py)
Esempio n. 10
0
def load(store, uri=None, openFormula=None, asIfFrom=None, contentType=None,
                flags="", referer=None, why=None, topLevel=False):
    """Get and parse document.  Guesses format if necessary.

    store:       used to create the target formula (store.newFormula())
                 when openFormula is not given, and handed to the parsers.
    uri:         if None, load from standard input; otherwise it is
                 resolved against uripath.base() and opened with
                 urlopenForRDF.
    openFormula: if given, the document is parsed into this formula.
    asIfFrom:    document URI given to the parser as thisDoc; defaults to
                 the absolute address the data was read from.
    contentType: caller's hint, consulted only when the received
                 Content-Type header did not settle the format.
    flags:       passed through to the RDF/XML and N3 parsers.  For
                 RDF/XML the value 'rdflib' selects the rdflib parser.
    referer:     passed to urlopenForRDF.
    why:         passed through to the parsers.
    topLevel:    if true, newTopLevelFormula() is called on the formula.

    NOTE: earlier documentation mentioned a `remember` flag; this
    signature has no such parameter.

    The environment variables CWM_RDFLIB, CWM_RDF_PARSER and
    CWM_N3_PARSER influence which parser implementation is used.

    Returns:  top-level formula of the parsed document.
    Raises:   DocumentAccessError when fetching or reading fails with
              IOError or OSError; errors raised by the parsers propagate.

    This is an independent function, as it is fairly independent
    of the store. However, it is natural to call it as a method on the store.
    And a proliferation of APIs confuses.
    """
    #    if referer is None:
    #        raise RuntimeError("We are trying to force things to include a referer header")
    try:
        baseURI = uripath.base()
        if uri is not None:
            addr = uripath.join(baseURI, uri) # Make abs from relative
            if diag.chatty_flag > 40: progress("Taking input from " + addr)
            netStream = urlopenForRDF(addr, referer)
            if diag.chatty_flag > 60:
                progress("   Headers for %s: %s\n" %(addr, netStream.headers.items()))
            receivedContentType = netStream.headers.get(HTTP_Content_Type, None)
        else:
            if diag.chatty_flag > 40: progress("Taking input from standard input")
            addr = uripath.join(baseURI, "STDIN") # Make abs from relative
            netStream = sys.stdin
            receivedContentType = None

        #    if diag.chatty_flag > 19: progress("HTTP Headers:" +`netStream.headers`)
        #    @@How to get at all headers??
        #    @@ Get sensible net errors and produce dignostics

        # Step 1: trust the Content-Type that came with the data.
        guess = None
        if receivedContentType:
            if diag.chatty_flag > 9:
                progress("Recieved Content-type: " + repr(receivedContentType) + " for "+addr)
            if receivedContentType.find('xml') >= 0 or (
                     receivedContentType.find('rdf')>=0
                     and not (receivedContentType.find('n3')>=0)  ):
                guess = "application/rdf+xml"
            elif receivedContentType.find('n3') >= 0:
                guess = "text/n3"
        # Step 2: fall back on the caller's hint.
        if guess is None and contentType:
            if diag.chatty_flag > 9:
                progress("Given Content-type: " + repr(contentType) + " for "+addr)
            if contentType.find('xml') >= 0 or (
                    contentType.find('rdf') >= 0  and not (contentType.find('n3') >= 0 )):
                guess = "application/rdf+xml"
            elif contentType.find('n3') >= 0:
                guess = "text/n3"
            # str.find returns -1 (which is true) when the text is absent,
            # so the result must be compared with 0; testing it bare made
            # nearly every other content type look like SPARQL.
            elif contentType.find('sparql') >= 0 or contentType.find('rq') >= 0:
                guess = "x-application/sparql"
        buffer = netStream.read()
        # Step 3: sniff the first characters of the content itself.
        if guess is None:

            # can't be XML if it starts with these...
            if buffer[0:1] == "#" or buffer[0:7] == "@prefix":
                guess = 'text/n3'
            elif buffer[0:6] == 'PREFIX' or buffer[0:4] == 'BASE':
                guess = "x-application/sparql"
            elif buffer.find('xmlns="') >=0 or buffer.find('xmlns:') >=0: #"
                guess = 'application/rdf+xml'
            else:
                guess = 'text/n3'
            if diag.chatty_flag > 9: progress("Guessed ContentType:" + guess)
    except (IOError, OSError):
        raise DocumentAccessError(addr, sys.exc_info() )

    if asIfFrom is None:
        asIfFrom = addr
    if openFormula is not None:
        F = openFormula
    else:
        F = store.newFormula()
    if topLevel:
        newTopLevelFormula(F)
    import os
    if guess == "x-application/sparql":
        if diag.chatty_flag > 49: progress("Parsing as SPARQL")
        from sparql import sparql_parser
        import sparql2cwm
        convertor = sparql2cwm.FromSparql(store, F, why=why)
        import StringIO
        p = sparql_parser.N3Parser(StringIO.StringIO(buffer), sparql_parser.branches, convertor)
        F = p.parse(sparql_parser.start).close()
    elif guess == 'application/rdf+xml':
        if diag.chatty_flag > 49: progress("Parsing as RDF")
#       import sax2rdf, xml.sax._exceptions
#       p = sax2rdf.RDFXMLParser(store, F,  thisDoc=asIfFrom, flags=flags)
        if flags == 'rdflib' or int(os.environ.get("CWM_RDFLIB", 0)):
            parser = 'rdflib'
            flags = ''
        else:
            parser = os.environ.get("CWM_RDF_PARSER", "sax2rdf")
        import rdfxml
        p = rdfxml.rdfxmlparser(store, F,  thisDoc=asIfFrom, flags=flags,
                parser=parser, why=why)

        p.feed(buffer)
        F = p.close()
    else:
        assert guess == 'text/n3'
        if diag.chatty_flag > 49: progress("Parsing as N3")
        if os.environ.get("CWM_N3_PARSER", 0) == 'n3p':
            import n3p_tm
            import triple_maker
            tm = triple_maker.TripleMaker(formula=F, store=store)
            p = n3p_tm.n3p_tm(asIfFrom, tm)
        else:
            p = notation3.SinkParser(store, F,  thisDoc=asIfFrom,flags=flags, why=why)

        try:
            p.startDoc()
            p.feed(buffer)
            p.endDoc()
        except:
            # "%" binds tighter than "or": without the parentheses the
            # fallback to the buffer could never be selected and
            # "Failed to parse None" was reported for standard input.
            progress("Failed to parse %s" % (uri or buffer))
            raise

    # NOTE(review): this tests truthiness while the branch above tests
    # "is not None"; if a formula object can be false (e.g. when empty)
    # the two disagree -- confirm which is intended.
    if not openFormula:
        F = F.close()
    return F
Esempio n. 11
0
    for s in errors: print "\t%s" % s
    exit(-2)
else:
    print "Ok for predictive parsing"

#print "Branch table:", branchTable
print "Literal terminals:", literalTerminals.keys()
print "Token regular expressions:"
for r in tokenRegexps: print "\t%s matches %s" %(r, tokenRegexps[r].pattern) 

yacc=open(argv[1]+"-yacc.y", "w")
yaccConvert(yacc, document, tokenRegexps)
#while agenda:
#    x = agenda[0]
#    agenda = agenda[1:]
#    already.append(x)
#    yaccProduction(yacc, x, tokenRegexps)
yacc.close()

if len(argv) <= 3: exit(0)
parseFile = argv[3]
ip = webAccess.urlopenForRDF(uripath.join(uripath.base(), parseFile), None)
str = ip.read()
sink = g.newFormula()
p = PredictiveParser(sink=sink, top=document, branchTable= branchTable,
	tokenRegexps= tokenRegexps)
p.parse(str)

    
#ends
Esempio n. 12
0
else:
    print "Ok for predictive parsing"

#print "Branch table:", branchTable
print "Literal terminals:", literalTerminals.keys()
print "Token regular expressions:"
for r in tokenRegexps:
    print "\t%s matches %s" % (r, tokenRegexps[r].pattern)

yacc = open(argv[1] + "-yacc.y", "w")
yaccConvert(yacc, document, tokenRegexps)
#while agenda:
#    x = agenda[0]
#    agenda = agenda[1:]
#    already.append(x)
#    yaccProduction(yacc, x, tokenRegexps)
yacc.close()

if len(argv) <= 3: exit(0)
parseFile = argv[3]
ip = webAccess.urlopenForRDF(uripath.join(uripath.base(), parseFile), None)
str = ip.read()
sink = g.newFormula()
p = PredictiveParser(sink=sink,
                     top=document,
                     branchTable=branchTable,
                     tokenRegexps=tokenRegexps)
p.parse(str)

#ends
Esempio n. 13
0
def load(store,
         uri=None,
         openFormula=None,
         asIfFrom=None,
         contentType=None,
         flags="",
         referer=None,
         why=None,
         topLevel=False):
    """Get and parse document.  Guesses format if necessary.

    store:       used to create the target formula (store.newFormula())
                 when openFormula is not given, and handed to the parsers.
    uri:         if None, load from standard input; otherwise it is
                 resolved against uripath.base() and opened with
                 urlopenForRDF.
    openFormula: if given, the document is parsed into this formula.
    asIfFrom:    document URI given to the parser as thisDoc; defaults to
                 the absolute address the data was read from.
    contentType: caller's hint, consulted only when the received
                 Content-Type header did not settle the format.
    flags:       passed through to the RDF/XML and N3 parsers.  For
                 RDF/XML the value 'rdflib' selects the rdflib parser.
    referer:     passed to urlopenForRDF.
    why:         passed through to the parsers.
    topLevel:    if true, newTopLevelFormula() is called on the formula.

    NOTE: earlier documentation mentioned a `remember` flag; this
    signature has no such parameter.

    The environment variables CWM_RDFLIB, CWM_RDF_PARSER and
    CWM_N3_PARSER influence which parser implementation is used.

    Returns:  top-level formula of the parsed document.
    Raises:   DocumentAccessError when fetching or reading fails with
              IOError or OSError; errors raised by the parsers propagate.

    This is an independent function, as it is fairly independent
    of the store. However, it is natural to call it as a method on the store.
    And a proliferation of APIs confuses.
    """
    #    if referer is None:
    #        raise RuntimeError("We are trying to force things to include a referer header")
    try:
        baseURI = uripath.base()
        if uri is not None:
            addr = uripath.join(baseURI, uri)  # Make abs from relative
            if diag.chatty_flag > 40: progress("Taking input from " + addr)
            netStream = urlopenForRDF(addr, referer)
            if diag.chatty_flag > 60:
                progress("   Headers for %s: %s\n" %
                         (addr, netStream.headers.items()))
            receivedContentType = netStream.headers.get(
                HTTP_Content_Type, None)
        else:
            if diag.chatty_flag > 40:
                progress("Taking input from standard input")
            addr = uripath.join(baseURI, "STDIN")  # Make abs from relative
            netStream = sys.stdin
            receivedContentType = None

        #    if diag.chatty_flag > 19: progress("HTTP Headers:" +`netStream.headers`)
        #    @@How to get at all headers??
        #    @@ Get sensible net errors and produce dignostics

        # Step 1: trust the Content-Type that came with the data.
        guess = None
        if receivedContentType:
            if diag.chatty_flag > 9:
                progress("Recieved Content-type: " + repr(receivedContentType) +
                         " for " + addr)
            if receivedContentType.find('xml') >= 0 or (
                    receivedContentType.find('rdf') >= 0
                    and not (receivedContentType.find('n3') >= 0)):
                guess = "application/rdf+xml"
            elif receivedContentType.find('n3') >= 0:
                guess = "text/rdf+n3"
        # Step 2: fall back on the caller's hint.
        if guess is None and contentType:
            if diag.chatty_flag > 9:
                progress("Given Content-type: " + repr(contentType) + " for " +
                         addr)
            if contentType.find('xml') >= 0 or (
                    contentType.find('rdf') >= 0
                    and not (contentType.find('n3') >= 0)):
                guess = "application/rdf+xml"
            elif contentType.find('n3') >= 0:
                guess = "text/rdf+n3"
            # str.find returns -1 (which is true) when the text is absent,
            # so the result must be compared with 0; testing it bare made
            # nearly every other content type look like SPARQL.
            elif (contentType.find('sparql') >= 0
                  or contentType.find('rq') >= 0):
                guess = "x-application/sparql"
        buffer = netStream.read()
        # Step 3: sniff the first characters of the content itself.
        if guess is None:

            # can't be XML if it starts with these...
            if buffer[0:1] == "#" or buffer[0:7] == "@prefix":
                guess = 'text/rdf+n3'
            elif buffer[0:6] == 'PREFIX' or buffer[0:4] == 'BASE':
                guess = "x-application/sparql"
            elif buffer.find('xmlns="') >= 0 or buffer.find('xmlns:') >= 0:  #"
                guess = 'application/rdf+xml'
            else:
                guess = 'text/rdf+n3'
            if diag.chatty_flag > 9: progress("Guessed ContentType:" + guess)
    except (IOError, OSError):
        raise DocumentAccessError(addr, sys.exc_info())

    if asIfFrom is None:
        asIfFrom = addr
    if openFormula is not None:
        F = openFormula
    else:
        F = store.newFormula()
    if topLevel:
        newTopLevelFormula(F)
    import os
    if guess == "x-application/sparql":
        if diag.chatty_flag > 49: progress("Parsing as SPARQL")
        from sparql import sparql_parser
        import sparql2cwm
        convertor = sparql2cwm.FromSparql(store, F, why=why)
        import StringIO
        p = sparql_parser.N3Parser(StringIO.StringIO(buffer),
                                   sparql_parser.branches, convertor)
        F = p.parse(sparql_parser.start).close()
    elif guess == 'application/rdf+xml':
        if diag.chatty_flag > 49: progress("Parsing as RDF")
        #       import sax2rdf, xml.sax._exceptions
        #       p = sax2rdf.RDFXMLParser(store, F,  thisDoc=asIfFrom, flags=flags)
        if flags == 'rdflib' or int(os.environ.get("CWM_RDFLIB", 0)):
            parser = 'rdflib'
            flags = ''
        else:
            parser = os.environ.get("CWM_RDF_PARSER", "sax2rdf")
        import rdfxml
        p = rdfxml.rdfxmlparser(store,
                                F,
                                thisDoc=asIfFrom,
                                flags=flags,
                                parser=parser,
                                why=why)

        p.feed(buffer)
        F = p.close()
    else:
        assert guess == 'text/rdf+n3'
        if diag.chatty_flag > 49: progress("Parsing as N3")
        if os.environ.get("CWM_N3_PARSER", 0) == 'n3p':
            import n3p_tm
            import triple_maker
            tm = triple_maker.TripleMaker(formula=F, store=store)
            p = n3p_tm.n3p_tm(asIfFrom, tm)
        else:
            p = notation3.SinkParser(store,
                                     F,
                                     thisDoc=asIfFrom,
                                     flags=flags,
                                     why=why)

        try:
            p.startDoc()
            p.feed(buffer)
            p.endDoc()
        except:
            # "%" binds tighter than "or": without the parentheses the
            # fallback to the buffer could never be selected and
            # "Failed to parse None" was reported for standard input.
            progress("Failed to parse %s" % (uri or buffer))
            raise

    # NOTE(review): this tests truthiness while the branch above tests
    # "is not None"; if a formula object can be false (e.g. when empty)
    # the two disagree -- confirm which is intended.
    if not openFormula:
        F = F.close()
    return F