Example #1
def runNamespace():
    "Return a URI suitable as a namespace for run-local objects"
    # @@@ include hostname (privacy?) (hash it?)
    global runNamespaceValue
    if runNamespaceValue == None:
        try:
            runNamespaceValue = environ["CWM_RUN_NS"]
        except KeyError:
            runNamespaceValue = uripath.join(
                uripath.base(), ".run-" + `time()` + "p"+ `getpid()` +"#")
        runNamespaceValue = join(base(), runNamespaceValue) # absolutize
    return runNamespaceValue
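A minimal sketch of the fallback branch above, run by hand: the run-local tail is joined against uripath.base() and then absolutized again, exactly as the function does. It assumes the swap uripath module is importable; the printed URI is only illustrative.

from os import environ, getpid
from time import time
import uripath

tail = ".run-" + repr(time()) + "p" + repr(getpid()) + "#"
ns = environ.get("CWM_RUN_NS", uripath.join(uripath.base(), tail))
ns = uripath.join(uripath.base(), ns)   # absolutize, as runNamespace() does
print(ns)                               # e.g. file:///home/user/.run-1181182957.2p12345#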
Example #2
 def __init__(self, name, store=None):
     if ':' not in name:  #, "must be absolute: %s" % name
         base = uripath.base()
         name = uripath.join(base, name)
     self._name = name
     self.store = store
     self._seen = {}
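The constructor only absolutizes names that carry no scheme; a small sketch of that check, assuming uripath is importable (the names and the printed result are invented).

import uripath

name = "data/foo.n3"                    # relative: no ':' in it
if ':' not in name:
    name = uripath.join(uripath.base(), name)
print(name)                             # e.g. file:///home/user/data/foo.n3

name = "http://example.org/foo.n3"      # already absolute, so it is left untouched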
Example #3
 def __init__(self, name, store=None):
     if ':' not in name:    #, "must be absolute: %s" % name
         base = uripath.base()
         name = uripath.join(base, name)
     self._name = name
     self.store = store
     self._seen = {}
 def bind(self, pfx, ref):
     ref = ref[1:-1]  # take off the <>'s
     addr = uripath.join(self._baseURI, ref)
     #DEBUG("bind", pfx, ref, addr)
     self._sink.bind(pfx, addr)
     #@@ check for pfx already bound?
     self._prefixes[pfx] = addr
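What bind() does to a prefix declaration, sketched with plain uripath calls; the base URI and the reference are invented stand-ins for self._baseURI and the <>-wrapped value handed in by the parser.

import uripath

baseURI = "http://example.org/dir/doc"   # stands in for self._baseURI
ref = "<vocab/>"                         # arrives still wrapped in <>
addr = uripath.join(baseURI, ref[1:-1])
print(addr)                              # http://example.org/dir/vocab/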
Example #5
 def bind(self, pfx, ref):
     ref = ref[1:-1] # take off the <>'s
     addr = uripath.join(self._baseURI, ref)
     #DEBUG("bind", pfx, ref, addr)
     self._sink.bind(pfx, addr)
     #@@ check for pfx already bound?
     self._prefixes[pfx] = addr
Example #6
    def __init__(self,
                 sink,
                 openFormula,
                 thisDoc,
                 baseURI=None,
                 flags="",
                 why=None):
        self.testdata = ""
        if XMLLiteralsAsDomTrees:
            self.domImplementation = xml.dom.getDOMImplementation()
            self.domDocument = None
            self.domElement = None
        self.flags = flags
        self._stack = []  # Stack of states
        self._nsmap = []  # stack of namespace bindings
        self._prefixMap = []
        self.LiteralNS = None
        self._delayedStatement = None
        self.sink = sink
        self._thisDoc = thisDoc
        if baseURI != None: self._base = baseURI
        else: self._base = thisDoc
        self._state = STATE_OUTERMOST  # Maybe should ignore RDF outside <rdf:RDF>??
        if sink:
            if openFormula == None:
                self._context = sink.newFormula(thisDoc + "#_formula")
            else:
                self._context = openFormula
            self._formula = self._context  # Root formula
            self._genPrefix = uripath.join(
                thisDoc, "#_rdfxg")  # allow parameter override?
            self.sink.setGenPrefix(self._genPrefix)
            self.sink.startDoc()
            self.merge = self.sink.newSymbol(NODE_MERGE_URI)
        else:
            self._context = None
        self._reason = why  # Why the parser w
        self._reason2 = None  # Why these triples
        if diag.tracking:
            self._reason2 = BecauseOfData(sink.newSymbol(thisDoc),
                                          because=self._reason)

        self._subject = None
        self._predicate = None
        self._datatype = None
        self._language = None
        self._nodeIDs = {}
        self._items = []  # for <rdf:li> containers
        self._litDepth = 0

        version = "$Id: sax2rdf.py,v 1.52 2007/06/26 02:36:15 syosi Exp $"
        #        self.sink.makeComment("RDF parsed by "+version[1:-1])

        if "D" in self.flags:  # Assume default namespace declaration
            if sink: self.sink.setDefaultNamespace(self._thisDoc + "#")
            self._nsmap = [{"": "#"}]
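A sketch of how the root formula name and the generated-node prefix are derived from thisDoc in the constructor above; the document URI is invented.

import uripath

thisDoc = "http://example.org/doc.rdf"            # invented
rootFormulaURI = thisDoc + "#_formula"            # what sink.newFormula() is asked for
genPrefix = uripath.join(thisDoc, "#_rdfxg")      # prefix for generated ids
print(rootFormulaURI)                             # http://example.org/doc.rdf#_formula
print(genPrefix)                                  # http://example.org/doc.rdf#_rdfxg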
Example #8
    def __init__(self, sink, openFormula, thisDoc, baseURI=None, flags="", why=None):
        self.testdata = ""
        if XMLLiteralsAsDomTrees:
            self.domImplementation = xml.dom.getDOMImplementation()
            self.domDocument = None
            self.domElement = None
        self.flags = flags
        self._stack = []  # Stack of states
        self._nsmap = []  # stack of namespace bindings
        self._prefixMap = []
        self.LiteralNS = None
        self._delayedStatement = None
        self.sink = sink
        self._thisDoc = thisDoc
        if baseURI != None:
            self._base = baseURI
        else:
            self._base = thisDoc
        self._state = STATE_OUTERMOST  # Maybe should ignore RDF outside <rdf:RDF>??
        if sink:
            if openFormula == None:
                self._context = sink.newFormula(thisDoc + "#_formula")
            else:
                self._context = openFormula
            self._formula = self._context  # Root formula
            self._genPrefix = uripath.join(thisDoc, "#_rdfxg")  # allow parameter override?
            self.sink.setGenPrefix(self._genPrefix)
            self.sink.startDoc()
            self.merge = self.sink.newSymbol(NODE_MERGE_URI)
        else:
            self._context = None
        self._reason = why  # Why the parser w
        self._reason2 = None  # Why these triples
        if diag.tracking:
            self._reason2 = BecauseOfData(sink.newSymbol(thisDoc), because=self._reason)

        self._subject = None
        self._predicate = None
        self._datatype = None
        self._language = None
        self._nodeIDs = {}
        self._items = []  # for <rdf:li> containers
        self._litDepth = 0

        version = "$Id: sax2rdf.py,v 1.52 2007/06/26 02:36:15 syosi Exp $"
        #        self.sink.makeComment("RDF parsed by "+version[1:-1])

        if "D" in self.flags:  # Assume default namespace declaration
            if sink:
                self.sink.setDefaultNamespace(self._thisDoc + "#")
            self._nsmap = [{"": "#"}]
Example #9
 def load(self, uri, baseURI=''): 
     if uri: 
         import urllib
         uri = uripath.join(baseURI, uri)
         self._sink.makeComment("Taking input from " + uri)
         self.startDoc()
         self.feed(urllib.urlopen(uri).read())
         self.endDoc()
     else: 
         import sys
         self._sink.makeComment("Taking input from standard input")
         self.startDoc()
         self.feed(sys.stdin.read())
         self.endDoc()
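A usage sketch for load(): a relative uri is resolved against baseURI before it is fetched. The URIs are invented and parser stands for any instance of the class above.

import uripath

baseURI = "http://example.org/data/"
print(uripath.join(baseURI, "people.rdf"))    # http://example.org/data/people.rdf
# parser.load("people.rdf", baseURI=baseURI)  # would fetch the joined URI
# parser.load("")                             # empty uri falls back to standard input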
Example #10
def doCommand():
    """Command line RDF/N3 crawler
        
 crawl <uriref>

options:
 
See http://www.w3.org/2000/10/swap/doc/cwm  for more documentation.
"""
    global agenda
    global already
    uriref = sys.argv[1]
    uri = join(base(), uriref)
    r = symbol(uri)
    diag.setVerbosity(0)
    print "@prefix : <http://www.w3.org/2000/10/swap/util/semweb#>."
    print "# Generated by crawl.py ", cvsRevision[1:-1]
    agenda = [r]
    while agenda != []:
        r = agenda[0]
        agenda = agenda[1:]
        already.append(r)
        crawl(r)
    print "# ", len(already), "attempts,", successes, "successes."
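The crawler's work-list pattern in isolation, as a runnable sketch: seed the agenda with one URI, take items off the front, and record what has been visited. crawl() is a stub here; the seed URI is invented.

agenda = ["http://example.org/start"]
already = []

def crawl(r):
    pass    # the real crawl() parses r and may append new URIs to agenda

while agenda != []:
    r = agenda[0]
    agenda = agenda[1:]
    already.append(r)
    crawl(r)
print("# %d attempts" % len(already))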
Example #11
def doCommand():
    """Command line RDF/N3 crawler
        
 crawl <uriref>

options:
 
See http://www.w3.org/2000/10/swap/doc/cwm  for more documentation.
"""
    global agenda
    global already
    uriref = sys.argv[1]
    uri = join(base(), uriref)
    r = symbol(uri)
    diag.setVerbosity(0)
    print "@prefix : <http://www.w3.org/2000/10/swap/util/semweb#>."
    print "# Generated by crawl.py ", cvsRevision[1:-1]
    agenda = [r]
    while agenda != []:
        r = agenda[0]
        agenda = agenda[1:]
        already.append(r)
        crawl(r)
    print "# ", len(already), "attempts,", successes, "successes."
Example #12
else:
    print "Ok for predictive parsing"

#print "Branch table:", branchTable
print "Literal terminals:", literalTerminals.keys()
print "Token regular expressions:"
for r in tokenRegexps:
    print "\t%s matches %s" % (r, tokenRegexps[r].pattern)

yacc = open(argv[1] + "-yacc.y", "w")
yaccConvert(yacc, document, tokenRegexps)
#while agenda:
#    x = agenda[0]
#    agenda = agenda[1:]
#    already.append(x)
#    yaccProduction(yacc, x, tokenRegexps)
yacc.close()

if len(argv) <= 3: exit(0)
parseFile = argv[3]
ip = webAccess.urlopenForRDF(uripath.join(uripath.base(), parseFile), None)
str = ip.read()
sink = g.newFormula()
p = PredictiveParser(sink=sink,
                     top=document,
                     branchTable=branchTable,
                     tokenRegexps=tokenRegexps)
p.parse(str)

#ends
Example #13
    def crawlFrom(self, addr, prefix, max):
        fmla = self._fmla

        iter = 1
        queue = [addr]
        seen = []
        while queue:
            head = queue.pop()

            progress("crawling at: ", head, " iter ", iter, " of ", max)
            iter = iter + 1
            if iter > max:
                progress ("max limit reached.")
                break

            seen.append(head)

            try:
                rep = urllib2.urlopen(head)
                content = rep.read()
            except IOError:
                progress("can't GET", head)
                continue
                #@@ makeStatement(head type NoGood)

            # try to find a short label for
            # a diagram or some such.
            # try the last path segment,
            # or the 2nd last in case of an empty last segment...
            slash = head[:-1].rfind('/')
            label = head[slash+1:]
            
            ct = rep.info().getheader('content-type')
            progress("... got content of type ", ct)
            isHTML = ct.find('text/html') == 0

            fmla.add(symbol(head),
                     symbol(DC('type')),
                     literal(ct))

            # note that we're not peeking into the URI
            # to find out if it's HTML; we're just
            # eliding the extension in the case we
            # know (from the HTTP headers) that it's HTML.
            if isHTML and label[-5:] == '.html':
                label = label[:-5]

            fmla.add(symbol(head),
                     symbol(RDFS('label')),
                     literal(label))

            if not isHTML: continue
            
            progress("... parsing text/html content")
            doc = libxml2.htmlParseDoc(content, 'us-ascii')
            try:
                titles = doc.xpathNewContext().xpathEval('//title')
                title = titles[0].getContent()
            except: #@@figure out the right exceptions
                pass
            else:
                progress("... found title:", title)
                fmla.add(symbol(head),
                         symbol(DC('title')),
                         literal(str(title)) )
            
            hrefs = doc.xpathNewContext().xpathEval('//a/@href')
            progress("... found ", len(hrefs), " links")
                     
            for h in hrefs:
                h = h.getContent()
                progress("... found href", h)
                i = uripath.join(head, h)
                i = uripath.splitFrag(i)[0]
                progress("... found link", head, ' -> ', i)
                fmla.add(symbol(head),
                         symbol(DC('relation')),
                         symbol(i))
                if i[:len(prefix)] == prefix and i not in seen:
                    queue.append(i)
    def uri_ref2(self, str, i, res):
        """Generate uri from n3 representation.

        Note that the RDF convention of directly concatenating
        NS and local name is now used though I prefer inserting a '#'
        to make the namespaces look more like what XML folks expect.
        """
        qn = []
        j = self.qname(str, i, qn)
        if j >= 0:
            pairFudge = qn[0]
            pfx = pairFudge[0]
            ln = pairFudge[1]
            if pfx is None:
                assertFudge(0, "not used?")
                ns = self._baseURI + ADDED_HASH
            else:
                ns = self._bindings[pfx]
                if not ns:  # @@ pyjs should test undefined
                    if pfx == "_":  # Magic prefix 2001/05/30, can be overridden
                        res.append(self.anonymousNode(ln))
                        return j
                    raise BadSyntax(self._thisDoc, self.lines, str, i,
                                    'Prefix ' + pfx + ' not bound.')
            symb = self._store.newSymbol(ns + ln)
            if symb in self._variables:
                res.append(self._variables[symb])
            else:
                res.append(symb)  # @@@ "#" CONVENTION
            return j

        i = self.skipSpace(str, i)
        if i < 0: return -1

        if str[i] == "?":
            v = []
            j = self.variable(str, i, v)
            if j > 0:  # Forget variables as a class, only in context.
                res.append(v[0])
                return j
            return -1

        elif str[i] == "<":
            i = i + 1
            st = i
            while i < len(str):
                if str[i] == ">":
                    uref = str[st:i]  # the join should have dealt with "":
                    if self._baseURI:
                        uref = uripath.join(self._baseURI, uref)
                    else:
                        assertFudge( ":" in uref, \
                            "With no base URI, cannot deal with relative URIs")
                    if str[i - 1:i] == "#" and not uref[-1:] == "#":
                        uref = uref + "#"  # She meant it! Weirdness in urlparse?
                    symb = self._store.newSymbol(uref)
                    if symb in self._variables:
                        res.append(self._variables[symb])
                    else:
                        res.append(symb)
                    return i + 1
                i = i + 1
            raise BadSyntax(self._thisDoc, self.lines, str, j,
                            "unterminated URI reference")

        elif self.keywordsSet:
            v = []
            j = self.bareWord(str, i, v)
            if j < 0: return -1  # Forget variables as a class, only in context.
            if v[0] in self.keywords:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                                'Keyword "' + v[0] + '" not allowed here.')
            res.append(self._store.newSymbol(self._bindings[""] + v[0]))
            return j
        else:
            return -1
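The <...> branch of uri_ref2() in isolation: the text between the angle brackets is joined against the base, and a bare trailing '#' that the join may drop is put back. The URIs are invented; uripath is assumed importable.

import uripath

baseURI = "http://example.org/dir/doc.n3"

src = "<other#thing>"                       # an ordinary relative reference
uref = uripath.join(baseURI, src[1:-1])     # http://example.org/dir/other#thing

src = "<#>"                                 # "she meant it": a bare trailing '#'
uref = uripath.join(baseURI, src[1:-1])
if src[-2:-1] == "#" and uref[-1:] != "#":
    uref = uref + "#"                       # http://example.org/dir/doc.n3#
print(uref)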
Example #15
    def startElementNS(self, name, qname, attrs):
        """ Handle start tag.
        """

        if self._state != STATE_LITERAL:
            self.flush()
        self.bnode = None
        
        tagURI = ((name[0] or "") + name[1])

        if verbosity() > 80:
            indent = ". " * len(self._stack) 
            if not attrs:
                progress(indent+'# State was', self._state, ', start tag: <' + tagURI + '>')
            else:
                str = '# State =%s, start tag= <%s ' %( self._state, tagURI)
                for name, value in attrs.items():
                    str = str + "  " + `name` + '=' + '"' + `value` + '"'
                progress(indent + str + '>')


        self._stack.append([self._state, self._context, self._predicate,
                                self._subject, self._delayedStatement, self._base])
                                
        self._delayedStatement = None

        self._base = uripath.join(self._base, attrs.get((XML_NS_URI, "base"), self._base))
        x = self._base.find("#")
        if x >= 0: self._base = self._base[:x] # See rdf-tests/rdfcore/xmlbase/test013.rdf

        try:
            tagURI = uripath.join(self._base, tagURI)  # If relative, make absolute. Not needed for standard.
        except ValueError:
            pass
                                             # Needed for portable RDF generated with --rdf=z 
        
        self._language = attrs.get((XML_NS_URI, "lang"), None)

        value = attrs.get((RDF_NS_URI, "datatype"), None)
        if value != None: self._datatype = self.sink.newSymbol(self.uriref(value))
        else: self._datatype = None

        if self._state == STATE_OUTERMOST:
            if tagURI == RDF_NS_URI + "RDF":
                self._state = STATE_NO_SUBJECT
            else:
                if "R" not in self.flags:
                    self._state = STATE_NOT_RDF           # Ignore random XML without rdf:RDF
                else:
                    self._nodeElement(tagURI, attrs)    # Parse it as RDF.
                # http://www.w3.org/2000/10/rdf-tests/rdfcore/rdf-element-not-mandatory/test001.rdf
                    
        elif self._state == STATE_NOT_RDF:
            if tagURI == RDF_NS_URI + "RDF" and "T" in self.flags:
                self._state = STATE_NO_SUBJECT
            else:
                pass                    # Ignore embedded RDF

        elif self._state == STATE_NO_SUBJECT:  #MS1.0 6.2 obj :: description | container
            self._nodeElement(tagURI, attrs)
                
        elif self._state == STATE_DESCRIPTION:   # Expect predicate (property) PropertyElt
            #  propertyElt #MS1.0 6.12
            #  http://www.w3.org/2000/03/rdf-tracking/#rdf-containers-syntax-ambiguity
            if tagURI == RDF_NS_URI + "li":
                item = self._items[-1] + 1
                self._predicate = self.sink.newSymbol("%s_%s" % (RDF_NS_URI, item))
                self._items[-1] = item
            else:
                if tagURI in propertyElementExceptions:
                    raise BadSyntax(sys.exc_info(), 'Invalid predicate URI: %s' % tagURI) 
                self._predicate = self.sink.newSymbol(tagURI)

            self._state = STATE_VALUE  # May be looking for value but see parse type
#           self._datatype = None
#           self._language = None
            self.testdata = ""         # Flush value data
            
            # print "\n  attributes:", `attrs`
            properties = []
            gotSubject = 0
            haveResource = 0
            haveParseType = 0
            haveExtras = 0
            for name, value in attrs.items():
                ns, name = name
                if name == "ID":
                    print "# Warning: ID=%s on statement ignored" %  (value) # I consider these a bug
                    raise ValueError("ID attribute?  Reification not supported.")
                elif name == "parseType":
                    haveParseType = 1
#                   x = value.find(":")
#                   if x>=0: pref = value[:x]
#                   else: pref = ""
#                   nsURI = self._nsmap[-1].get(pref, None)
                    if value == "Resource":
                        c = self._context
                        s = self._subject
#                        self._subject = self.sink.newBlankNode(self._context, why=self._reason2)
                        self.idAboutAttr(attrs) #@@ not according to current syntax @@@@@@@@@@@
                        self.sink.makeStatement(( c, self._predicate, s, self._subject), why=self._reason2)
                        self._state = STATE_DESCRIPTION  # Nest description
                        
                    elif value == "Quote":
                            c = self._context
                            s = self._subject
                            self.idAboutAttr(attrs)  # set subject and context for nested description
                            self._subject = self.sink.newFormula()  # Forget anonymous genid - context is subject
                            if self._predicate is self.merge: # magic :-(
                                self._stack[-1][3] = self._subject  # St C P S retrofit subject of outer level!
                                self._delayedStatement = 1 # flag
                            else:
                                self._delayedStatement = c, self._predicate, s, self._subject
                            self._context = self._subject
                            self._subject = None
                            self._state = STATE_NO_SUBJECT  # Inside quote, there is no subject
                        
                    elif (value=="Collection" or
                        value[-11:] == ":collection"):  # Is this a daml:collection qname?

                        self._state = STATE_LIST  # Linked list of obj's
                    elif value == "Literal" or "S" in self.flags:  # Strictly, other types are literal SYN#7.2.20
                        self._state = STATE_LITERAL # That's an XML subtree not a string
                        self._litDepth = 1
                        self.LiteralNS = [{}]
                        self.testdata = '' #"@@sax2rdf.py bug@@" # buggy implementation
                        self._datatype = self.sink.newSymbol("http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral")
                        if XMLLiteralsAsDomTrees:
                            self.domDocument = self.domImplementation.createDocument(
                                'http://www.w3.org/1999/02/22-rdf-syntax-ns', 'envelope', None)
                            self.domElement = self.domDocument.documentElement
                    else:
                        raise SyntaxError("Unknown parse type '%s'" % value )
                elif name == "nodeID":
                    assert not gotSubject
                    if not isXML.isNCName(value):
                        raise  BadSyntax(sys.exc_info(), 'A nodeID must be a NCName %s' % value)
                    obj = self._nodeIDs.get(value, None)
                    if obj == None:
                        obj = self.newBlankNode()
                        self._nodeIDs[value] = obj
                    self.sink.makeStatement((self._context,
                                             self._predicate,
                                             self._subject,
                                             obj ), why=self._reason2)
                    self._state = STATE_NOVALUE  # NOT looking for value
                    self._subject = obj
                    gotSubject = 1
                elif name == "resource":
                    haveResource = 1
                    assert not gotSubject
                    x = self.sink.newSymbol(self.uriref(value)) 
                    self.sink.makeStatement((self._context,
                                             self._predicate,
                                             self._subject,
                                             x ), why=self._reason2)
                    self._state = STATE_NOVALUE  # NOT looking for value
                    self._subject = x
                    gotSubject = 1
                elif name == "datatype":
                    pass # Already set
                elif ns == XML_NS_URI or name[:3] == "xml":  #  Ignore (lang is already done)
                    pass # see rdf-tests/rdfcore/unrecognised-xml-attributes/test002.rdf
                else:
                    haveExtras = 1
                    properties.append((ns, name, value)) # wait till subject is clear
                assert haveResource + haveParseType  <= 1
                assert haveParseType + haveExtras <= 1
            if not gotSubject and properties:
                obj = self.newBlankNode()
                self.sink.makeStatement((self._context,
                                            self._predicate,
                                            self._subject,
                                            obj ), why=self._reason2)
                self._state = STATE_NOVALUE  # NOT looking for value
                self._subject = obj
            
            for ns, name, value in properties:
                self._propertyAttr(ns, name, value)
                
        elif self._state == STATE_LIST:   # damlCollection :: objs - make list
            # Subject and predicate are set and dangling. 
            c = self._context
            s = self._subject  # The tail of the list so far
            p = self._predicate
            pair = self.newBlankNode()        # The new pair
            self.sink.makeStatement(( c,   # Link in new pair
                                      p,
                                      s,
                                      pair ), why=self._reason2) 
            self.idAboutAttr(attrs)  # set subject (the next item) and context 
            if tagURI != RDF_NS_URI + "Description":
                self.sink.makeStatement((c,
                                         self.sink.newSymbol(RDF_NS_URI
                                                             +"type"),
                                         self._subject,
                                         self.sink.newSymbol(tagURI) ),
                                        why=self._reason2)

            self.sink.makeStatement(( c,
                                      self.sink.newSymbol(List_NS + "first"),
                                      pair,
                                      self._subject), why=self._reason2) # new item
            if "S" in self.flags: # Strictly to spec
                self.sink.makeStatement(( c,
                                        self.sink.newSymbol(RDF_NS_URI + "type"),
                                        self.sink.newSymbol(List_NS + "List"),
                                        self._subject), why=self._reason2) # new item
            
            self._stack[-1][2] = self.sink.newSymbol(List_NS + "rest")  # Leave dangling link   #@check
            self._stack[-1][3] = pair  # Underlying state tracks tail of growing list

         
        elif self._state == STATE_VALUE:   # Value :: Obj in this case #MS1.0 6.17  6.2
            c = self._context
            p = self._predicate
            s = self._subject
            self._nodeElement(tagURI, attrs)   # Parse the object thing's attributes
            self.sink.makeStatement((c, p, s, self._subject), why=self._reason2)
            
            self._stack[-1][0] = STATE_NOVALUE  # When we return, cannot have literal now

        elif self._state == STATE_NOVALUE:
            str = ""
            for e in self._stack: str = str + `e`+"\n"
            raise BadSyntax(sys.exc_info(), """Expected no value, found name=%s; qname=%s, attrs=%s
            in nested context:\n%s""" %(name, qname, attrs, str))

        elif self._state == STATE_LITERAL:
            self._litDepth = self._litDepth + 1
            if XMLLiteralsAsDomTrees:
#               progress("@@@ XML literal name: ", name)
                self.literal_element_start_DOM(name, qname, attrs)
            else:
                self.literal_element_start(name, qname, attrs)
            #@@ need to capture the literal
        else:
            raise RuntimeError, ("Unknown state in RDF parser", self._stack) # Unknown state
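How xml:base is folded into the running base near the top of startElementNS(): the attribute value, when present, is resolved against the current base and any fragment is then stripped (see rdfcore test013). A sketch with invented URIs.

import uripath

base = "http://example.org/dir/doc.rdf"
xml_base = "sub/other.rdf#frag"              # value of an xml:base attribute
base = uripath.join(base, xml_base)          # http://example.org/dir/sub/other.rdf#frag
x = base.find("#")
if x >= 0:
    base = base[:x]                          # http://example.org/dir/sub/other.rdf
print(base)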
Example #16
def load(store, uri=None, openFormula=None, asIfFrom=None, contentType=None,
                flags="", referer=None, why=None, topLevel=False):
    """Get and parse document.  Guesses format if necessary.

    uri:      if None, load from standard input.
    remember: if 1, store as metadata the relationship between this URI and this formula.
    
    Returns:  top-level formula of the parsed document.
    Raises:   IOError, SyntaxError, DocumentError
    
    This is an independent function, as it is fairly independent
    of the store. However, it is natural to call it as a method on the store.
    And a proliferation of APIs confuses.
    """
#    if referer is None:
#        raise RuntimeError("We are trying to force things to include a referer header")
    try:
        baseURI = uripath.base()
        if uri != None:
            addr = uripath.join(baseURI, uri) # Make abs from relative
            if diag.chatty_flag > 40: progress("Taking input from " + addr)
            netStream = urlopenForRDF(addr, referer)
            if diag.chatty_flag > 60:
                progress("   Headers for %s: %s\n" %(addr, netStream.headers.items()))
            receivedContentType = netStream.headers.get(HTTP_Content_Type, None)
        else:
            if diag.chatty_flag > 40: progress("Taking input from standard input")
            addr = uripath.join(baseURI, "STDIN") # Make abs from relative
            netStream = sys.stdin
            receivedContentType = None

    #    if diag.chatty_flag > 19: progress("HTTP Headers:" +`netStream.headers`)
    #    @@How to get at all headers??
    #    @@ Get sensible net errors and produce diagnostics

        guess = None
        if receivedContentType:
            if diag.chatty_flag > 9:
                progress("Received Content-type: " + `receivedContentType` + " for "+addr)
            if receivedContentType.find('xml') >= 0 or (
                     receivedContentType.find('rdf')>=0
                     and not (receivedContentType.find('n3')>=0)  ):
                guess = "application/rdf+xml"
            elif receivedContentType.find('n3') >= 0:
                guess = "text/n3"
        if guess== None and contentType:
            if diag.chatty_flag > 9:
                progress("Given Content-type: " + `contentType` + " for "+addr)
            if contentType.find('xml') >= 0 or (
                    contentType.find('rdf') >= 0  and not (contentType.find('n3') >= 0 )):
                guess = "application/rdf+xml"
            elif contentType.find('n3') >= 0:
                guess = "text/n3"
            elif contentType.find('sparql') >= 0 or contentType.find('rq') >= 0:
                guess = "x-application/sparql"
        buffer = netStream.read()
        if guess == None:

            # can't be XML if it starts with these...
            if buffer[0:1] == "#" or buffer[0:7] == "@prefix":
                guess = 'text/n3'
            elif buffer[0:6] == 'PREFIX' or buffer[0:4] == 'BASE':
                guess = "x-application/sparql"
            elif buffer.find('xmlns="') >=0 or buffer.find('xmlns:') >=0: #"
                guess = 'application/rdf+xml'
            else:
                guess = 'text/n3'
            if diag.chatty_flag > 9: progress("Guessed ContentType:" + guess)
    except (IOError, OSError):  
        raise DocumentAccessError(addr, sys.exc_info() )
        
    if asIfFrom == None:
        asIfFrom = addr
    if openFormula != None:
        F = openFormula
    else:
        F = store.newFormula()
    if topLevel:
        newTopLevelFormula(F)
    import os
    if guess == "x-application/sparql":
        if diag.chatty_flag > 49: progress("Parsing as SPARQL")
        from sparql import sparql_parser
        import sparql2cwm
        convertor = sparql2cwm.FromSparql(store, F, why=why)
        import StringIO
        p = sparql_parser.N3Parser(StringIO.StringIO(buffer), sparql_parser.branches, convertor)
        F = p.parse(sparql_parser.start).close()
    elif guess == 'application/rdf+xml':
        if diag.chatty_flag > 49: progress("Parsing as RDF")
#       import sax2rdf, xml.sax._exceptions
#       p = sax2rdf.RDFXMLParser(store, F,  thisDoc=asIfFrom, flags=flags)
        if flags == 'rdflib' or int(os.environ.get("CWM_RDFLIB", 0)):
            parser = 'rdflib'
            flags = ''
        else:
            parser = os.environ.get("CWM_RDF_PARSER", "sax2rdf")
        import rdfxml
        p = rdfxml.rdfxmlparser(store, F,  thisDoc=asIfFrom, flags=flags,
                parser=parser, why=why)

        p.feed(buffer)
        F = p.close()
    else:
        assert guess == 'text/n3'
        if diag.chatty_flag > 49: progress("Parsing as N3")
        if os.environ.get("CWM_N3_PARSER", 0) == 'n3p':
            import n3p_tm
            import triple_maker
            tm = triple_maker.TripleMaker(formula=F, store=store)
            p = n3p_tm.n3p_tm(asIfFrom, tm)
        else:
            p = notation3.SinkParser(store, F,  thisDoc=asIfFrom,flags=flags, why=why)

        try:
            p.startDoc()
            p.feed(buffer)
            p.endDoc()
        except:
            progress("Failed to parse %s" % (uri or buffer))
            raise
        
    if not openFormula:
        F = F.close()
    return F 
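A sketch of the content sniffing that load() falls back on when no usable Content-Type is available; the prefixes tested are the ones in the function above, and the sample buffers are made up.

def guessFormat(buffer):
    # mirrors the fallback sniffing in load(); not a general MIME detector
    if buffer[0:1] == "#" or buffer[0:7] == "@prefix":
        return "text/n3"
    if buffer[0:6] == "PREFIX" or buffer[0:4] == "BASE":
        return "x-application/sparql"
    if buffer.find('xmlns="') >= 0 or buffer.find('xmlns:') >= 0:
        return "application/rdf+xml"
    return "text/n3"

print(guessFormat("@prefix : <#> ."))                       # text/n3
print(guessFormat("PREFIX : <#> SELECT ?s WHERE { }"))      # x-application/sparql
print(guessFormat('<rdf:RDF xmlns:rdf="...">'))             # application/rdf+xml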
Example #17
 def bind(self, prefix, ns):
     # if ns.endswith('#'): ns, sep = ns[:-1], '#'
     # else: sep = ''
     ns = uripath.join(self._baseURI, ns)  # + sep
     self._bindings[prefix] = ns
     self._sink.bind(prefix, (URI, ns))
 def load(self, uri, baseURI=""):
     if uri:
         uri = uripath.join(baseURI, uri) # Make abs from relative
     source = URLInputSource(uri)                                        
     self.parse(source)
def absolutize(uri, baseURI):
    return uripath.join(baseURI, uri)
 def __getitem__(self, other):
     return join(self, other)
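The __getitem__ shown last turns indexing into a join, so base['name'] yields the joined URI. A sketch of the same idea with a hypothetical wrapper class, since the class that method belongs to is not shown here.

import uripath

class Namespace:
    # hypothetical stand-in for the class whose __getitem__ appears above
    def __init__(self, uri):
        self.uri = uri
    def __getitem__(self, other):
        return uripath.join(self.uri, other)

base = Namespace("http://example.org/vocab/")
print(base["name"])      # http://example.org/vocab/name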
Example #21
    def crawlFrom(self, addr, prefix, max):
        fmla = self._fmla

        iter = 1
        queue = [addr]
        seen = []
        while queue:
            head = queue.pop()

            progress("crawling at: ", head, " iter ", iter, " of ", max)
            iter = iter + 1
            if iter > max:
                progress("max limit reached.")
                break

            seen.append(head)

            try:
                rep = urllib2.urlopen(head)
                content = rep.read()
            except IOError:
                progress("can't GET", head)
                continue
                #@@ makeStatement(head type NoGood)

            # try to find a short label for
            # a diagram or some such.
            # try the last path segment,
            # or the 2nd last in case of an empty last segment...
            slash = head[:-1].rfind('/')
            label = head[slash + 1:]

            ct = rep.info().getheader('content-type')
            progress("... got content of type ", ct)
            isHTML = ct.find('text/html') == 0

            fmla.add(symbol(head), symbol(DC('type')), literal(ct))

            # note that we're not peeking into the URI
            # to find out if it's HTML; we're just
            # eliding the extension in the case we
            # know (from the HTTP headers) that it's HTML.
            if isHTML and label[-5:] == '.html':
                label = label[:-5]

            fmla.add(symbol(head), symbol(RDFS('label')), literal(label))

            if not isHTML: continue

            progress("... parsing text/html content")
            doc = libxml2.htmlParseDoc(content, 'us-ascii')
            try:
                titles = doc.xpathNewContext().xpathEval('//title')
                title = titles[0].getContent()
            except:  #@@figure out the right exceptions
                pass
            else:
                progress("... found title:", title)
                fmla.add(symbol(head), symbol(DC('title')),
                         literal(str(title)))

            hrefs = doc.xpathNewContext().xpathEval('//a/@href')
            progress("... found ", len(hrefs), " links")

            for h in hrefs:
                h = h.getContent()
                progress("... found href", h)
                i = uripath.join(head, h)
                i = uripath.splitFrag(i)[0]
                progress("... found link", head, ' -> ', i)
                fmla.add(symbol(head), symbol(DC('relation')), symbol(i))
                if i[:len(prefix)] == prefix and i not in seen:
                    queue.append(i)
Example #22
    def uri_ref2(self, str, i, res):
        """Generate uri from n3 representation.

        Note that the RDF convention of directly concatenating
        NS and local name is now used though I prefer inserting a '#'
        to make the namespaces look more like what XML folks expect.
        """
        qn = []
        j = self.qname(str, i, qn)
        if j >= 0:
            pairFudge = qn[0]
            pfx = pairFudge[0]
            ln = pairFudge[1]
            if pfx is None:
                assertFudge(0, "not used?")
                ns = self._baseURI + ADDED_HASH
            else:
                ns = self._bindings[pfx]
                if not ns:  # @@ pyjs should test undefined
                    if pfx == "_":  # Magic prefix 2001/05/30, can be overridden
                        res.append(self.anonymousNode(ln))
                        return j
                    raise BadSyntax(self._thisDoc, self.lines, str, i, "Prefix " + pfx + " not bound.")
            symb = self._store.newSymbol(ns + ln)
            if symb in self._variables:
                res.append(self._variables[symb])
            else:
                res.append(symb)  # @@@ "#" CONVENTION
            return j

        i = self.skipSpace(str, i)
        if i < 0:
            return -1

        if str[i] == "?":
            v = []
            j = self.variable(str, i, v)
            if j > 0:  # Forget variables as a class, only in context.
                res.append(v[0])
                return j
            return -1

        elif str[i] == "<":
            i = i + 1
            st = i
            while i < len(str):
                if str[i] == ">":
                    uref = str[st:i]  # the join should have dealt with "":
                    if self._baseURI:
                        uref = uripath.join(self._baseURI, uref)
                    else:
                        assertFudge(":" in uref, "With no base URI, cannot deal with relative URIs")
                    if str[i - 1 : i] == "#" and not uref[-1:] == "#":
                        uref = uref + "#"  # She meant it! Weirdness in urlparse?
                    symb = self._store.newSymbol(uref)
                    if symb in self._variables:
                        res.append(self._variables[symb])
                    else:
                        res.append(symb)
                    return i + 1
                i = i + 1
            raise BadSyntax(self._thisDoc, self.lines, str, j, "unterminated URI reference")

        elif self.keywordsSet:
            v = []
            j = self.bareWord(str, i, v)
            if j < 0:
                return -1  # Forget variables as a class, only in context.
            if v[0] in self.keywords:
                raise BadSyntax(self._thisDoc, self.lines, str, i, 'Keyword "' + v[0] + '" not allowed here.')
            res.append(self._store.newSymbol(self._bindings[""] + v[0]))
            return j
        else:
            return -1
Example #23
    def directive(self, str, i):
        j = self.skipSpace(str, i)
        if j<0: return j # eof
        res = []
        
        j = self.tok('bind', str, i)        # implied "#". Obsolete.
        if j>0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                "keyword bind is obsolete: use @prefix")

        j = self.tok('keywords', str, i)
        if j>0:
            i = self.commaSeparatedList(str, j, res, self.bareWord)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                    "'@keywords' needs comma separated list of words")
            self.setKeywords(res[:])
            if diag.chatty_flag > 80: progress("Keywords ", self.keywords)
            return i


        j = self.tok('forAll', str, i)
        if j > 0:
            i = self.commaSeparatedList(str, j, res, self.uri_ref2)
            if i <0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                        "Bad variable list after @forAll")
            for x in res:
                #self._context.declareUniversal(x)
                if x not in self._variables or x in self._parentVariables:
                    self._variables[x] =  self._context.newUniversal(x)
            return i

        j = self.tok('forSome', str, i)
        if j > 0:
            i = self.commaSeparatedList(str, j, res, self.uri_ref2)
            if i <0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                    "Bad variable list after @forSome")
            for x in res:
                self._context.declareExistential(x)
            return i


        j=self.tok('prefix', str, i)   # no implied "#"
        if j>=0:
            t = []
            i = self.qname(str, j, t)
            if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                                "expected qname after @prefix")
            j = self.uri_ref2(str, i, t)
            if j<0: raise BadSyntax(self._thisDoc, self.lines, str, i,
                                "expected <uriref> after @prefix _qname_")
            ns = self.uriOf(t[1])

            if self._baseURI:
                ns = join(self._baseURI, ns)
            elif ":" not in ns:
                 raise BadSyntax(self._thisDoc, self.lines, str, j,
                    "With no base URI, cannot use relative URI in @prefix <"+ns+">")
            assert ':' in ns # must be absolute
            self._bindings[t[0][0]] = ns
            self.bind(t[0][0], hexify(ns))
            return j

        j=self.tok('base', str, i)      # Added 2007/7/7
        if j >= 0:
            t = []
            i = self.uri_ref2(str, j, t)
            if i<0: raise BadSyntax(self._thisDoc, self.lines, str, j,
                                "expected <uri> after @base ")
            ns = self.uriOf(t[0])

            if self._baseURI:
                ns = join(self._baseURI, ns)
            elif ':' not in ns:
                raise BadSyntax(self._thisDoc, self.lines, str, j,
                    "With no previous base URI, cannot use relative URI in @base  <"+ns+">")
            assert ':' in ns # must be absolute
            self._baseURI = ns
            return i

        return -1  # Not a directive, could be something else.
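What the @prefix and @base branches do to a namespace URI once a base is known, sketched outside the parser; the base URI and the declarations are invented.

from uripath import join

baseURI = "http://example.org/dir/doc.n3"

# @prefix v: <vocab/> .
ns = join(baseURI, "vocab/")             # http://example.org/dir/vocab/
assert ':' in ns                         # must be absolute before it is bound

# @base <../other/> .
baseURI = join(baseURI, "../other/")     # http://example.org/other/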
Example #24
 def uriref(self, str):
     return Symbol(uripath.join(self._baseURI, str[1:-1]))
Example #25
    for s in errors: print "\t%s" % s
    exit(-2)
else:
    print "Ok for predictive parsing"

#print "Branch table:", branchTable
print "Literal terminals:", literalTerminals.keys()
print "Token regular expressions:"
for r in tokenRegexps: print "\t%s matches %s" %(r, tokenRegexps[r].pattern) 

yacc=open(argv[1]+"-yacc.y", "w")
yaccConvert(yacc, document, tokenRegexps)
#while agenda:
#    x = agenda[0]
#    agenda = agenda[1:]
#    already.append(x)
#    yaccProduction(yacc, x, tokenRegexps)
yacc.close()

if len(argv) <= 3: exit(0)
parseFile = argv[3]
ip = webAccess.urlopenForRDF(uripath.join(uripath.base(), parseFile), None)
str = ip.read()
sink = g.newFormula()
p = PredictiveParser(sink=sink, top=document, branchTable=branchTable,
                     tokenRegexps=tokenRegexps)
p.parse(str)

    
#ends
Example #26
    def __init__(self, mainURL, mergeFunction, provMergeFunction=None):
        # Try to be smart.  We might have accidentally been passed the
        # provCell or dataCell.
        bus = dbus.SystemBus()
        propman = bus.get_object('edu.mit.csail.dig.DPropMan',
                                 '/DPropMan')
        self.initURLType = None
        self.url = mainURL
        self.uuid = str(propman.registerRemoteCell(self.url,
                                                   dbus_interface='edu.mit.csail.dig.DPropMan'))
        self.dbusCell = bus.get_object('edu.mit.csail.dig.DPropMan',
                                       '/Cells/%s' % (self.uuid))
        self.dbusCell.connect_to_signal('UpdateSignal',
                                        lambda raw_data, peer: self.typeChecker(str(raw_data), str(peer)),
                                        dbus_interface='edu.mit.csail.dig.DPropMan.Cell')
        self.dbusCell.connectToRemote(self.url,
                                      dbus_interface='edu.mit.csail.dig.DPropMan.Cell')
        
        # Wait for the typeChecker to have gotten something.
        # WARNING: We assume python-gobject is being used.
        while self.initURLType is None:
            gobject.MainLoop().get_context().iteration(True)
        
        self.provCell = None
        self.dataCell = None
        if self.initURLType == 'dpropProvCell':
            print "Actually passed a provenance cell!"
            
            provURL = mainURL
            self.provCell = RemoteCell(provURL, lambda provCell, raw_data, peer: self.provCellMerge(provMergeFunction, str(raw_data), str(peer)))
            
            self.provCellMerge(provMergeFunction,
                               str(self.initRawData),
                               str(self.initPeer))
            
            provData = dpropjson.loads(self.provCell.data())
            if isinstance(provData, dict):
                if 'type' not in provData or 'mainCell' not in provData or 'data' not in provData:
                    raise DPropException("Provenance cell doesn't match expected cell type!")
                elif provData['type'] != 'dpropProvCell':
                    raise DPropException("Provenance cell wasn't a dpropProvCell!")
            else:
                raise DPropException("Provenance cell didn't contain a dictionary!")
            
            mainURL = uripath.join(provURL, provData['provCell'])
        elif self.initURLType == 'dpropDataCell':
            print "Actually passed a data cell!"
            
            dataURL = mainURL
            self.dataCell = RemoteCell(dataURL, lambda dataCell, raw_data, peer: self.dataCellMerge(mergeFunction, str(raw_data), str(peer)))
            
            self.dataCellMerge(mergeFunction,
                               str(self.initRawData),
                               str(self.initPeer))
            
            dataData = dpropjson.loads(self.dataCell.data())
            print dataData
            if isinstance(dataData, dict):
                if 'type' not in dataData or 'mainCell' not in dataData or 'data' not in dataData:
                    raise DPropException("Data cell doesn't match expected cell type!")
                elif dataData['type'] != 'dpropDataCell':
                    raise DPropException("Data cell wasn't a dpropDataCell!")
            else:
                raise DPropException("Data cell didn't contain a dictionary!")
            
            mainURL = uripath.join(dataURL, dataData['mainCell'])

        print mainURL
        self.mainCell = RemoteCell(mainURL, lambda mainCell, raw_data, peer: self.mainCellMerge(str(raw_data), str(peer)))
        self.uuid = self.mainCell.uuid
        
        if self.initURLType == 'dpropMainCell':
            print "Passed a main cell"
            self.mainCellMerge(str(self.initRawData),
                               str(self.initPeer))
        
        mainData = dpropjson.loads(self.mainCell.data())
        
        while isinstance(mainData, dpropjson.Nothing):
            # Gotta wait a few cycles (I really don't like this...)
            gobject.MainLoop().get_context().iteration(True)
            mainData = dpropjson.loads(self.mainCell.data())
        
        if isinstance(mainData, dict):
            if 'type' not in mainData or 'provCell' not in mainData or 'dataCell' not in mainData:
                raise DPropException("Main cell doesn't match expected cell type!")
            elif mainData['type'] != 'dpropMainCell':
                raise DPropException("Main cell wasn't a dpropMainCell!")
        else:
            raise DPropException("Main cell didn't contain a dictionary!")
        
        # URL for the provCell is relative to mainCell.
        if self.provCell == None:
            provURL = uripath.join(mainURL, mainData['provCell'])
            self.provCell = RemoteCell(provURL, lambda provCell, raw_data, peer: self.provCellMerge(provMergeFunction, str(raw_data), str(peer)))
        if self.dataCell == None:
            dataURL = uripath.join(mainURL, mainData['dataCell'])
            self.dataCell = RemoteCell(dataURL, lambda dataCell, raw_data, peer: self.dataCellMerge(mergeFunction, str(raw_data), str(peer)))
        
        provData = dpropjson.loads(self.provCell.data())
        
        while isinstance(provData, dpropjson.Nothing):
            # Gotta wait a few cycles (I really don't like this...)
            gobject.MainLoop().get_context().iteration(True)
            provData = dpropjson.loads(self.provCell.data())
        
        if isinstance(provData, dict):
            if 'type' not in provData or 'mainCell' not in provData or 'data' not in provData:
                raise DPropException("Provenance cell doesn't match expected cell type!")
            elif provData['type'] != 'dpropProvCell':
                raise DPropException("Provenance cell wasn't a dpropProvCell!")
            elif provData['mainCell'] != self.mainCell.uuid:
                raise DPropException("Provenance cell's mainCell doesn't match expected UUID!")
        else:
            raise DPropException("Provenance cell didn't contain a dictionary!")

        dataData = dpropjson.loads(self.dataCell.data())
        
        while isinstance(dataData, dpropjson.Nothing):
            # Gotta wait a few cycles (I really don't like this...)
            gobject.MainLoop().get_context().iteration(True)
            dataData = dpropjson.loads(self.dataCell.data())
        
        if isinstance(dataData, dict):
            if 'type' not in dataData or 'mainCell' not in dataData or 'data' not in dataData:
                raise DPropException("Data cell doesn't match expected cell type!")
            elif dataData['type'] != 'dpropDataCell':
                raise DPropException("Data cell wasn't a dpropDataCell!")
            elif dataData['mainCell'] != self.mainCell.uuid:
                raise DPropException("Data cell's mainCell doesn't match expected UUID!")
        else:
            raise DPropException("Data cell didn't contain a dictionary!")
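The cell URLs above are all resolved relative to the URL of the cell that referenced them; a sketch of just that step, with an invented URL and a made-up cell record.

import uripath

mainURL = "http://dprop.example/cells/1234"            # invented
mainData = {'provCell': 'prov/5678', 'dataCell': 'data/9abc'}
provURL = uripath.join(mainURL, mainData['provCell'])  # http://dprop.example/cells/prov/5678
dataURL = uripath.join(mainURL, mainData['dataCell'])  # http://dprop.example/cells/data/9abc
print(provURL)
print(dataURL)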
Example #27
 def evaluateObject(self, subj_py):
     import uripath
     there, base = subj_py
     return uripath.join(base, there)
Example #28
def absolutize(uri, baseURI):
    return uripath.join(baseURI, uri)
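absolutize() is a thin wrapper over uripath.join; a usage sketch with invented URIs.

import uripath

def absolutize(uri, baseURI):
    return uripath.join(baseURI, uri)

print(absolutize("a/b", "http://example.org/x/y"))   # http://example.org/x/a/b
print(absolutize("/z", "http://example.org/x/y"))    # http://example.org/z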
Example #29
 def uriref(self, s):
     return (URI, uripath.join(self._baseURI, s[1:-1]))
Example #31
    def uriref(self, str):
        """ Generate uri from uriref in this document
        unicode strings OK.
        """

        return uripath.join(self._base, str)
Example #32
def load(store,
         uri=None,
         openFormula=None,
         asIfFrom=None,
         contentType=None,
         flags="",
         referer=None,
         why=None,
         topLevel=False):
    """Get and parse document.  Guesses format if necessary.

    uri:      if None, load from standard input.
    remember: if 1, store as metadata the relationship between this URI and this formula.
    
    Returns:  top-level formula of the parsed document.
    Raises:   IOError, SyntaxError, DocumentError
    
    This is an independent function, as it is fairly independent
    of the store. However, it is natural to call it as a method on the store.
    And a proliferation of APIs confuses.
    """
    #    if referer is None:
    #        raise RuntimeError("We are trying to force things to include a referer header")
    try:
        baseURI = uripath.base()
        if uri != None:
            addr = uripath.join(baseURI, uri)  # Make abs from relative
            if diag.chatty_flag > 40: progress("Taking input from " + addr)
            netStream = urlopenForRDF(addr, referer)
            if diag.chatty_flag > 60:
                progress("   Headers for %s: %s\n" %
                         (addr, netStream.headers.items()))
            receivedContentType = netStream.headers.get(
                HTTP_Content_Type, None)
        else:
            if diag.chatty_flag > 40:
                progress("Taking input from standard input")
            addr = uripath.join(baseURI, "STDIN")  # Make abs from relative
            netStream = sys.stdin
            receivedContentType = None

    #    if diag.chatty_flag > 19: progress("HTTP Headers:" +`netStream.headers`)
    #    @@How to get at all headers??
    #    @@ Get sensible net errors and produce diagnostics

        guess = None
        if receivedContentType:
            if diag.chatty_flag > 9:
                progress("Received Content-type: " + ` receivedContentType ` +
                         " for " + addr)
            if receivedContentType.find('xml') >= 0 or (
                    receivedContentType.find('rdf') >= 0
                    and not (receivedContentType.find('n3') >= 0)):
                guess = "application/rdf+xml"
            elif receivedContentType.find('n3') >= 0:
                guess = "text/rdf+n3"
        if guess == None and contentType:
            if diag.chatty_flag > 9:
                progress("Given Content-type: " + ` contentType ` + " for " +
                         addr)
            if contentType.find('xml') >= 0 or (
                    contentType.find('rdf') >= 0
                    and not (contentType.find('n3') >= 0)):
                guess = "application/rdf+xml"
            elif contentType.find('n3') >= 0:
                guess = "text/rdf+n3"
            elif contentType.find('sparql') >= 0 or contentType.find('rq') >= 0:
                guess = "x-application/sparql"
        buffer = netStream.read()
        if guess == None:

            # can't be XML if it starts with these...
            if buffer[0:1] == "#" or buffer[0:7] == "@prefix":
                guess = 'text/rdf+n3'
            elif buffer[0:6] == 'PREFIX' or buffer[0:4] == 'BASE':
                guess = "x-application/sparql"
            elif buffer.find('xmlns="') >= 0 or buffer.find('xmlns:') >= 0:  #"
                guess = 'application/rdf+xml'
            else:
                guess = 'text/rdf+n3'
            if diag.chatty_flag > 9: progress("Guessed ContentType:" + guess)
    except (IOError, OSError):
        raise DocumentAccessError(addr, sys.exc_info())

    if asIfFrom == None:
        asIfFrom = addr
    if openFormula != None:
        F = openFormula
    else:
        F = store.newFormula()
    if topLevel:
        newTopLevelFormula(F)
    import os
    if guess == "x-application/sparql":
        if diag.chatty_flag > 49: progress("Parsing as SPARQL")
        from sparql import sparql_parser
        import sparql2cwm
        convertor = sparql2cwm.FromSparql(store, F, why=why)
        import StringIO
        p = sparql_parser.N3Parser(StringIO.StringIO(buffer),
                                   sparql_parser.branches, convertor)
        F = p.parse(sparql_parser.start).close()
    elif guess == 'application/rdf+xml':
        if diag.chatty_flag > 49: progress("Parsing as RDF")
        #       import sax2rdf, xml.sax._exceptions
        #       p = sax2rdf.RDFXMLParser(store, F,  thisDoc=asIfFrom, flags=flags)
        if flags == 'rdflib' or int(os.environ.get("CWM_RDFLIB", 0)):
            parser = 'rdflib'
            flags = ''
        else:
            parser = os.environ.get("CWM_RDF_PARSER", "sax2rdf")
        import rdfxml
        p = rdfxml.rdfxmlparser(store,
                                F,
                                thisDoc=asIfFrom,
                                flags=flags,
                                parser=parser,
                                why=why)

        p.feed(buffer)
        F = p.close()
    else:
        assert guess == 'text/rdf+n3'
        if diag.chatty_flag > 49: progress("Parsing as N3")
        if os.environ.get("CWM_N3_PARSER", 0) == 'n3p':
            import n3p_tm
            import triple_maker
            tm = triple_maker.TripleMaker(formula=F, store=store)
            p = n3p_tm.n3p_tm(asIfFrom, tm)
        else:
            p = notation3.SinkParser(store,
                                     F,
                                     thisDoc=asIfFrom,
                                     flags=flags,
                                     why=why)

        try:
            p.startDoc()
            p.feed(buffer)
            p.endDoc()
        except:
            progress("Failed to parse %s" % (uri or buffer))
            raise

    if not openFormula:
        F = F.close()
    return F
    def directive(self, str, i):
        j = self.skipSpace(str, i)
        if j < 0: return j  # eof
        res = []

        j = self.tok('bind', str, i)  # implied "#". Obsolete.
        if j > 0:
            raise BadSyntax(self._thisDoc, self.lines, str, i,
                            "keyword bind is obsolete: use @prefix")

        j = self.tok('keywords', str, i)
        if j > 0:
            i = self.commaSeparatedList(str, j, res, false)
            if i < 0:
                raise BadSyntax(
                    self._thisDoc, self.lines, str, i,
                    "'@keywords' needs comma separated list of words")
            self.setKeywords(res[:])
            if diag.chatty_flag > 80: progress("Keywords ", self.keywords)
            return i

        j = self.tok('forAll', str, i)
        if j > 0:
            i = self.commaSeparatedList(str, j, res, true)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                                "Bad variable list after @forAll")
            for x in res:
                #self._context.declareUniversal(x)
                if x not in self._variables or x in self._parentVariables:
                    self._variables[x] = self._context.newUniversal(x)
            return i

        j = self.tok('forSome', str, i)
        if j > 0:
            i = self.commaSeparatedList(str, j, res, self.uri_ref2)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                                "Bad variable list after @forSome")
            for x in res:
                self._context.declareExistential(x)
            return i

        j = self.tok('prefix', str, i)  # no implied "#"
        if j >= 0:
            t = []
            i = self.qname(str, j, t)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines, str, j,
                                "expected qname after @prefix")
            j = self.uri_ref2(str, i, t)
            if j < 0:
                raise BadSyntax(self._thisDoc, self.lines, str, i,
                                "expected <uriref> after @prefix _qname_")
            ns = t[1].uri  # pyjs was uriref()
            if self._baseURI:
                ns = join(self._baseURI, ns)
            else:
                assertFudge(
                    ":" in ns,
                    "With no base URI, cannot handle relative URI for NS")
            assertFudge(':' in ns)  # must be absolute
            self._bindings[t[0][0]] = ns
            self.bind(t[0][0], hexify(ns))
            return j

        j = self.tok('base', str, i)  # Added 2007/7/7
        if j >= 0:
            t = []
            i = self.uri_ref2(str, j, t)
            if i < 0:
                raise BadSyntax(self._thisDoc, self.lines, str, j,
                                "expected <uri> after @base ")
            ns = t[0].uri  # pyjs was uriref()

            if self._baseURI:
                ns = join(self._baseURI, ns)
            else:
                raise BadSyntax(
                    self._thisDoc, self.lines, str, j,
                    "With no previous base URI, cannot use relative URI in @base  <"
                    + ns + ">")
            assertFudge(':' in ns)  # must be absolute
            self._baseURI = ns
            return i

        return -1  # Not a directive, could be something else.
Example #34
 def __getitem__(self, other):
     return join(self, other)
Example #35
File: cwm_os.py Project: weyls/swap
 def evaluateObject(self, subj_py):
     if verbosity() > 80: progress("os:baseAbsolute input:"+`subj_py`)
     if isString(subj_py):
         return uripath.join(uripath.base(), subj_py)
     progress("Warning: os:baseAbsolute input is not a string: "+`subj_py`)
 def uriref(self, str):
     return Symbol(uripath.join(self._baseURI, str[1:-1]))
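The os:baseAbsolute built-in above simply joins a string against the process base, which is typically a file: URI for the current working directory; the printed values are only examples.

import uripath

print(uripath.base())                           # e.g. file:///home/user/work/
print(uripath.join(uripath.base(), "out.n3"))   # e.g. file:///home/user/work/out.n3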