def doCommand():
    """Command line RDF/N3 crawler

     crawl <uriref>

    options:

    See http://www.w3.org/2000/10/swap/doc/cwm  for more documentation.

    Takes the URI reference from ``sys.argv[1]``, combines it with
    ``base()`` via ``join`` and wraps the result with ``symbol``.  That
    symbol seeds the module-level ``agenda``; the loop then takes the first
    agenda entry, appends it to the module-level ``already`` list and hands
    it to ``crawl`` until the agenda is empty.  A prefix declaration and
    two ``#`` comment lines (revision, then attempt/success counts) are
    written to standard output.

    Exits with status 2, after writing a usage line to standard error,
    when no <uriref> argument is supplied.
    """
    global agenda
    global already

    # Without this guard a missing argument surfaces as a bare IndexError
    # traceback, which says nothing about the expected invocation.
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: crawl <uriref>\n")
        sys.exit(2)

    uriref = sys.argv[1]
    uri = join(base(), uriref)
    r = symbol(uri)
    diag.setVerbosity(0)

    # Single-argument print(...) parses the same whether print is a
    # statement or a function.  The doubled spaces reproduce the separator
    # that the multi-item print statement inserted after "... " items.
    print("@prefix : <http://www.w3.org/2000/10/swap/util/semweb#>.")
    print("# Generated by crawl.py  %s" % (cvsRevision[1:-1],))

    agenda = [r]
    # The global is re-read and rebound on every pass; presumably crawl()
    # adds newly discovered resources to it -- confirm against crawl().
    while agenda != []:
        r = agenda[0]
        agenda = agenda[1:]
        already.append(r)
        crawl(r)

    print("#  %s attempts, %s successes." % (len(already), successes))
# from xml.dom.minidom import parse, Text from diag import verbosity, setVerbosity, progress import thing, llyn kb = thing.formula() setVerbosity(99) def do(ele, level=0): if isinstance(ele, Text): if verbosity() > 70: progress("Ignoring text '%s'" % ele.nodeValue) return None ln = ele.localName if verbosity() > 20: progress(" " * level, ln) if ln == "dict": me = kb.newBlankNode() n = len(ele.childNodes) i = 0 pred = None while i < n: e = ele.childNodes[i] if isinstance(e, Text): if verbosity() > 70: progress("Ignoring text '%s'" % e.nodeValue) i = i + 1 continue if e.localName == "key": property = e.firstChild.data if not property: property = "nullProp" pred = kb.newSymbol(property)
""" import urllib2 # eliminate this dependency; use mnot's HtmlDom instead # http://www.mnot.net/python/HtmlDom.py # <AaronSw> it lets me do: d = fetch(url); print xml.xpath.Evaluate("//*[@class='rss:item']/text()", d) import libxml2 # http://xmlsoft.org/python.html , DebianPackage:libxml2-python2.1 won't work because llyn.py uses 2.2isms # http://www.w3.org/2000/10/swap/ from myStore import Namespace, load, symbol, literal, formula, bind import myStore import uripath, toXML # http://www.w3.org/2000/10/swap/ from RDFSink import SYMBOL, LITERAL, FORMULA import diag diag.setVerbosity(0) def DC(ln): return 'http://purl.org/dc/elements/1.1/' + ln def RDFS(ln): return 'http://www.w3.org/2000/01/rdf-schema#' + ln class Crawler: def __init__(self, fmla, here): self._fmla = fmla def crawlFrom(self, addr, prefix, max):
""" import urllib2 # eliminate this dependency; use mnot's HtmlDom instead # http://www.mnot.net/python/HtmlDom.py # <AaronSw> it lets me do: d = fetch(url); print xml.xpath.Evaluate("//*[@class='rss:item']/text()", d) import libxml2 # http://xmlsoft.org/python.html , DebianPackage:libxml2-python2.1 won't work because llyn.py uses 2.2isms # http://www.w3.org/2000/10/swap/ from myStore import Namespace, load, symbol, literal, formula, bind import myStore import uripath, toXML # http://www.w3.org/2000/10/swap/ from RDFSink import SYMBOL, LITERAL, FORMULA import diag diag.setVerbosity(0) def DC(ln): return 'http://purl.org/dc/elements/1.1/' + ln def RDFS(ln): return 'http://www.w3.org/2000/01/rdf-schema#' + ln class Crawler: def __init__(self, fmla, here): self._fmla = fmla def crawlFrom(self, addr, prefix, max): fmla = self._fmla
# from xml.dom.minidom import parse, Text from diag import verbosity, setVerbosity, progress import thing, llyn kb=thing.formula() setVerbosity(99) def do(ele, level=0): if isinstance(ele, Text): if verbosity() > 70: progress("Ignoring text '%s'" % ele.nodeValue) return None ln = ele.localName if verbosity() > 20: progress(" "*level, ln) if ln == "dict": me = kb.newBlankNode() n = len(ele.childNodes) i = 0 pred = None while i<n: e = ele.childNodes[i] if isinstance(e, Text): if verbosity() > 70: progress("Ignoring text '%s'" % e.nodeValue) i = i + 1 continue if e.localName == "key": property = e.firstChild.data if not property: property = "nullProp" pred = kb.newSymbol(property) else: