def main(argv): data, lang = argv[-2:] f = myStore.load(data) lang = f.newSymbol(lang) it = { "rules": asGrammar(f, lang), "tokens": tokens(f, lang), "first": sets(f, lang, EBNF.first), "follow": sets(f, lang, EBNF.follow), } if "--pprint" in argv: from pprint import pprint pprint(it) elif "--yacc" in argv: toYacc(it) elif "--ply" in argv: toPly(it) else: import simplejson # http://cheeseshop.python.org/pypi/simplejson import sys start = it["rules"][0][0] print "SYNTAX_%s = " % start, simplejson.dump(it, sys.stdout)
def main(argv):
    """Load the RDF graph named by argv[1] and write it to stdout as a
    GraphViz dot graph.

    Raises Usage when the input file/URI argument is missing.
    """
    try:
        ref = argv[1]
    except IndexError:
        # Was a bare "except:", which would also have swallowed
        # SystemExit/KeyboardInterrupt; only a missing argv[1] is the
        # expected failure here.
        raise Usage("missing input file/URI")
    f = load(ref)
    rdf2dot(sys.stdout.write, f)
def get_productions(uri):
    """Read the grammar document at uri and return a dict mapping each
    left-hand-side URI string to its Production.

    Each right-hand side is converted from RDF terms to a list of
    alternative sequences of URI strings.
    """
    graph = load(uri)
    productions = {}
    for statement in graph.statementsMatching(pred=BNF.mustBeOneSequence):
        subject, _, alternatives = statement.spo()
        name = subject.uriref()
        sequences = []
        for alternative in alternatives:
            sequences.append([item.uriref() for item in alternative])
        productions[name] = Production(name, sequences)
    return productions
def get(self, uri):
    """Return the canonical formula for uri, loading and caching it on
    first use.

    Parsing verbosity is raised only for the duration of the load.
    """
    f = self._loaded.get(uri, None)
    if f is None:  # identity test, not '== None'
        setVerbosity(debugLevelForParsing)
        f = load(uri, flags="B").close()  # why B? -DWC
        setVerbosity(0)
        self._loaded[uri] = f
    # repr() replaces the Python-2-only backtick syntax; same output.
    assert f.canonical is f, repr(f.canonical)
    return f
def topLevelLoad(uri=None, flags=''):
    """Load uri into a fresh formula and return it with representedBy
    substitutions applied.

    Closure mode "e" implements sameAs by smushing; afterwards every
    node that is representedBy another is replaced by its
    representative.
    """
    working = formula()
    working.setClosureMode("e")
    working = load(uri, flags=flags, openFormula=working)
    replacements = {}
    for statement in working.statementsMatching(pred=reason.representedBy):
        representative, _, node = statement.spo()
        replacements[node] = representative
    return working.substitution(replacements)
def get(self, uri):
    """Return the canonical formula for uri, loading it on first use
    and caching it in self._loaded.

    Verbose parse diagnostics are enabled only while loading.
    """
    f = self._loaded.get(uri, None)
    if f is None:  # 'is None' instead of '== None'
        setVerbosity(debugLevelForParsing)
        f = load(uri, flags="B").close()  # why B? -DWC
        setVerbosity(0)
        self._loaded[uri] = f
    # repr() replaces the deprecated backtick-repr syntax.
    assert f.canonical is f, repr(f.canonical)
    return f
def main(): import sys inputFile = sys.argv[1] baseRegex = sys.argv[2] f = myStore.load(inputFile) base = myStore.symbol(baseRegex) g = u'(?P<foo>%s)' % makeRegex(f, base) print ` g ` import re c = re.compile(g) print c.match('3.4E-4').groups()
def main(args):
    """Load the RDF document named by args[1], resolved against the
    current working directory, and export it as LDIF."""
    if len(args) < 2:
        usage()
        sys.exit(1)
    base = "file:" + os.getcwd() + "/"
    addr = uripath.join(base, args[1])
    progress("loading...", addr)
    kb = load(addr)
    progress("exporting...")
    extractLDIF(kb)
def main(): import sys inputFile = sys.argv[1] baseRegex = sys.argv[2] f = myStore.load(inputFile) base = myStore.symbol(baseRegex) g = u"(?P<foo>%s)" % makeRegex(f, base) print ` g ` import re c = re.compile(g) print c.match("3.4E-4").groups()
def from_string(s):
    """Parse the N3 document in string s and return the loaded formula.

    The text is written to a temporary file and parsed via
    swap.myStore.load.

    NOTE(review): a disabled code path previously parsed the string
    directly with notation3.SinkParser; it was dead ("if False:" --
    "looks like some of the 'why' magic in load is needed") and has
    been removed.
    """
    import os
    from tempfile import mkstemp
    # mkstemp replaces the deprecated, race-prone mktemp: the file is
    # created atomically with a unique name, closing the symlink/race
    # window between name generation and open.
    fd, filename = mkstemp()
    tmpfile = os.fdopen(fd, "w")
    try:
        tmpfile.write(s)
    finally:
        tmpfile.close()
    from swap.myStore import load
    return load(filename)
def importTokens():
    """Populate the module-global 'tokens' list and the Tokens class.

    Fast path: import a previously generated sparql_tokens_table
    module.  Slow path (on ImportError): read the SPARQL grammar from
    its RDF description, extract the token names and their regexps,
    install them on Tokens, and write a fresh sparql_tokens_table.py
    for next time.
    """
    global tokens
    if tokens is None:
        try:
            # Fast path: a generated table module already exists.
            t0 = time.time()
            from sparql_tokens_table import tokens as ts, regexps as rs
            t1 = time.time()
            print >> sys.stderr, 'loaded from file ', t1 - t0
            tokens = ts
            for k, v in rs.iteritems():
                setattr(Tokens, k, v)
        except ImportError:
            # Slow path: derive tokens and regexps from the grammar.
            from swap import myStore
            store = myStore._checkStore()
            F = myStore.load('http://www.w3.org/2000/10/swap/grammar/sparql')
            BNF = myStore.Namespace("http://www.w3.org/2000/10/swap/grammar/bnf#")
            regexps = {}
            k = F.statementsMatching(pred=BNF.tokens)
            if len(k) != 1:
                raise RuntimeError("Expected 1 occurrence of bnf:tokens, got %i: %s" % (len(k), `k`))
            for triple in k:
                tokens = [x.uriref() for x in triple.object()]
            tokens.append(BNF.PASSED_TOKENS.uriref())
            for triple in F.statementsMatching(pred=BNF.matches):
                s, p, o = triple.spo()
                key = s.uriref()
                val = o.value()
                if key in tokens:
                    # Install both the raw pattern (t_KEY) and a
                    # compiled, case-insensitive form (c_KEY).
                    setattr(Tokens, 't_' + key, val)
                    regexps['t_' + key] = val
                    setattr(Tokens, 'c_' + key, smartCompile(val, re.I))
                    regexps['c_' + key] = smartCompile(val, re.I)
            pklVal = {'tokens': tokens, 'regexps': regexps}
            # Best-effort: regenerate the cached table module next to
            # the 'sparql' package (or the cwd if it can't be found).
            try:
                import imp, os.path
                try:
                    path = imp.find_module('sparql')[1]
                except ImportError:
                    path = ''
                # path = ''
                f = file(os.path.join(path, 'sparql_tokens_table.py'), 'w')
                mkmodule(pklVal, f)
                f.close()
            except:
                raise
def main(args):
    """Export the RDF calendar named by args[1] as iCalendar on stdout.

    An optional leading '--floattz <tz>' pair sets the writer's
    floating timezone before the address argument is consumed.
    """
    if not args[1:]:
        usage()
        sys.exit(1)
    writer = CalWr(sys.stdout.write)
    if args[3:] and args[1] == '--floattz':
        writer.floatTZ = args[2]
        del args[1:3]
    addr = uripath.join("file:" + os.getcwd() + "/", args[1])
    progress("loading...", addr)
    graph = load(addr)
    progress("exporting...")
    writer.export(graph, addr)
def main(argv): data, lang = argv[-2:] f = myStore.load(data) lang = f.newSymbol(lang) it = { 'rules': asGrammar(f, lang), 'tokens': tokens(f, lang), 'first': sets(f, lang, EBNF.first), 'follow': sets(f, lang, EBNF.follow), } if '--pprint' in argv: from pprint import pprint pprint(it) elif '--yacc' in argv: toYacc(it) elif '--ply' in argv: toPly(it) else: import simplejson #http://cheeseshop.python.org/pypi/simplejson import sys start = it['rules'][0][0] print "SYNTAX_%s = " % start, simplejson.dump(it, sys.stdout)
# Apply command-line options, then collect photo events from the photo
# metadata graph as (date, "P", (node, photo-uri)) tuples, preferring
# the EXIF timestamp over the file date when one is present.
output = None
for o, a in opts:
    if o in ("-h", "--help"):
        print __doc__
        sys.exit()
    if o in ("-v", "--verbose"):
        verbose = 1
    if o in ("-g", "--gpsData"):
        gpsData = a
events = []
photoMetaFileName = commandLineArg('photometa')
if photoMetaFileName:
    if verbose:
        progress("Loading Photo data..." + photoMetaFileName)
    f = load(photoMetaFileName)  # Was gpsData + "/PhotoMeta.n3"
    if verbose:
        progress("Loaded.")
    ss = f.statementsMatching(pred=FILE.date)
    for s in ss:
        ph = s.subject()
        photo = str(ph)
        date = str(s.object())
        # Prefer the EXIF camera timestamp; fall back to the file date.
        da = f.any(subj=ph, pred=EXIF.dateTime)
        if da != None:
            date = str(da)
        else:
            progress("Warning: using file date %s for %s" % (date, photo))
        events.append((date, "P", (ph, photo)))
        if verbose:
            progress("%s: %s" % (date, photo))
# Process command-line options, then build the list of photo events
# (date, "P", (node, photo-uri)) from the photo metadata graph.  The
# EXIF dateTime, when available, overrides the file date.
output = None
for o, a in opts:
    if o in ("-h", "--help"):
        print __doc__
        sys.exit()
    if o in ("-v", "--verbose"):
        verbose = 1
    if o in ("-g", "--gpsData"):
        gpsData = a
events = []
photoMetaFileName = commandLineArg('photometa');
if photoMetaFileName:
    if verbose:
        progress("Loading Photo data..." + photoMetaFileName)
    f = load(photoMetaFileName)  # Was gpsData + "/PhotoMeta.n3"
    if verbose:
        progress("Loaded.")
    ss = f.statementsMatching(pred=FILE.date)
    for s in ss:
        ph = s.subject()
        photo = str(ph)
        date = str(s.object())
        # EXIF timestamp preferred; warn when only the file date exists.
        da = f.any(subj=ph, pred=EXIF.dateTime)
        if da != None:
            date = str(da)
        else:
            progress("Warning: using file date %s for %s" % (date, photo))
        events.append((date, "P", (ph, photo)))
        if verbose:
            progress("%s: %s" % (date, photo))
def main():
    """Command-line driver: load a grammar in RDF, check it is fit for
    predictive parsing, optionally emit a yacc translation, and
    optionally parse an input document with the predictive parser
    (token-lexer variant).
    """
    global already, agenda, errors
    parseAs = None
    grammarFile = None
    parseFile = None
    yaccFile = None
    global verbose
    global g
    verbose = 0
    lumped = 1
    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "ha:v:p:g:y:",
            ["help", "as=", "verbose=", "parse=", "grammar=", "yacc="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    output = None
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        if o in ("-v", "--verbose"):
            verbose = int(a)
            diag.chatty_flag = int(a)
        if o in ("-a", "--as"):
            parseAs = uripath.join(uripath.base(), a)
        if o in ("-p", "--parse"):
            parseFile = uripath.join(uripath.base(), a)
        if o in ("-g", "--grammar"):
            grammarFile = uripath.join(uripath.base(), a)
        if o in ("-y", "--yacc"):
            yaccFile = uripath.join(uripath.base(), a)[5:]  # strip off file:
    # if testFiles == []: testFiles = [ "/dev/stdin" ]
    if not parseAs:
        usage()
        sys.exit(2)
    parseAs = uripath.join(uripath.base(), parseAs)
    if not grammarFile:
        # Default: the grammar lives in the same document as the symbol.
        grammarFile = parseAs.split("#")[0]  # strip off fragid
    else:
        grammarFile = uripath.join(uripath.base(), grammarFile)
    # The Grammar formula
    progress("Loading " + grammarFile)
    start = clock()
    g = load(grammarFile)
    taken = clock() - start + 1
    progress("Loaded %i statements in %fs, ie %f/s." %
             (len(g), taken, len(g) / taken))
    document = g.newSymbol(parseAs)
    # Work the agenda of productions until it is empty; doProduction
    # appends newly discovered productions to the module-global agenda.
    already = []
    agenda = []
    errors = []
    doProduction(document)
    while agenda:
        x = agenda[0]
        agenda = agenda[1:]
        already.append(x)
        doProduction(x)
    if errors != []:
        progress("###### FAILED with %i errors." % len(errors))
        for s in errors:
            progress("\t%s" % s)
        exit(-2)
    else:
        progress("Ok for predictive parsing")
    #if parser.verb: progress "Branch table:", branchTable
    if verbose:
        progress("Literal terminals: %s" % literalTerminals.keys())
        progress("Token regular expressions:")
        for r in tokenRegexps:
            progress("\t%s matches %s" % (r, tokenRegexps[r].pattern))
    if yaccFile:
        yacc = open(yaccFile, "w")
        yaccConvert(yacc, document, tokenRegexps)
        yacc.close()
    if parseFile == None:
        exit(0)
    # Parse the requested document using the token-based lexer.
    ip = webAccess.urlopenForRDF(parseFile, None)
    lexer = sparql_tokens.Lexer()
    lexer.input(ip)
    #str = ip.read().decode('utf_8')
    sink = g.newFormula()
    keywords = g.each(pred=BNF.keywords, subj=document)
    keywords = [a.value() for a in keywords]
    p = PredictiveParser(sink=sink, top=document, branchTable=branchTable,
                         tokenSet=tokenSet, keywords=keywords)
    p.verb = 1
    start = clock()
    #print lexer.token()
    print p.parse(lexer.token)
    taken = clock() - start + 1
    # progress("Loaded %i chars in %fs, ie %f/s." %
    #          (len(str), taken, len(str)/taken))
    progress("Parsed <%s> OK" % parseFile)
    sys.exit(0)  # didn't crash
def main():
    """Command-line driver: load a grammar in RDF, check it is fit for
    predictive parsing, optionally emit a yacc translation, and
    optionally parse an input document (whole-string/regexp variant).
    """
    global already, agenda, errors
    parseAs = None
    grammarFile = None
    parseFile = None
    yaccFile = None
    global verbose
    global g
    verbose = 0
    lumped = 1
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ha:v:p:g:y:",
            ["help", "as=", "verbose=", "parse=", "grammar=", "yacc="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    output = None
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        if o in ("-v", "--verbose"):
            verbose = int(a)
            diag.chatty_flag = int(a)
        if o in ("-a", "--as"):
            parseAs = uripath.join(uripath.base(), a)
        if o in ("-p", "--parse"):
            parseFile = uripath.join(uripath.base(), a)
        if o in ("-g", "--grammar"):
            grammarFile = uripath.join(uripath.base(), a)
        if o in ("-y", "--yacc"):
            yaccFile = uripath.join(uripath.base(), a)[5:]  # strip off file:
    # if testFiles == []: testFiles = [ "/dev/stdin" ]
    if not parseAs:
        usage()
        sys.exit(2)
    parseAs = uripath.join(uripath.base(), parseAs)
    if not grammarFile:
        # Default: take the grammar from the same document as the symbol.
        grammarFile = parseAs.split("#")[0]  # strip off fragid
    else:
        grammarFile = uripath.join(uripath.base(), grammarFile)
    # The Grammar formula
    progress("Loading " + grammarFile)
    start = clock()
    g = load(grammarFile)
    taken = clock() - start + 1
    progress("Loaded %i statements in %fs, ie %f/s." %
             (len(g), taken, len(g)/taken))
    document = g.newSymbol(parseAs)
    # Drain the module-global agenda; doProduction may append to it.
    already = []
    agenda = []
    errors = []
    doProduction(document)
    while agenda:
        x = agenda[0]
        agenda = agenda[1:]
        already.append(x)
        doProduction(x)
    if errors != []:
        progress("###### FAILED with %i errors." % len(errors))
        for s in errors:
            progress("\t%s" % s)
        exit(-2)
    else:
        progress("Ok for predictive parsing")
    #if parser.verb: progress "Branch table:", branchTable
    if verbose:
        progress("Literal terminals: %s" % literalTerminals.keys())
        progress("Token regular expressions:")
        for r in tokenRegexps:
            progress("\t%s matches %s" % (r, tokenRegexps[r].pattern))
    if yaccFile:
        yacc = open(yaccFile, "w")
        yaccConvert(yacc, document, tokenRegexps)
        yacc.close()
    if parseFile == None:
        exit(0)
    ip = webAccess.urlopenForRDF(parseFile, None)
    # NOTE: 'str' shadows the builtin from here on.
    str = ip.read().decode('utf_8')
    sink = g.newFormula()
    keywords = g.each(pred=BNF.keywords, subj=document)
    keywords = [a.value() for a in keywords]
    p = PredictiveParser(sink=sink, top=document, branchTable=branchTable,
                         tokenRegexps=tokenRegexps, keywords=keywords)
    p.verb = verbose
    start = clock()
    p.parse(str)
    taken = clock() - start + 1
    progress("Loaded %i chars in %fs, ie %f/s." %
             (len(str), taken, len(str)/taken))
    progress("Parsed <%s> OK" % parseFile)
    sys.exit(0)  # didn't crash
def doComponent(self, sts, comp, name, decls):
    """Write component comp of graph sts as an iCalendar
    BEGIN:name ... END:name block, recursing into subcomponents.

    decls maps a component name to (className, props, subs); each
    props entry gives the RDF predicate name and the iCalendar value
    type, which selects the serialisation routine below.
    """
    w = self._w
    w("BEGIN:%s%s" % (name, CRLF))
    className, props, subs = decls[name]
    if self.floatTZ and name == "VCALENDAR":
        # In the floatTZ case, we write out a timezone decl,
        # but it has a fully-qualified TZID, which Apple iCal doesn't
        # seem to grok (@@bug report pending).
        # So we use the short TZID to refer to this timezone,
        # which works even though it shouldn't.
        tzaddr = TZD + self.floatTZ
        progress("loading timezone...", tzaddr)
        tzkb = load(tzaddr)
        for tzc in tzkb.each(pred=RDF.type, obj=ICAL.Vtimezone):
            progress("exporting timezone...", tzc)
            # Temporarily clear floatTZ so the recursive call emits the
            # timezone component normally.
            save, self.floatTZ = self.floatTZ, None
            self.doComponent(tzkb, tzc, "VTIMEZONE", subs)
            self.floatTZ = save
    # Properties are written in sorted order for stable output.
    propNames = props.keys()
    propNames.sort()
    for prop in propNames:
        predName, valueType = props[prop][:2]
        for val in sts.each(comp, ICAL.sym(predName)):
            if valueType == 'TEXT':
                self.doSIMPLE(mkTEXT(val, sts), prop)
            elif valueType == 'INTEGER':
                self.doSIMPLE(mkINTEGER(val), prop)
            elif valueType == 'FLOAT':
                self.doSIMPLE(mkFLOAT(val), prop)
            elif valueType == 'URI':
                self.doURI(val, prop)
            elif valueType == 'DATE-TIME':
                self.doDateTime(sts, val, prop, predName)
            elif valueType == 'DURATION':
                self.doDuration(sts, val, prop, predName)
            elif valueType == 'RECUR':
                self.doRecur(sts, val, prop, predName)
            elif valueType == 'CAL-ADDRESS':
                self.doCalAddress(sts, val, prop, predName)
            elif type(valueType) == tuple:
                # A tuple value type means an RDF list of simple items,
                # serialised as a ';'-separated value.
                itemType = valueType[0]
                if itemType not in ('TEXT', 'INTEGER', 'FLOAT'):
                    raise RuntimeError, "list value type not implemented"
                values = []
                while 1:
                    first = val.first
                    val = val.rest
                    # NOTE(review): mkSIMPLE is called with one
                    # argument even for 'TEXT', whereas the scalar
                    # branch calls mkTEXT(val, sts) -- confirm mkTEXT
                    # accepts a single argument here.
                    mkSIMPLE = {'TEXT': mkTEXT, 'INTEGER': mkINTEGER,
                                'FLOAT': mkFLOAT}[itemType]
                    v = mkSIMPLE(first)
                    values.append(v)
                    if val == RDF.nil:
                        break
                self.doSIMPLE(';'.join(values), prop)
            else:
                raise RuntimeError, "value type not implemented: " + \
                      str(valueType) + " on " + str(prop)
    # Collect subcomponents, classify each by its rdf:type, then emit
    # them in componentOrder.
    compToDo = []
    for sub in sts.each(subj=comp, pred=ICAL.component):
        for subName in subs.keys():
            className, p, s = subs[subName]
            if sts.statementsMatching(RDF.type, sub, ICAL.sym(className)):
                compToDo.append((sts, sub, subName, subs))
                break
        else:
            raise ValueError, "no component class found: %s" % subName
    # compToDo.sort(key=compKey) # darn... only in python 2.4
    compToDo.sort(componentOrder)
    for sts, sub, subName, subs in compToDo:
        self.doComponent(sts, sub, subName, subs)
    # timezone standard/daylight components use a different structure
    # hmm... is this a good idea?
    if name == 'VTIMEZONE':
        self.doTimeZone(sts, comp, subs)
    w("END:%s%s" % (name, CRLF))
def doComponent(self, sts, comp, name, decls):
    """Serialise component comp of graph sts as an iCalendar
    BEGIN:name ... END:name block, recursing into its subcomponents.

    decls maps component names to (className, props, subs); a props
    entry holds the RDF predicate name and the iCalendar value type
    that selects how the value is written.
    """
    w = self._w
    w("BEGIN:%s%s" % (name, CRLF))
    className, props, subs = decls[name]
    if self.floatTZ and name == "VCALENDAR":
        # In the floatTZ case, we write out a timezone decl,
        # but it has a fully-qualified TZID, which Apple iCal doesn't
        # seem to grok (@@bug report pending).
        # So we use the short TZID to refer to this timezone,
        # which works even though it shouldn't.
        tzaddr = TZD + self.floatTZ
        progress("loading timezone...", tzaddr)
        tzkb = load(tzaddr)
        for tzc in tzkb.each(pred=RDF.type, obj=ICAL.Vtimezone):
            progress("exporting timezone...", tzc)
            # Clear floatTZ around the recursive call so the timezone
            # component itself is emitted normally.
            save, self.floatTZ = self.floatTZ, None
            self.doComponent(tzkb, tzc, "VTIMEZONE", subs)
            self.floatTZ = save
    # Sorted property order keeps the output deterministic.
    propNames = props.keys()
    propNames.sort()
    for prop in propNames:
        predName, valueType = props[prop][:2]
        for val in sts.each(comp, ICAL.sym(predName)):
            if valueType == 'TEXT':
                self.doSIMPLE(mkTEXT(val, sts), prop)
            elif valueType == 'INTEGER':
                self.doSIMPLE(mkINTEGER(val), prop)
            elif valueType == 'FLOAT':
                self.doSIMPLE(mkFLOAT(val), prop)
            elif valueType == 'URI':
                self.doURI(val, prop)
            elif valueType == 'DATE-TIME':
                self.doDateTime(sts, val, prop, predName)
            elif valueType == 'DURATION':
                self.doDuration(sts, val, prop, predName)
            elif valueType == 'RECUR':
                self.doRecur(sts, val, prop, predName)
            elif valueType == 'CAL-ADDRESS':
                self.doCalAddress(sts, val, prop, predName)
            elif type(valueType) == tuple:
                # Tuple value type: an RDF list of simple items joined
                # with ';'.
                itemType = valueType[0]
                if itemType not in ('TEXT', 'INTEGER', 'FLOAT'):
                    raise RuntimeError, "list value type not implemented"
                values = []
                while 1:
                    first = val.first
                    val = val.rest
                    # NOTE(review): the 'TEXT' maker is invoked with a
                    # single argument here, unlike mkTEXT(val, sts)
                    # above -- verify mkTEXT's signature.
                    mkSIMPLE = {'TEXT': mkTEXT, 'INTEGER': mkINTEGER,
                                'FLOAT': mkFLOAT}[itemType]
                    v = mkSIMPLE(first)
                    values.append(v)
                    if val == RDF.nil:
                        break
                self.doSIMPLE(';'.join(values), prop)
            else:
                raise RuntimeError, "value type not implemented: " + \
                      str(valueType) + " on " + str(prop)
    # Classify each subcomponent by rdf:type, then emit in
    # componentOrder.
    compToDo = []
    for sub in sts.each(subj=comp, pred=ICAL.component):
        for subName in subs.keys():
            className, p, s = subs[subName]
            if sts.statementsMatching(RDF.type, sub, ICAL.sym(className)):
                compToDo.append((sts, sub, subName, subs))
                break
        else:
            raise ValueError, "no component class found: %s" % subName
    # compToDo.sort(key=compKey) # darn... only in python 2.4
    compToDo.sort(componentOrder)
    for sts, sub, subName, subs in compToDo:
        self.doComponent(sts, sub, subName, subs)
    # timezone standard/daylight components use a different structure
    # hmm... is this a good idea?
    if name == 'VTIMEZONE':
        self.doTimeZone(sts, comp, subs)
    w("END:%s%s" % (name, CRLF))
print __doc__ sys.exit(2) output = None for o, a in opts: if o in ("-h", "--help"): print __doc__ sys.exit() if o in ("-v", "--verbose"): verbose = 1 if o in ("-g", "--gpsData"): gpsData = a if o in ("-o", "--output"): outputURI = a if verbose: progress( "Loading Photo data...") f = load(gpsData + "/PhotoMeta.n3") if verbose: progress( "Loaded.") ss = f.statementsMatching(pred=FILE.date) events = [] for s in ss: ph = s.subject() photo = str(ph) date = str(s.object()) da = f.any(subj=ph, pred=EXIF.dateTime) if da != None: date = str(da) else: progress("Warning: using file date %s for %s" %(date, photo)) events.append((date, "P", (ph, photo))) if verbose: progress("%s: %s" %(date, photo))
sys.exit(2) output = None for o, a in opts: if o in ("-h", "--help"): print __doc__ sys.exit() if o in ("-v", "--verbose"): verbose = 1 if o in ("-g", "--gpsData"): gpsData = a if o in ("-o", "--output"): outputURI = a if verbose: progress("Loading Photo data...") f = load(gpsData + "/PhotoMeta.n3") if verbose: progress("Loaded.") ss = f.statementsMatching(pred=FILE.date) events = [] for s in ss: ph = s.subject() photo = str(ph) date = str(s.object()) da = f.any(subj=ph, pred=EXIF.dateTime) if da != None: date = str(da) else: progress("Warning: using file date %s for %s" % (date, photo)) events.append((date, "P", (ph, photo))) if verbose: