def fetch(url, supercache=False):
    """Download the contents over the web, caching them under cacheDir.

    When ``supercache`` is true, a previously downloaded copy is reused
    if present; otherwise the URL is fetched again (wikis aren't so
    good about giving us good cacheability information -- see the
    wiki_cache module for a sophisticated version of this).

    TODO: we should have a separate process which keeps these
    up-to-date by using the wiki watch mechanism, especially for wikis
    (unlike wikipedia) which will mail out change notifications.  Cache
    structure: wiki-dump-cache/quoted-URL.  Modern linux filesystems
    (post ext2) should handle this fine up to millions of cache
    entries.
    """
    if not os.path.exists(cacheDir):
        os.makedirs(cacheDir)
    # quote with no safe characters, so the whole URL becomes one
    # filesystem-safe path component
    filename = cacheDir+"/"+urllib.quote(url, "")
    if not supercache or not os.path.exists(filename):
        debug("web", ' fetching '+url)
        urllib.urlretrieve(url, filename)
        # be polite to the remote server between fetches
        time.sleep(0.2)
    # was: return(open(filename).read()) -- the file handle leaked
    # until garbage collection; close it explicitly.
    f = open(filename)
    try:
        return f.read()
    finally:
        f.close()
def __getattr__(self, prop):
    """Return the Multi for this property, creating it on first access.

    Names beginning with an underscore are treated as ordinary python
    attributes and never looked up in the property dict.
    """
    # was: prop[0] is "_" -- an identity comparison against a string
    # literal, which only works by accident of CPython's interning of
    # 1-char strings, and raises IndexError on an empty name.
    if prop.startswith("_"):
        raise AttributeError
    prop = q(prop)
    debug('ast2-get', 'returning attr for', prop)
    return self.dict.setdefault(prop, Multi())
def ps_heart(self, newline): s = newline # figure out a smart base? #if self.get_base(): # s += 'Base('+ps_quoted_string(self.base)+')'+newline # s += newline if use_qnames: if hasattr(self, '_prefix_map'): map = self._prefix_map else: map = PrefixMap() self.add_to_prefix_map(map) debug('prefix_map', 'final map:', `map.entries`) for (short, long) in map.entries: s += 'Prefix(%s <%s>)%s' % (short, long, newline) s += newline # @@@ import if self.payload: s += self.payload.as_ps(newline) s += newline return s
def regonize(self, line):
    """Recognize this command in a free-form line.

    Tokenizes the line, checks for at least one of this command's
    keywords, then scans for each argument's value.  Returns the
    argument dict when the line matches and validates, else None.
    (The misspelled name is kept because callers use it.)
    """
    tokens = re.split(r" |\?|,|\.", line.lower())
    keywordFound = False
    for token in tokens:
        if token in self.keywords:
            keywordFound = True
    # was: dbg.debug(tokens, re.split(line.lower(), " ")) -- the
    # re.split arguments were swapped, so the whole line was being used
    # as a regex pattern; just log the token list.
    dbg.debug(tokens)
    dbg.debug("Scanned for ", self.name, ", keywords:", keywordFound)
    if not keywordFound:
        return None
    argDict = dict()
    for argName in self.arguments:
        arg = self.arguments[argName]
        pVals = arg.scanForValues(tokens)
        if len(pVals) == 0:
            continue
        # TODO: change this temporary solution
        argDict[argName] = pVals[0]
    if self.autofill is not None:
        self.autofill(argDict)
    if self.validate(argDict):
        return argDict
    else:
        return None
def run():
    """Command-line driver: parse options, load plugins, read the input
    document, and parse it with the chosen input plugin.

    Exits via sys.exit for the plugin system-test path and for usage
    errors; syntax errors in the input are reported to stderr.
    """
    plugin.import_all()
    parser = OptionParser(usage="%prog [options] input-location",
                          version=__version__)
    parser.set_defaults(verbose=True)
    parser.set_defaults(output="-")
    parser.add_option("-q", "--quiet",
                      action="store_false", dest="verbose",
                      help="don't print status messages to stderr")
    parser.add_option("-D", "--debug",
                      action="append", dest="debugTags",
                      help="turn on debugging of a particular type (try 'all')")
    #parser.add_option("-O", "--output", action="store", dest="output",
    #    help="Save the output to this file (else stdout)")
    plugin.add_to_OptionParser(parser)
    (options, args) = parser.parse_args()
    # feed the options back, somehow...
    # plugin.options_from_OptionParser(options) maybe?
    if options.debugTags:
        debugtools.tags.update(options.debugTags)
    verbose = options.verbose
    debug('cmdline', 'args:', args)
    plugins = plugin.combined(options)
    # a plugin may implement "system_test"; its result becomes our
    # exit status
    if plugins.can_run("system_test"):
        if plugins.system_test():
            sys.exit(0)
        else:
            sys.exit(1)
    if len(args) != 1:
        parser.print_help()
        sys.exit(1)
    input_filename = args[0]
    # "-" means read from stdin, per unix convention
    if input_filename == "-":
        input_stream = sys.stdin
    else:
        input_stream = open(input_filename, "r")
    input_text = input_stream.read()
    debug('cmdline', 'read %d bytes' % len(input_text))
    iproc = plugin.get_one_plugin(["parse"], options)
    try:
        doc = iproc.parse(input_text)
    except error.SyntaxError, e:
        err = ""
        err += e.message + "\n"
        err += e.illustrate_position()
        print >>sys.stderr, err
    return
def debugattrs(node):
    """Dump a DOM node's attribute maps and each attribute pair to the
    'attr' debug channel."""
    debug("attr(", "attributes: ")
    debug("attr", "attrsNS: %s" % repr(node._attrsNS))
    debug("attr", "attrs: %s" % repr(node._attrs))
    attrs = node.attributes
    for idx in range(attrs.length):
        item = attrs.item(idx)
        debug("attr", "%s=\"%s\"" % (item.name, item.value))
    debug("attr)")
def __setattr__(self, prop, value):
    """Add ``value`` to the Multi stored for this property.

    Underscore-prefixed names are rejected so internal python
    attributes never land in the property dict.
    """
    # was: prop[0] is "_" -- identity test against a string literal;
    # works only via CPython interning and fails on an empty name.
    if prop.startswith("_"):
        raise AttributeError
    assert not isinstance(value, Multi)
    assert isinstance(prop, basestring)   # unicode? IRI.
    prop = q(prop)
    debug('ast2', 'adding', prop, "=", value)
    self.dict.setdefault(prop, Multi()).add(value)
def add(self, short, long): if self.has_short(short): if self.expand(short) == long: pass # just a dup else: raise RuntimeError, 'changing prefix binding' self.entries.append( (short, long) ) debug('prefix_map', 'added entry', short, long)
def build_domap(self):
    """Build the dispatch table self.domap by reflecting over our
    do_* methods: domap['foo'] is the bound method do_foo."""
    self.domap = {}
    prefix = "do_"
    for attr_name in dir(self):
        debug('build_domap', 'candidate:', attr_name)
        if not attr_name.startswith(prefix):
            continue
        key = attr_name[len(prefix):]
        self.domap[key] = getattr(self, attr_name)
        debug('build_domap', 'added:', key)
def add(self, new):
    """Add ``new`` to this Multi's values unless already present.

    Returns True when the value was added, False for a duplicate.
    """
    debug('ast2-multi-add', 'multi add', new)
    assert not isinstance(new, Multi)
    if new in self.values:
        return False
    self.values.append(new)
    return True
def __getattr__(self, prop):
    """Return a fresh Multi bound to (this instance, this property).

    Underscore-prefixed names are internal attributes and must never
    reach here.
    """
    # was: prop[0] is "_" -- fragile identity comparison; also
    # modernized the py2 "raise X, msg" form (works in both 2 and 3).
    if prop.startswith("_"):
        raise AttributeError(
            "Instance.__getattr__ called on internal attribute %s. Ooops."
            % repr(prop))
    prop = self._q(prop)
    debug('ast2-get', 'returning attr for', prop)
    m = self._factory.Multi()
    m.subj = self._ident
    m.prop = prop
    return m
def __setattr__(self, prop, value):
    """Assert rdf(subject, property, value) into the factory's engine.

    Underscore-prefixed names are internal attributes and must never
    reach here.
    """
    # was: prop[0] is "_" -- fragile identity comparison; also
    # modernized the py2 "raise X, msg" form (works in both 2 and 3).
    if prop.startswith("_"):
        raise AttributeError(
            "Instance.__setattr__ called on internal attribute %s. Ooops."
            % repr(prop))
    assert not isinstance(value, Multi)
    assert isinstance(prop, basestring)   # unicode? IRI.
    prop = self._q(prop)
    debug('ast2', 'adding', prop, "=", value)
    # the braces are substituted by assertz from locals(), not python
    self._factory.engine.assertz("rdf({self._ident}, {prop}, {value})",
                                 locals())
def db_start(self):
    """Create the database record for this scan.

    Resolves the data-source IRI to an id and inserts a 'scan' row,
    remembering its id in self.id for later updates.
    """
    debug('scan', 'database connection started')
    self.source_id = irimap.to_id(self.db, self.data_source_iri)
    self.id = self.db.insert('scan',
                             source_id=self.source_id,
                             time_begun=self.start,
                             triples=0,
                             last_modified=self.last_modified,
                             status=0,   # or just use time_completed?
                             obsoleted_by=max_timecode
                             )
    debug('scan', 'database record created', self.id)
def add(self, new, graph_if_adding=None):
    """if this value is in the merged graph, just return false;
    If it's not, then add it to the graph_if_adding graph.
    """
    debug('ast2-multi-add', 'multi add', new)
    assert not isinstance(new, Multi)
    if new in self.values:
        return False
    self.value_holders.append(ValueHolder(new, graph_if_adding))
    return True
def do(self, *objList):
    """Serialize each object by dispatching on a list of candidate
    type names, falling back to self.default_do.

    While recursing, self.internal is set so that only the outermost
    call triggers a flush at the end.
    """
    was_internal = self.internal
    self.internal = True
    for obj in objList:
        typenames = []
        if isinstance(obj, nodecentric.DataValue):
            typenames.append(obj.serialize_as_type)
            typenames.append("BaseDataValue")
        if isinstance(obj, nodecentric.Sequence):
            typenames.append("Sequence")
        elif isinstance(obj, nodecentric.Instance):
            # use schema.py to find superclasses?
            # ... WHICH TYPE to use???
            try:
                pt = getattr(obj, nodecentric.RDF_TYPE).the.lexrep
            except Exception:
                # was a bare "except:", which also swallowed
                # KeyboardInterrupt/SystemExit; keep the best-effort
                # fallback but let real exits propagate.
                pt = "Resource"
            assert isinstance(pt, basestring)
            try:
                (dummy, pt) = pt.split("#")
            except ValueError:
                pass
            typenames.append(pt)
        else:
            # is there some good way to look at the python superclass
            # hierarchy...?
            typenames.append(type(obj).__name__)
        debug('serializer(', 'typenames', typenames)
        # first candidate with a registered handler wins
        doer = self.default_do
        for t in typenames:
            try:
                doer = self.domap[t]
                break
            except KeyError:
                pass
        debug('serializer', 'doer:', doer)
        doer(obj)
        debug('serializer', 'current', self.current_element)
        debug('serializer)')
    self.internal = was_internal
    if not was_internal:
        debug('serializer', 'flushing...')
        self.flush()
def replace_atomic(self, atom):
    '''Replace any atoms containing external-exprs with an And of the
    atom re-written and a call to calc'''
    debug('f2p', 'in replace-atomic, self is', self)
    # only Atom and Equal formulas can contain externals we care about
    if not (atom.has_type(rifns+"Atom") or atom.has_type(rifns+"Equal")):
        return atom
    debug('f2p(', 'found an atom')
    # harvest collects (variable, external-expr) pairs pulled out by
    # replace_external
    harvest = []
    new_atom=atom.map_replace(lambda x: self.replace_external(x, harvest))
    if harvest:
        # wrap: And( calc(var, expr)..., rewritten-atom )
        parent = self.factory.Instance(rifns+"And")
        for (var, expr) in harvest:
            calc = self.factory.Instance(rifns+"Atom")
            setattr(calc, rifns+"op", self.calc_pred)
            setattr(calc, rifns+"args",
                    self.factory.Sequence(items=[var, expr]))
            parent._addpv(rifns+"formula", calc)
        parent._addpv(rifns+"formula", new_atom)
        debug('f2p)', 'replaced it')
        return parent
    else:
        debug('f2p)', 'left it alone')
        return new_atom
def query(self, lang1, lang2, word):
    """Look up ``word`` (lang1 -> lang2) on the site and scrape the
    dictionary section.

    Returns a list of results from getWord, or None when the request
    times out or the expected page structure is not found.
    """
    # Get the site
    url = self.get_url(lang1, lang2, word)
    dbg.debug(url)
    try:
        response = requests.get(url, timeout=5)
    except requests.exceptions.Timeout:
        dbg.printerr("The connection has timed out")
        # was missing: execution fell through with ``response`` unbound,
        # raising NameError on the next line
        return None
    site_soup = BeautifulSoup(response.content, "html.parser")
    # Search the dictionary
    d_tag = site_soup.find(id = "dictionary")
    #dbg.debug(d_tag.prettify())
    if d_tag is None:
        dbg.printerr("Didn't find the result from dictionary")
        return None
    dictPage = None
    for tag in (d_tag.find_all(class_ = "isMainTerm")
                + d_tag.find_all(class_ = "isForeignTerm")):
        #dbg.debug("Found a main term")
        #dbg.debug(tag.prettify())
        if not BSH.hasAttr(tag, "data-source-lang",
                           Languages.shortName[lang1]):
            continue
        # Found the right tag
        dictPage = tag
    if dictPage is None:
        # Din't find the right page
        dbg.printerr("Didn't find the right page")
        return None
    results = []
    for entry in dictPage.find_all(class_ = "lemma featured"):
        result = self.getWord(entry, lang1, lang2)
        if result is None:
            continue
        results.append(result)
    return results
def builtin_calc(self, var, expr):
    """Dispatch a RIF builtin function expression to the matching
    builtin_* method, binding its result to ``var``.

    Raises MissingBuiltin if the operator is not a recognized builtin.
    """
    # not currently used...
    func = expr.op.the.value.the
    if func.datatype == rifns+"iri":
        (ns, local) = qname.uri_split(func.lexrep)
        if ns == rif_bif:
            arg = expr.args.the.items
            # e.g. numeric-add -> builtin_numeric_add
            method_name = "builtin_"+local.replace("-","_")
            debug('prolog-bi', 'looking for func ', method_name)
            attr = getattr(self, method_name, None)
            if attr:
                attr(var, *arg)
                return
    raise MissingBuiltin   # it's not builtin...
def run_query(kb, query, msg):
    """assert the document, then query for the pattern, returning all
    the sets of bindings.

    Serializes kb and query (functions rewritten to predicates) into a
    temp file, runs swipl over it, and reads the solutions back from a
    second temp file.
    """
    # temp files carry the serialized prolog to and from swipl;
    # delete=False so the external process can open them by name
    to_pl = tempfile.NamedTemporaryFile('wb', dir="testing_tmp",
                                        delete=False)
    from_pl = tempfile.NamedTemporaryFile('rb', dir="testing_tmp",
                                          delete=False)
    debug('prolog', to_pl.name, from_pl.name)
    global filenames
    filenames = (to_pl.name, from_pl.name)
    nsmap = qname.Map()
    nsmap.defaults = [qname.common]
    #nsmap.bind("", "http://www.w3.org/2007/rif#")
    to_pl.write("% "+msg)
    ast = kb._factory
    # the eval predicate used as calc_pred by the func->pred rewrite
    rifeval = ast.Instance('rif_Const')
    rifeval.rif_value = ast.DataValue(rif_bip+'eval', rifns+'iri')
    kb_pform = func_to_pred.Plugin(calc_pred=rifeval).transform(kb)
    query_pform = func_to_pred.Plugin(calc_pred=rifeval).transform(query)
    Plugin(nsmap=nsmap, supress_nsmap=True).serialize(kb_pform, to_pl)
    Plugin(nsmap=nsmap).serialize(query_pform, to_pl)
    to_pl.close()
    popen = subprocess.Popen(
        ["swipl", "-q", "-g",
         "[builtins], run_query(%s, %s), halt."
         % (atom_quote(to_pl.name), atom_quote(from_pl.name))
         ],
        bufsize=0,   # unbuffered for now at least
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        close_fds=True)
    # to start reading this safely....?
    # popen.stdout.setblocking(False)
    (stdoutdata, stderrdata) = popen.communicate()
    if stdoutdata:
        # since this doesn't seem to ever happen, I guess we can
        # use this for the results feed
        error.notify(error.UnexpectedOutputFromSubprocess(
            "\n===stdout===\n"+stdoutdata+"=========="))
    if stderrdata:
        error.notify(error.UnexpectedOutputFromSubprocess(
            "\n===stderr===\n"+stderrdata+"=========="))
    if popen.returncode != 0:
        error.notify(error.ErrorReturnFromSubprocess(
            "Return code: "+str(popen.returncode)))
    result = read_solutions(from_pl)
    return result
def map_replace(self, func, *args, **kwargs):
    '''call the func on each descendant Instance, replacing it with
    whatever Instance func returns.  Works from the leaves up,
    operating on children before their parents.  If func returns None,
    that list-item or property value is simply removed'''
    debug('ast2-map(', 'begin')
    for prop in self.properties:
        multi = getattr(self, prop)
        # NOTE(review): the return value of list_map is discarded here;
        # presumably list_map mutates multi.values in place (another
        # map_replace implementation in this codebase rebuilds the
        # Multi from the returned list) -- confirm.
        self.list_map(multi.values, func, True, args, kwargs)
    # finally apply func to ourselves; caller must use the return value
    new_self = func(self, *args, **kwargs)
    debug('ast2-map)')
    return new_self
def flush(self):
    """Write out the accumulated XML tree.

    Counts namespace-prefix usage across the tree and binds the
    most-used prefix as the default namespace before serializing.
    """
    if self.root is not None:
        self.short_count = {}
        self.ns_add_tree(self.root)
        if self.short_count:
            debug('serializer', 'short_count:',self.short_count)
            # pick the (count, short) pair with the highest count
            (count, best_short) = sorted(
                [(count, short)
                 for (short, count) in self.short_count.items()]
                )[-1]
            debug('serializer', 'count, best_short =', count, best_short)
            # make the most-used prefix the default namespace
            self.nsmap.bind('', self.nsmap.getLong(best_short))
        # NOTE(review): assumed these two calls belong inside the
        # root-is-not-None branch (out_xml(None) would not make sense);
        # confirm against the original layout.
        self.out_xml(self.root, is_root=True)
        self.stream.write(self.newline)
def add_to_prefix_map(self, map): debug('prefix_map', 'adding IRI', self.text) # hack! save the map used! hack! hack! # # (otherwise, we'd have to pass it all the way down, # or switch to using a serializer object.) # self.map = map try: (long, local_part) = iri_split(self.text) map.add_long(long) except IndexError: pass
def handle_builtin_atom(self, obj):
    """Emit a RIF builtin predicate atom.

    Dispatches to a matching builtin_* method when one exists,
    otherwise emits it as a plain predicate call.  Raises
    MissingBuiltin when the operator is not a builtin IRI at all.
    """
    pred = obj.op.the.value.the
    if pred.datatype == rifns+"iri":
        (ns, local) = qname.uri_split(pred.lexrep)
        if ns == rif_bip:
            arg = obj.args.the.items
            # e.g. numeric-add -> builtin_numeric_add
            method_name = "builtin_"+local.replace("-","_")
            debug('prolog-bi', 'looking for pred ', method_name)
            attr = getattr(self, method_name, None)
            if attr:
                attr(*arg)
            else:
                # no special handler: emit name(args) literally
                self.outk(method_name)
                self.outk("(")
                self.do(obj.args.the)
                self.outk(")")
            return
    raise MissingBuiltin   # it's not builtin
def add(self, new, graph_if_adding=None):
    """if this value is in the merged graph, just return false;
    If it's not, then add it to the graph_if_adding graph.
    """
    debug("ast2-multi-add", "multi add", new)
    assert not isinstance(new, Multi)
    assert self.subj is not None
    if isinstance(new, Sequence):
        new = new.attach(self.factory.graph)
    triple = (self.subj, self.prop, new._node)
    if triple in self.factory.graph:
        return False
    self.factory.graph.add(triple)
    return True
def do(self, *objList):
    """Serialize each object by looking up a handler for its type name
    in self.domap, falling back to self.default_do; raise RuntimeError
    when neither exists."""
    for obj in objList:
        if isinstance(obj, AST.Node):
            typename = obj._type[1]
        else:
            typename = type(obj).__name__
        debug('serializer', 'typename', typename)
        # use schema.py to find superclasses?
        if typename in self.domap:
            doer = self.domap[typename]
            debug('serializer', 'doer', doer)
        else:
            try:
                doer = self.default_do
            except AttributeError:
                raise RuntimeError(
                    "No serialization defined for your %s, %s"
                    % (typename, repr(obj)))
        doer(obj)
def scanForValues(self, tokens):
    """Scan the token list for this argument's indicator words and
    collect every validated value extracted just after one."""
    if not self.hasRecognitionData:
        return []
    dbg.debug("Scanning for ", self.name)
    found = []
    for pos, token in enumerate(tokens):
        if token not in self.indicators:
            continue
        for extract in self.valueAliases:
            candidate = extract(tokens, pos+1)
            if candidate is not None and self.validate(candidate):
                found.append(candidate)
    dbg.debug("For", self.name, "found", found)
    return found
def openStream(URL):
    """Open the resource named by URL, preferring local copies.

    Tries, in order: the URL as a literal local path (with optional
    .xml/.xsd extension), a localizeURLs prefix mapping onto the
    filesystem, and finally a real network fetch.
    """
    # 1. the URL may literally be a local path, possibly missing its
    #    extension
    for extra in ('', '.xml', '.xsd'):
        if os.path.exists(URL+extra):
            filename = URL+extra
            stream = open(filename)
            return stream
    # 2. configured URL prefixes map onto local filesystem trees
    for (URLPrefix, FilesystemPrefix) in localizeURLs:
        if URL.startswith(URLPrefix):
            suffix = URL[len(URLPrefix):]
            filename = FilesystemPrefix + suffix
            for extra in ('', '.xml', '.xsd'):
                if os.path.exists(filename+extra):
                    filename = filename+extra
                    break
            # NOTE(review): if none of the extensions exist, this still
            # opens the bare filename and will raise IOError -- confirm
            # that is the intended failure mode.
            debug('web', 'actually using file '+filename)
            stream = open(filename, "r")
            return stream
    # 3. fall back to fetching over the network
    debug("web", "opening "+`URL`)
    stream = urllib2.urlopen(URL)
    return stream
def replace_external(self, external, harvest):
    '''If the instance is a rif.External, then replace it with a new
    variable and append them both to harvest'''
    debug('f2p', 'looking for external, found a', external._primary_type)
    if not external.has_type(rifns+"External"):
        return external
    debug('f2p', 'got external!', external)
    debug('f2p', 'self is', self)
    debug('f2p', 'Using2 instance factory', self.factory)
    # fresh variable to stand in for the external expression
    var = self.factory.Instance(rifns+"Var")
    # TODO: put the variable name up in quantifier!!! (and make sure
    # it's unique...)
    # (we'll have to do that with a trick like 'harvest' on Forall, I
    # guess)
    setattr(var, rifns+"name",
            self.factory.StringValue("genvar%d" % self.v_count))
    self.v_count += 1
    harvest.append( (var, getattr(external, rifns+"content").the) )
    debug('f2p(', 'found an external; replaced with', var)
    return var
def map_replace(self, func, *args, **kwargs):
    '''call the func on each descendant Instance, replacing it with
    whatever Instance func returns.  Works from the leaves up,
    operating on children before their parents.  If func returns None,
    that list-item or property value is simply removed

    This is DESTRUCTIVE.  It modifies-in-place.  But it also might
    return a different value, so you must use the returned value.
    '''
    debug('ast2-map(', 'begin')
    for prop in self.properties:
        multi = getattr(self, prop)
        # map over the values, then rebuild the Multi from the result
        values = self.list_map(multi.values, func, True, args, kwargs)
        multi.clear()
        for value in values:
            multi.add(value)
    # finally apply func to ourselves; caller must use the return value
    new_self = func(self, *args, **kwargs)
    debug('ast2-map)')
    return new_self
def instantiate_with_options(plugin, options):
    """Construct the plugin, passing along the values the user gave for
    each of its declared options.

    Options that declare a closed ``values`` list are parsed as
    comma-separated ids and mapped to the matching value objects;
    unknown ids raise OptionValueError, as does giving multiple values
    to an option with maxcard == 1.
    """
    kwargs = {}
    for option in getattr(plugin, 'options', ()):
        # command-line key is namespaced by the plugin id
        key = plugin.id + "_" + option.name
        if getattr(option, "values", []):
            # closed value set: map each provided id to its object
            value = []
            for provided in getattr(options, key).split(","):
                found = False
                for v in option.values:
                    if v.id == provided:
                        value.append(v)
                        found = True
                if not found:
                    raise optparse.OptionValueError(
                        "--%s value %s not one of %s"
                        % (key, repr(provided),
                           [v.id for v in option.values]))
            # NOTE(review): maxcard check assumed to sit after the
            # provided-loop, inside the closed-value-set branch --
            # confirm against the original layout.
            if getattr(option, "maxcard", None) == 1:
                if len(value) > 1:
                    raise optparse.OptionValueError(
                        "--%s can only have ONE of the values %s"
                        % (key, [v.id for v in option.values]))
        else:
            # free-form option: pass the raw value through
            value = getattr(options, key)
        kwargs[option.name] = value
    debug('plugin', 'instantiating', plugin.id, "with args", kwargs)
    return plugin(**kwargs)
def save(node, filename, use_html_entities=True):
    """Serialize ``node`` as XML to ``filename``.

    Entify == use named HTML entities where possible.  This can help
    with diffs, etc.
    """
    stream = open(filename, "w")
    if use_html_entities:
        attrquot_step_1(node)
    text = node.toxml()
    # hack to workaround the fact the in HTML you can't actually
    # repeat the xmlns declaration for HTML -- which rdf:Literals give
    # us.
    text = text.replace('<div xmlns="http://www.w3.org/1999/xhtml">',
                        '<div>')
    if use_html_entities:
        entified = entifyString(text)
        text = attrquot_step_2(entified)
        debug('attrquot', 'step 2 strings were different?',
              text == entified)
    stream.write(text)
    stream.close()
def transform(self, instance):
    """Rewrite ``instance`` in place via replace_atomic, using either
    the configured default factory or the instance's own, and return
    the instance."""
    if self.default_factory is not None:
        self.factory = self.default_factory
    else:
        self.factory = instance._factory
        debug('f2p', 'self is', self)
        debug('f2p', 'Using instance factory', self.factory)
    debug('f2p', 'calling map_replace, self is', self)
    instance.map_replace(self.replace_atomic)
    return instance
def debug(self, lvl = 0):
    """Log this word; at lvl > 0 also its type/grammar and each
    translation (translations recurse with more detail at lvl > 1)."""
    if lvl == 0:
        dbg.debug("\nPrinting word ", self.word, " in ", self.language)
    else:
        dbg.debug("\nPrinting word ", self.word,
                  "(", self.wordType, ")",
                  " in ", self.language,
                  ": ", self.grammar)
    dbg.debug("Translations: ")
    if lvl > 0:
        for t in self.translations:
            t.debug((lvl > 1))
def map(function, obj, *args, **kwargs):
    """ Apply this function to every node in the tree, starting with
    the leaves and building up to the root.  The function is called
    with the first parameter being an object in the tree (a node or
    data value), and the other args are as given.  The function returns
    whatever the node should be replaced with.

    (Should we allow None to mean list items are removed from the list?
    Right now it puts a None in the list.)
    """
    # NOTE: this intentionally shadows the builtin map() within this
    # module; the recursive calls below refer to this function.
    if obj is None:
        return None
    if isinstance(obj, str):
        # do we allow these...?
        return obj
    debug('AST.map(', 'obj begins', obj)
    assert obj is not None
    if isinstance(obj, DataValue):
        # leaf: just transform it
        obj = function(obj, *args, **kwargs)
    elif isinstance(obj, Sequence):
        # recurse into each item, then transform the (recursed) result
        for i in range(0, len(obj.items)):
            value = obj.items[i]
            assert value is not None
            value = map(function, value, *args, **kwargs)
            assert value is not None
            obj.items[i] = function(value, *args, **kwargs)
    elif isinstance(obj, list):
        for i in range(0, len(obj)):
            value = obj[i]
            assert value is not None
            value = map(function, value, *args, **kwargs)
            assert value is not None
            obj[i] = function(value, *args, **kwargs)
    elif isinstance(obj, Node):
        # recurse into each public property, then transform the node
        for (key, value) in obj.__dict__.items():
            if key.startswith("_"):
                continue
            debug('AST.map', 'property ', key)
            value = map(function, value, *args, **kwargs)
            obj.__dict__[key] = function(value, *args, **kwargs)
        obj = function(obj, *args, **kwargs)
    else:
        raise RuntimeError, 'Cant map a %s' % type(obj)
    debug('AST.map)', 'obj ends as ', obj)
    assert obj is not None
    return obj
def react(self, line):
    """React to one line of user input.

    Lines starting with '>' are parsed in strict command mode;
    anything else goes through free-form keyword recognition.  Returns
    the executed command's return code, 0 for an empty line, or -1
    when nothing was recognized.
    """
    cut = line.split()
    if len(cut) == 0:
        # no reaction to an empty line
        return 0
    tokens = cut[1:]
    # CMD MODE
    if cut[0] == '>':
        command = None
        for cmd in CMDList:
            argDict = cmd.parse(tokens)
            if not argDict is None:
                if not cmd.validate(argDict):
                    dbg.debug("Command arguments not valid: ", argDict)
                    continue
                command = cmd
                arguments = argDict
        if command is None:
            print(
                "The command has not been recognized, it may be not properly structured or unsupported.",
                flush=True)
            #print(command.validate)
    # command recognition mode
    else:
        command = None
        for cmd in CMDList:
            argDict = cmd.regonize(line)
            if argDict is None:
                continue
            command = cmd
            arguments = argDict
        if command is None:
            print("I didn't quite understand. Please rephrase.",
                  flush=True)
    if command is None:
        dbg.debug("No command rezognized")
        return -1
    retcode = command.execute(self, arguments)
    # flush any pending partial output from the command
    print(end='', flush=True)
    return retcode
    # NOTE(review): the two statements below appear unreachable (both
    # branches above fall through to the returns just before this) --
    # confirm before removing.
    dbg.debug("Unsupported mode")
    return -1
def execute(session, arguments):
    """Execute the dictionary-lookup command: query the translation
    site and print the results according to the flags in ``arguments``.

    Returns True on success, False when the lookup itself failed.
    """
    dbg.debug("Executing lingvoy with args: ", arguments)
    # "all" turns on every no-value display flag not explicitly given
    if "all" in arguments:
        for name in ArgDict:
            if ArgDict[name].validate is ARG.noVal and not name in arguments:
                arguments[name] = []
    # lazily create the dictionary backend on first use
    if session.languages is None:
        dbg.debug("Setting a new dictionary")
        session.languages = Linguee()
    slang = arguments["slang"][0]
    source = Languages.longName[slang]
    dlang = arguments["dlang"][0]
    dest = Languages.longName[dlang]
    word = arguments["word"][0]
    resultList = session.languages.query(source, dest, word)
    # was missing: query() returns None on timeout / scrape failure,
    # which crashed the for-loop below with a TypeError
    if resultList is None:
        return False
    for result in resultList:
        print(result.word, end = '')
        if "lang" in arguments:
            print(" ({})".format(result.language), end = '')
        if not result.wordType is None:
            print(" ({})".format(result.wordType), end = '')
        # print grammar info about the word
        if "grammar" in arguments:
            first = True
            for info in result.grammar:
                if not first:
                    print(',')
                print(" {}: {}".format(info, result.grammar[info]), end = '')
                first = False
        print()
        # print the translations:
        for translation in result.translations:
            if "trans" in arguments:
                print(" " + translation.word, end = '')
                if "lang" in arguments:
                    print(" ({})".format(translation.language), end = '')
                if "grammar" in arguments and len(translation.grammar) > 0:
                    print(" (", end = '')
                    first = True
                    for info in translation.grammar:
                        if not first:
                            print (", ", end = '')
                        first = False
                        print("{}: {}".format(info, translation.grammar[info]), end= '')
                    print(")", end = '')
                print()
            if "example" in arguments:
                for example in translation.examples:
                    print(" " + example[0])
                    if "trans" in arguments:
                        print(" " + example[1])
    # finished execution with success
    return True
def debug(self, more = False):
    """Log this translation's word, language and grammar; with
    ``more`` also log each usage example."""
    dbg.debug(" ", self.word, "(", self.language, ")", self.grammar)
    if not more:
        return
    for example in self.examples:
        dbg.debug(" ", example)