def reloadContext(self, filename):
    """Update the context to contain the contents of the file.

    There may be a time when the context is partially empty or
    partially full.

    filename is relative to inputDirectory. Its extension, with the
    dot removed, is handed to the parser as the format name.

    Parse errors (SAXParseException or ParseError) are logged as a
    warning and are not re-raised. The last-import time is only
    recorded after the parse call returns without raising.
    """
    ctx = contextFromFilename(filename, self.contextPrefix,
                              self.inputDirectory)
    log.debug("reloading %s", ctx)
    try:
        # "foo.n3" -> "n3"; the parser is told the format by extension
        ext = os.path.splitext(filename)[1].replace('.', '')
        self.graph.safeParse(filename, publicID=ctx, format=ext)
        self.setLastImportTime(ctx, datetime.datetime.now(tzlocal()),
                               filename)
    except (xml.sax.SAXParseException, ParseError) as e:
        # A malformed input file must not abort the caller; report it
        # and leave the previous import record in place.
        log.warning("parse error reading file. (%s)", e)
def fileDisappeared(self, filename):
    """
    React to an input file that is no longer on disk: clear the
    subgraph for its context and remove its import record.
    """
    # Historical note: this used to raise ValueError when the filename
    # was not underneath the input directory. Because polling stops on
    # the first error, that required a db restart, so the ValueError
    # is now absorbed here.
    try:
        goneContext = contextFromFilename(
            filename, self.contextPrefix, self.inputDirectory)
    except ValueError:
        # No context can be worked out for this filename. It may be
        # handled by a different SyncImport with another
        # inputDirectory, which is fine; there is nothing to do.
        return
    else:
        message = "input file %s disappeared, clearing %s" % (
            filename, goneContext)
        log.info(message)
        self.graph.subgraphClear(goneContext)
        self.removeImportRecord(goneContext)
def fileIsUpdated(self, filename): """is the file's mtime newer than when we last imported it""" try: try: ctx = contextFromFilename(filename, self.contextPrefix, self.inputDirectory) except ValueError, e: self._logFileError(filename, log.debug, "filename %s doesn't tell us a context- " "skipping (%s)" % (filename, e)) return False # we get the mtime of the target file, not a symlink mtime = datetime.datetime.fromtimestamp( os.path.getmtime(os.path.realpath(filename)), tzlocal()) last = self.lastImportTime(ctx) if not last or last < mtime: log.debug("%s < %s, file %s is updated" % (last.isoformat() if last else 'None', mtime.isoformat(), filename)) return True
def reloadContextSesame(self, filename):
    """A version of reloading that just uploads the file to sesame
    for parsing and graph replacement.

    Currently using internal APIs; loadFromFile should be made public.

    filename must end with '.n3' or '.nt' (checked with an assert).
    The file text is sent with a PUT to "/statements" for the file's
    context, and the last-import time is recorded only if that request
    does not raise.

    Any Exception raised by the upload or the bookkeeping is logged
    with its traceback and not re-raised; KeyboardInterrupt is
    propagated.
    """
    ctx = contextFromFilename(filename, self.contextPrefix,
                              self.inputDirectory)
    log.debug("reloading %s", ctx)
    assert filename.endswith(('.n3', '.nt'))

    # Read inside a context manager so the handle is closed promptly
    # instead of waiting for garbage collection.
    with open(filename) as f:
        n3 = f.read()

    # rdflib makes prefixes like _1; sesame rejects them. This
    # might match unexpected stuff!
    n3 = re.sub(r'_(\d+):', r'prefix_\1:', n3)

    # Timestamp is taken before the upload starts, so a file modified
    # while the request is in flight still looks newer afterwards.
    now = datetime.datetime.now(tzlocal())
    try:
        self.graph._request("PUT", path="/statements",
                            queryParams={'context': ctx.n3()},
                            payload=n3,
                            headers={'Content-Type': 'text/rdf+n3'})
        self.setLastImportTime(ctx, now, filename)
    except KeyboardInterrupt:
        # Kept explicit so an interrupt is never swallowed by the
        # broad handler below.
        raise
    except Exception:
        log.error("while trying to reload:\n%s", traceback.format_exc())