Ejemplo n.º 1
0
    def reloadContext(self, filename):
        """Re-parse one input file into the graph context named after it.

        The context is computed by contextFromFilename() from filename,
        self.contextPrefix and self.inputDirectory. The file is then
        handed to self.graph.safeParse() with that context as publicID
        and the filename extension (without the dot) as the parser
        format. On success the import time for the context is recorded
        with self.setLastImportTime().

        While the reload runs there may be a time when the context is
        partially empty or partially full.

        SAX parse exceptions and ParseError are logged as warnings and
        swallowed; the import time is not recorded in that case. Any
        other exception propagates to the caller.

        filename is relative to inputDirectory
        """
        ctx = contextFromFilename(filename, self.contextPrefix,
                                  self.inputDirectory)
        log.debug("reloading %s", ctx)
        # the extension minus its dot selects the parser format
        ext = os.path.splitext(filename)[1].replace('.', '')
        # Take the timestamp before parsing: if the file is rewritten
        # while we are still reading it, its mtime will be later than
        # this value and the next mtime comparison will see the change.
        started = datetime.datetime.now(tzlocal())
        try:
            self.graph.safeParse(filename, publicID=ctx, format=ext)
            self.setLastImportTime(ctx, started, filename)
        except (xml.sax.SAXParseException, ParseError) as e:
            log.warning("parse error reading file. (%s)", e)
Ejemplo n.º 2
0
 def fileDisappeared(self, filename):
     """
     Handle an input file that has vanished from disk: clear the graph
     context derived from its name and remove that context's import
     record.

     Returns without doing anything when no context can be derived
     from filename (contextFromFilename raises ValueError).
     """
     # Historical note: the ValueError used to escape from here when
     # the filename was not underneath the input directory, and since
     # polling stops on the first error that meant a db restart.
     try:
         goneContext = contextFromFilename(
             filename, self.contextPrefix, self.inputDirectory)
     except ValueError:
         # No context for this name. Another SyncImport with a
         # different inputDirectory may be the one handling this
         # file, which is fine.
         return

     message = "input file %s disappeared, clearing %s" % (filename,
                                                           goneContext)
     log.info(message)
     self.graph.subgraphClear(goneContext)
     self.removeImportRecord(goneContext)
Ejemplo n.º 3
0
    def fileIsUpdated(self, filename):
        """is the file's mtime newer than when we last imported it"""
        try:
            try:
                ctx = contextFromFilename(filename, self.contextPrefix,
                                          self.inputDirectory)
            except ValueError, e:
                self._logFileError(filename, log.debug,
                      "filename %s doesn't tell us a context- "
                      "skipping (%s)" % (filename, e))

                return False
            # we get the mtime of the target file, not a symlink
            mtime = datetime.datetime.fromtimestamp(
                os.path.getmtime(os.path.realpath(filename)), tzlocal())
            last = self.lastImportTime(ctx)
            if not last or last < mtime:
                log.debug("%s < %s, file %s is updated" %
                          (last.isoformat() if last else 'None',
                           mtime.isoformat(), filename))
                return True
Ejemplo n.º 4
0
 def reloadContextSesame(self, filename):
     """Reload a context by uploading the raw file to sesame.

     A version of reloading that just sends the file contents to
     sesame for parsing and graph replacement, as a PUT to
     "/statements" with the context passed as a query parameter.
     Currently using internal APIs (self.graph._request);
     loadFromFile should be made public.

     filename must end in '.n3' or '.nt' (checked with an assert).
     The import time recorded on success is the time just before the
     upload started.

     Any exception raised by the upload or by recording the import
     time, other than KeyboardInterrupt, is logged with its traceback
     and swallowed.
     """
     ctx = contextFromFilename(filename, self.contextPrefix,
                               self.inputDirectory)
     log.debug("reloading %s", ctx)
     assert filename.endswith(('.n3', '.nt'))
     # close the file promptly instead of waiting for garbage collection
     with open(filename) as f:
         n3 = f.read()
     # rdflib makes prefixes like _1; sesame rejects them. This
     # might match unexpected stuff!
     n3 = re.sub(r'_(\d+):', r'prefix_\1:', n3)
     now = datetime.datetime.now(tzlocal())
     try:
         self.graph._request("PUT", path="/statements",
                             queryParams={'context' : ctx.n3()},
                             payload=n3,
                             headers={'Content-Type' : 'text/rdf+n3'})
         self.setLastImportTime(ctx, now, filename)
     except KeyboardInterrupt:
         raise
     except Exception:
         log.error("while trying to reload:\n%s", traceback.format_exc())