def schedule(self):
    """Rebuild the fetch queue from every document with a positive interval.

    Sets ``self.refresh`` to the current time plus ``Loader.Reload_Min``
    seconds, replaces ``self.queue`` with a fresh heap, and pushes one
    ``(due_time, document, load_callable)`` entry per selected document.
    The number of queued entries is reported through ``log.msg``.
    """
    self.refresh = datetime.now() + timedelta(seconds=Loader.Reload_Min)
    self.queue = []

    for doc in Document.select(Document.q.interval > 0):
        # A document that was fetched before is due one interval after its
        # last fetch; one with no fetch timestamp is due right away.
        due = (
            doc.fetched_at + timedelta(seconds=doc.interval)
            if doc.fetched_at
            else datetime.now()
        )
        # Loaders are looked up by the document's type.
        handler = self.loader[doc.doc_type]
        # NOTE(review): entries are plain tuples, so two identical due times
        # make the heap compare the documents themselves -- confirm that
        # Document instances are orderable on the target interpreter.
        heappush(self.queue, (due, doc, handler.load))

    log.msg("%d events scheduled." % len(self.queue))
def add(self, guid, content=None, source=None):
    """Fetch or create the document for *guid* and link it to *source*.

    Args:
        guid: Identifier used to look the document up via
            ``Document.byGuid``; also stored on a newly created document.
        content: Optional raw content. It is processed and stored only when
            the document has no ``data`` yet.
        source: Passed as the ``source`` of the ``Link`` created for the
            document.

    Returns:
        The existing or newly created ``Document``.
    """
    # The original message formatted `title` and `url`, which are neither
    # parameters nor locals here; report the values this call actually has.
    sys.stderr.write("Adding '%s' (%s)\n" % (guid, source))

    try:
        doc = Document.byGuid(guid)
    except Exception:
        # Presumably the lookup raises when no document has this guid
        # (TODO confirm the exact exception type and narrow further).
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        doc = Document(guid=guid, doc_type=self.doc_type)

    # NOTE(review): a Link is recorded on every call, including when source
    # is None -- confirm that is intended.
    Link(source=source, target=doc)

    # Only fill in derived fields the first time content is seen; existing
    # data is never overwritten.
    if content and not doc.data:
        data, tags, vector = self.process(content)
        doc.data = data
        doc.vector = vector
        doc.tags = tags
        doc.sync()

    return doc
from store import Document, sqlhub, connectionForURI
from vector import Vector, VectorSet
import sys
import os

# Command-line script: given a document URL as the first argument, print the
# entries of the stored vector index nearest to that document, one per line,
# as "<distance>\t<url>".

DB_URI = "sqlite:/tmp/memex.db"

# Fail fast with a usage message instead of an IndexError raised only after
# the database has been opened and the whole index has been built.
if len(sys.argv) < 2:
    sys.exit("usage: %s URL" % sys.argv[0])

sqlhub.processConnection = connectionForURI(DB_URI)

# Build the in-memory index from every document that has a stored vector.
index = VectorSet()
for doc in Document.select():
    if doc.vector:
        # doc.vector is a comma-separated string of integers; build a real
        # list so the result does not depend on whether map() is lazy.
        index[doc.url] = Vector([int(part) for part in doc.vector.split(",")])

# The parenthesised single-argument print emits the same text whether print
# is a statement (Python 2) or a function (Python 3).
for dist, url in index.nearest(index[sys.argv[1]]):
    print("%.3f\t%s" % (dist, url))