Example #1
0
 def savegetvar(self, var):
   """Save ``var`` under the key ``weird``, reopen the database and read it back.

   Checks that the value read through a fresh handle equals ``var`` and that
   the modification time of ``/tmp/test.db`` is the same before the read and
   after the handle is closed.
   """
   h = DumpTruck(dbname = '/tmp/test.db')
   try:
     h.save_var(u'weird', var)
   finally:
     # Release the handle even when the save raises.
     h.close()

   h = DumpTruck(dbname = '/tmp/test.db')
   try:
     # Snapshot the mtime after opening, so only the read and the close are
     # covered by the comparison below.
     t = os.stat('/tmp/test.db').st_mtime
     self.assertEqual(h.get_var(u'weird'), var)
   finally:
     # Release the handle even when the read or the assertion fails.
     h.close()

   # assertEqual rather than a bare assert: it still runs under ``python -O``
   # and reports both values on failure.
   self.assertEqual(os.stat('/tmp/test.db').st_mtime, t)
Example #2
0
 def savegetvar(self, var):
     """Round-trip ``var`` through the ``weird`` variable using two handles.

     The value is saved and the handle closed; a second handle then reads it
     back. The test asserts the value is unchanged and that the modification
     time of ``/tmp/test.db`` did not move between the moment the second
     handle was opened and the moment it was closed.
     """
     h = DumpTruck(dbname="/tmp/test.db")
     try:
         h.save_var(u"weird", var)
     finally:
         # Do not leave the handle open if the save raises.
         h.close()

     h = DumpTruck(dbname="/tmp/test.db")
     try:
         t = os.stat("/tmp/test.db").st_mtime
         self.assertEqual(h.get_var(u"weird"), var)
     finally:
         # Do not leave the handle open if the read or the assertion fails.
         h.close()

     # A bare ``assert`` is removed under ``python -O``; assertEqual is not,
     # and it shows both timestamps when they differ.
     self.assertEqual(os.stat("/tmp/test.db").st_mtime, t)
Example #3
0
 def get(self, key, value):
   """Assert that the variable ``key`` in ``/tmp/test.db`` equals ``value``."""
   h = DumpTruck(dbname = u'/tmp/test.db')
   try:
     self.assertEqual(h.get_var(key), value)
   finally:
     # Close the handle even when the lookup raises or the assertion fails.
     h.close()
Example #4
0
 def savegetvar(self, var):
   """Save ``var`` under the key ``weird`` and check it reads back equal.

   The save and the read both go through the same handle.
   """
   h = DumpTruck(dbname = '/tmp/test.db')
   try:
     h.save_var(u'weird', var)
     self.assertEqual(h.get_var(u'weird'), var)
   finally:
     # Close the handle even when the save raises or the assertion fails.
     h.close()
Example #5
0
            kin = {"kwargs": cb.kwargs}
            kin.update(reference)
            dt.insert(kin, cb.bucket)

        # The first entry has no ancestors, so it has to make its own entry.
        if self.motherbucket == None:
            dt.insert({"scraper_run": scraper_run, "kwargs": self.kwargs}, self.bucket)
        return childbuckets

    def reference(self):
        """Return the fields used for linking scraped data to this row.

        The dict carries this object's ``kwargs`` and ``motherkwargs`` together
        with the module-level ``scraper_run`` value.
        """
        link = dict(kwargs=self.kwargs)
        link["motherkwargs"] = self.motherkwargs
        link["scraper_run"] = scraper_run
        return link


# Reuse the "scraper_run" value previously saved through ``dt``; if reading it
# fails, stamp this run with today's ISO date and save that instead.
try:
    scraper_run = dt.get_var("scraper_run")
except Exception:
    # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt and
    # SystemExit still propagate instead of being treated as "no value saved".
    scraper_run = datetime.date.today().isoformat()
    dt.save_var("scraper_run", scraper_run)


def excavate(bucketclasses=[], startingbuckets=[]):
    "Start everything."
    # NOTE(review): both defaults are mutable lists shared across calls. The
    # append below mutates the bucketclasses default, so classes discovered on
    # one call are still in it on the next -- confirm whether that is intended.

    # Bucket classes (page types)
    # When none are passed in, collect them from this module's globals.
    if bucketclasses == []:
        for g in globals().values():
            # NOTE(review): isinstance() matches instances of BucketMold, not
            # subclasses of it -- confirm how bucket classes relate to
            # BucketMold (issubclass may have been intended).
            if isinstance(g, BucketMold) and g != BucketMold:
                bucketclasses.append(g)
    bag = Bag(buckets=bucketclasses)