def insertoriginalct(self):
    """Store the zlib-compressed page content under its URL, once per URL.

    Does nothing when ``self.odb`` already has ``self.url`` as a key.
    Otherwise:

    1. a new serial key (the key returned by ``self.serialcursor.last()``
       plus one, converted with ``contentline``) is put into
       ``self.serialdb`` with the URL as its value;
    2. ``self.content`` is compressed with zlib level 9 and put into
       ``self.odb`` under the URL;
    3. ``self.synccount`` is incremented and ``self.sync()`` is called on
       every 20000th insert.

    If step 1 raises ``db.DBPageNotFoundError`` the method syncs
    ``self.serialdb``, calls ``self.close()``, sleeps 3 seconds, calls
    ``self.initdb()`` and attempts step 1 one more time; a second failure
    propagates to the caller.

    Returns:
        None.
    """
    if self.odb.has_key(self.url):
        return

    def register_serial():
        # Reads self.serialcursor at call time, so a retry after initdb()
        # uses whatever cursor the instance holds at that point.
        # NOTE(review): presumably initdb() re-creates the cursor - confirm.
        last_key = self.serialcursor.last()[0]
        next_serial = contentline.inttoascii(
            contentline.asciitoint(last_key) + 1)
        dbutils.DeadlockWrap(
            self.serialdb.put, next_serial, self.url, max_retries=12)

    try:
        register_serial()
    except db.DBPageNotFoundError:
        print("WARNING: DBPageNotFoundError while loading database")
        self.serialdb.sync()
        self.close()
        # Trailing comma: under the Python 2 print statement this keeps the
        # following "-> initial db" message on the same output line.
        print("Close db"),
        time.sleep(3)
        self.initdb()
        print("-> initial db")
        self.serialdb.sync()
        # The retry previously read a bare `serialcursor` name, which is
        # undefined here and raised NameError instead of recovering.
        register_serial()

    compressed = zlib.compress(self.content, 9)
    dbutils.DeadlockWrap(self.odb.put, self.url, compressed, max_retries=12)

    self.synccount += 1
    # Modulo, not floor division: `synccount // 20000 == 0` holds for every
    # count below 20000 (sync on each insert) and never again afterwards.
    if self.synccount % 20000 == 0:
        self.sync()
    return
def insertPurecontent(self):
    """Store the extracted page text, title and per-line text under a new serial.

    Nothing is written when ``self.content`` has at most one character or
    when ``self.url_md5`` has already been stored.  Otherwise a new serial
    key is allocated (key returned by ``self.serialcursor.last()`` plus
    one, converted with ``contentline``) and these records are written:

    * ``serialdb[serial] = url_md5`` and ``puresedb[url_md5] = serial``
    * ``pdb[serial]`` = ``self.content`` compressed with zlib level 9
    * ``tdb[serial] = title`` when the title is longer than one character
      and ``tdb`` has no entry for the serial yet
    * ``purecontentcount[serial]`` = number of lines, as a decimal string
    * ``pureinline[serial + lintoascii(0)] = title`` followed by
      ``pureinline[serial + lintoascii(i + 1)]`` for each line ``i`` of
      ``self.purecotentinline``

    Returns:
        None.
    """
    if len(self.content) <= 1:
        return
    # This method keys pdb by serial and records url_md5 -> serial in
    # puresedb, so puresedb is where an earlier insert of this page shows
    # up.  The pdb lookup is kept so existing behaviour is not narrowed.
    if self.puresedb.has_key(self.url_md5) or self.pdb.has_key(self.url_md5):
        return

    # Write the all-zero key on every insert so that serialcursor.last()
    # has at least one record to return.
    # NOTE(review): assumes serialcursor iterates serialdb - confirm.
    self.serialdb[chr(0) * 4] = "0"
    last_key = self.serialcursor.last()[0]
    serial = contentline.inttoascii(contentline.asciitoint(last_key) + 1)

    self.serialdb[serial] = self.url_md5
    self.puresedb[self.url_md5] = serial
    self.pdb[serial] = zlib.compress(self.content, 9)

    if not self.tdb.has_key(serial) and len(self.title) > 1:
        self.tdb[serial] = self.title

    lines = self.purecotentinline
    self.purecontentcount[serial] = str(len(lines))

    # Line keys are the serial followed by a line index from lintoascii();
    # index 0 carries the title, body lines start at index 1.
    self.pureinline[serial + contentline.lintoascii(0)] = self.title
    for position, text in enumerate(lines):
        self.pureinline[serial + contentline.lintoascii(position + 1)] = text