Example #1
0
 def insertoriginalct(self):
     if not self.odb.has_key(self.url):
         try:
             serialnumber = contentline.asciitoint(self.serialcursor.last()[0]) + 1
             asciiserial = contentline.inttoascii(serialnumber)
             dbutils.DeadlockWrap(self.serialdb.put, asciiserial, self.url, max_retries=12)
         except db.DBPageNotFoundError:
             print "WARNING: DBPageNotFoundError while loading database"
             self.serialdb.sync()
             self.close()
             print "Close db",
             time.sleep(3)
             self.initdb()
             print "-> initial db"
             self.serialdb.sync()
             serialnumber = contentline.asciitoint(serialcursor.last()[0]) + 1
             asciiserial = contentline.inttoascii(serialnumber)
             dbutils.DeadlockWrap(self.serialdb.put, asciiserial, self.url, max_retries=12)
         compresscontent = zlib.compress(self.content, 9)
         dbutils.DeadlockWrap(self.odb.put, self.url, compresscontent, max_retries=12)
         # self.odb['%s'%self.url]='%s'%(compresscontent)
     self.synccount += 1
     if self.synccount // 20000 == 0:
         self.sync()
     return
Example #2
0
    def insertPurecontent(self):
        """Record the current page's pure content under a newly allocated serial.

        Nothing is written when ``self.pdb`` already has the key
        ``self.url_md5`` or when ``self.content`` is at most one character
        long.  Otherwise the method, in this order:

        * (re)writes the seed record of ``self.serialdb`` at the all-zero
          four-character key,
        * allocates the next serial from the cursor's last record and maps
          serial -> url_md5 in ``self.serialdb`` and url_md5 -> serial in
          ``self.puresedb``,
        * stores the level-9 zlib-compressed content in ``self.pdb`` and,
          if absent and longer than one character, the title in ``self.tdb``,
        * stores the line count in ``self.purecontentcount`` and the title
          plus every line of ``self.purecotentinline`` in ``self.pureinline``.

        NOTE(review): the duplicate check looks up ``url_md5`` in ``pdb``,
        yet this method keys ``pdb`` by serial -- confirm whether the check
        was meant to consult ``puresedb`` instead.
        """
        unseen = not self.pdb.has_key(self.url_md5)
        if unseen and len(self.content) > 1:
            # Seed record of the serial db, rewritten on every insert.
            self.serialdb[chr(0) * 4] = "0"

            # Next serial = value decoded from the last cursor record + 1.
            last_record = self.serialcursor.last()
            serial = contentline.inttoascii(contentline.asciitoint(last_record[0]) + 1)
            serial_key = "%s" % serial

            # Two-way mapping between serial and url_md5.
            self.serialdb[serial] = self.url_md5
            self.puresedb["%s" % self.url_md5] = serial

            packed = zlib.compress(self.content, 9)
            self.pdb[serial_key] = "%s" % packed

            title_missing = not self.tdb.has_key(serial)
            if title_missing and len(self.title) > 1:
                self.tdb[serial_key] = "%s" % self.title

            # Per-line storage: keys are the serial followed by an encoded
            # line index; index 0 holds the title, content lines start at 1.
            body_lines = self.purecotentinline
            self.purecontentcount[serial_key] = str(len(body_lines))
            self.pureinline[serial_key + contentline.lintoascii(0)] = self.title
            for position, text in enumerate(body_lines, 1):
                self.pureinline["%s" % (serial + contentline.lintoascii(position))] = text