Ejemplo n.º 1
0
 def updateState(self, url, state):
     """Set the ``state`` field of the stored record for ``url``.

     Args:
         url: Either the URL value itself or a dict carrying it under the
             ``"url"`` key.
         state: Value written to the record's ``state`` field.

     The record is addressed by ``_id``, which is ``md5`` of the URL, and is
     modified with a ``$set`` update on the table named ``self._tbName``.
     """
     db = self._connect()
     address = url["url"] if isinstance(url, dict) else url
     doc_id = md5(address)
     selector = {"_id": doc_id}
     change = {"$set": {"state": state}}
     db[self._tbName].update(selector, change)
Ejemplo n.º 2
0
 def updateState(self, url, state):
   """Update the ``state`` column of the ``urls`` row belonging to ``url``.

   Args:
     url: Either the URL value itself or a dict carrying it under the
       ``"url"`` key. The row id is ``md5`` of that URL.
     state: New value for the ``state`` column.

   Any exception raised while building or running the statement is passed
   to ``self.log`` and not re-raised (best-effort update).
   """
   conn = sqlite3.connect(self._dbPath)
   try:
     target = url["url"] if isinstance(url, dict) else url
     row_id = md5(target)
     # Bind the values instead of formatting them into the SQL text, so a
     # non-numeric or hostile ``state`` can neither break nor alter the
     # statement.
     conn.cursor().execute("update urls set state=? where id=?", (state, row_id))
     conn.commit()
   except Exception as err:
     self.log(err)
   finally:
     # Release the connection on every exit path.
     conn.close()
Ejemplo n.º 3
0
 def resetUrls(self, urls):
     """Store every URL in ``urls`` with ``state`` 0, inserting or resetting.

     Args:
         urls: Iterable whose items are either URL values or dicts with a
             ``"url"`` key. Dict items are modified in place: they receive
             ``_id`` (``md5`` of the URL) and ``state`` set to 0.

     For each item, ``self.checkUrl(url, None)`` decides the action: a falsy
     result inserts the record into the table ``self._tbName``; a truthy
     result resets the existing record's ``state`` to 0 via ``$set``.
     """
     db = self._connect()
     for entry in urls:
         if isinstance(entry, dict):
             # Caller-supplied dicts are updated in place, not copied.
             entry['_id'] = md5(entry["url"])
             entry['state'] = 0
         else:
             entry = {'url': entry, '_id': md5(entry), 'state': 0}

         if self.checkUrl(entry["url"], None):
             reset = {"$set": {"state": 0}}
             db[self._tbName].update({"_id": entry["_id"]}, reset)
         else:
             db[self._tbName].insert(entry)
Ejemplo n.º 4
0
 def resetUrls(self, urls):
   """Write every URL in ``urls`` to the ``urls`` table with ``state`` 0.

   Args:
     urls: Iterable whose items are either URL values or dicts with a
       ``"url"`` key. Non-dict items are wrapped as ``{'url': item}``.

   Each row is ``(md5 of the URL, JSON dump of the record, getTimeNow())``
   and is written with ``REPLACE``, so an existing row with the same id is
   overwritten. Nothing is done when ``urls`` yields no items. Exceptions
   from the database work are passed to ``self.log`` and not re-raised.
   """
   rows = []
   for item in urls:
     if isinstance(item, dict):
       key = md5(item["url"])
       record = item
     else:
       key = md5(item)
       record = {'url': item}
     rows.append((key, json.dumps(record), getTimeNow()))

   if len(rows) == 0:
     return

   conn = sqlite3.connect(self._dbPath)
   try:
     statement = "REPLACE into urls(id,json,state,tm) values(?,?,0,?)"
     conn.cursor().executemany(statement, tuple(rows))
     conn.commit()
   except Exception as err:
     self.log(err)
   conn.close()
Ejemplo n.º 5
0
    def checkUrl(self, url, i):
        """Look up the stored record for ``url``.

        Args:
            url: URL value; the record id is ``md5(url)``.
            i: When truthy, ``str(i)`` is appended to ``self._tbName`` to
                pick the table to search; otherwise ``self._tbName`` is
                used as is.

        Returns:
            ``False`` when ``self._duplicateRemoval`` is falsy; otherwise
            whatever ``find_one`` returns for the record id.
        """
        if not self._duplicateRemoval:
            return False

        db = self._connect()
        key = md5(url)
        table = self._tbName + str(i) if i else self._tbName

        found = db[table].find_one({"_id": key})
        if not found and not i:
            # NOTE(review): with a falsy ``i`` this repeats the lookup on the
            # same table as above; confirm whether a fallback for a truthy
            # ``i`` was intended instead.
            found = db[self._tbName].find_one({"_id": key})
        return found
Ejemplo n.º 6
0
 def checkUrl(self,url):
   """Report whether a row for ``url`` exists in the ``urls`` table.

   Args:
     url: URL value; rows are keyed by ``md5(url)``.

   Returns:
     ``False`` when ``self._duplicateRemoval`` is falsy or when the query
     fails (the exception is passed to ``self.log``); otherwise ``True``
     if the query yields a row and ``False`` if it does not.
   """
   if not self._duplicateRemoval:
     return False

   conn = sqlite3.connect(self._dbPath)
   found = False
   try:
     query = "select id from urls where id='{}' limit 0,1".format(md5(url))
     # The query is limited to one row, so one fetch is enough.
     if conn.cursor().execute(query).fetchone() is not None:
       found = True
   except Exception as err:
     self.log(err)
   conn.close()
   return found
Ejemplo n.º 7
0
 def saveUrl(self, urls, i=None):
     """Insert the URLs that ``checkUrl`` does not already report as stored.

     Args:
         urls: Iterable whose items are either URL values or dicts with a
             ``"url"`` key. Dict items are modified in place: they receive
             ``_id`` (``md5`` of the URL) and ``state`` set to 0.
         i: Table index. When it compares equal to 0, every URL goes to the
             table ``self._tbName``; for any other value (including the
             default ``None``) the index is taken from ``self._getIndex``
             for each URL, and a truthy index is appended to the table name.

     For each inserted record, ``self._totalCount[index]`` is incremented,
     and the ``self._tbCache`` entry of every table written to is set to
     ``False``.
     """
     db = self._connect()
     # Decide once whether the caller pinned the base table. The original
     # overwrote ``i`` inside the loop, so a single 0 returned by
     # ``_getIndex`` forced every following URL into the base table.
     use_given_index = not (i != 0)
     touched = []
     for url in urls:
         if not isinstance(url, dict):
             url = {'url': url, '_id': md5(url), 'state': 0}
         else:
             url['_id'] = md5(url["url"])
             url['state'] = 0

         index = i if use_given_index else self._getIndex(url["url"])
         if self.checkUrl(url["url"], index):
             continue

         tbName = self._tbName + str(index) if index else self._tbName
         db[tbName].insert(url)
         if index in self._totalCount:
             self._totalCount[index] += 1
         else:
             self._totalCount[index] = 1
         if tbName not in touched:
             touched.append(tbName)

     # Reset the cache flag of every table that received an insert, not
     # only the last one written.
     for tbName in touched:
         self._tbCache[tbName] = False
Ejemplo n.º 8
0
 def _saveHtml(self, url, html):
     """Write ``html`` to a file under ``self._htmlPath`` and return its name.

     Args:
         url: URL the content came from. The file name is ``md5(url)`` plus
             a suffix.
         html: Text to write, encoded as UTF-8.

     Returns:
         The file name (without the directory part).

     The suffix defaults to ``'.htm'``. It is replaced by the tail of
     ``url`` starting at the first ``'.'`` found at or after the last
     ``'/'``, provided that ``'/'`` is past position 10 and the tail is
     shorter than 10 characters. The directory ``self._htmlPath`` is
     created when it does not exist.
     """
     suffix = '.htm'
     index = url.rfind('/')
     # NOTE(review): the "> 10" threshold presumably skips the slashes of
     # the scheme/host part of the URL - confirm against callers.
     if index > 10:
         index = url.rfind('.', index)
         if index > 0 and len(url) - index < 10:
             suffix = url[index:]
     filename = md5(url) + suffix
     if not os.path.exists(self._htmlPath):
         os.mkdir(self._htmlPath)
     # ``with`` closes the handle even when ``write`` raises.
     with io.open(self._htmlPath + filename, "w", encoding="utf-8") as out:
         out.write(html)
     return filename
Ejemplo n.º 9
0
 def resetUrls(self, urls):
   """Re-register every URL in ``urls`` without a duplicate check.

   Args:
     urls: Iterable whose items are either URL values or dicts with a
       ``"url"`` key. Non-dict items are wrapped as ``{'url': item}``.

   While holding ``self._lock``, each record is appended to ``self._urls``,
   its ``md5`` digest is added to ``self._dic``, and both are handed to
   ``self._writeFile``. ``self._flushFile`` is called once afterwards if at
   least one record was processed. Exceptions are passed to ``printInfo``
   and not re-raised.
   """
   self._lock.acquire()
   try:
     wrote = False
     for item in urls:
       record = item if isinstance(item, dict) else {'url': item}
       key = md5(record['url'])
       self._urls.append(record)
       self._dic.add(key)
       self._writeFile(record, key)
       wrote = True
     if wrote:
       self._flushFile()
   except Exception as err:
     printInfo(err)
   finally:
     self._lock.release()
Ejemplo n.º 10
0
 def saveUrl(self, urls,i=None):
   """Register the URLs in ``urls`` whose digest is not yet in ``self._dic``.

   Args:
     urls: Iterable whose items are either URL values or dicts with a
       ``"url"`` key. Non-dict items are wrapped as ``{'url': item}``.
     i: Not used by this method.

   While holding ``self._lock``, each new record is appended to
   ``self._urls``, its ``md5`` digest is added to ``self._dic``, and both
   are handed to ``self._writeFile``. ``self._flushFile`` is called once
   afterwards if at least one record was added. Exceptions are passed to
   ``printInfo`` and not re-raised.
   """
   self._lock.acquire()
   try:
     flag = False
     for url in urls:
       if not isinstance(url, dict):
         url = {'url': url}
       # Compute the digest once and store that value. The original passed
       # the ``md5`` function object itself to ``_dic.add`` and
       # ``_writeFile``, so the membership test above never matched.
       key = md5(url['url'])
       if key not in self._dic:
         self._urls.append(url)
         self._dic.add(key)
         self._writeFile(url, key)
         flag = True
     if flag:
       self._flushFile()
   except Exception as err:
     printInfo(err)
   finally:
     self._lock.release()
Ejemplo n.º 11
0
 def _saveHtml(self, url, html):
     """Write ``html`` as UTF-8 text to ``self._htmlPath`` + md5(url) + '.htm'.

     Args:
         url: URL the content came from; its ``md5`` names the file.
         html: Text to write.
     """
     filename = md5(url) + '.htm'
     # ``with`` closes the handle even when ``write`` raises.
     with io.open(self._htmlPath + filename, "w", encoding="utf-8") as out:
         out.write(html)
Ejemplo n.º 12
0
 def checkUrl(self,url):
   """Add the md5 of ``url`` to the redis set and report if it was there.

   Args:
     url: URL value; its ``md5`` digest is the set member.

   Returns:
     ``False`` when ``self._duplicateRemoval`` is falsy. Otherwise the
     negation of the ``sadd`` result: ``True`` when ``sadd`` returned a
     falsy value, ``False`` otherwise.

   Note that the check itself calls ``sadd`` on ``self._setName``, so it
   writes to the set as a side effect.
   """
   if not self._duplicateRemoval:
     return False
   client = redis.Redis(connection_pool=self._pool)
   added = client.sadd(self._setName, md5(url))
   return not added
Ejemplo n.º 13
0
 def checkUrl(self,url):
   """Return whether the md5 of ``url`` is contained in ``self._dic``."""
   digest = md5(url)
   return digest in self._dic