コード例 #1
0
ファイル: Uniquedb.py プロジェクト: loeyae/lspider
 def build(self, obj):
     """
     Build a digest for ``obj`` using ``utils.md5``.

     The value is serialized before hashing, depending on its type:

     - ``dict``: serialized with ``utils.url_encode``.
     - ``list``: every element is converted with ``str`` and the results
       are concatenated without a separator.
     - anything else: ``str(obj)``.

     :param obj: the value to build a digest for
     :return: the result of ``utils.md5`` for the serialized value
     """
     if isinstance(obj, dict):
         return utils.md5(utils.url_encode(obj))
     if isinstance(obj, list):
         # Convert each element first so that lists holding non-string
         # items (ints, None, ...) do not make ``str.join`` raise TypeError.
         # Lists that contain only strings produce the same digest as before.
         return utils.md5("".join(map(str, obj)))
     return utils.md5(str(obj))
コード例 #2
0
ファイル: WxchatRobots.py プロジェクト: loeyae/lspider_robot
    def reply_fn(self, robot):
        """
        Poll ``self.message_queue`` and send automatic replies via ``robot``.

        Returns immediately when ``self.message_queue`` is falsy. Otherwise
        the method loops until a ``KeyboardInterrupt`` is raised. For every
        message taken from the queue it:

        1. derives a session id from the message's ``user`` and ``auser``;
        2. runs every callable in ``self.prepare_reply`` once per session id
           and remembers the id in ``self.prepared_session``;
        3. asks ``self.get_message`` for a reply and, when one is returned,
           sends it to ``message['user']`` (prefixed with ``@nick`` when the
           message carries a nick).

        Any other exception is logged through ``self.error`` and the loop
        continues.

        :param robot: object used to send replies; it is also handed to the
            ``prepare_reply`` callables
        :return: None
        """
        if not self.message_queue:
            return
        self.info("auto reply is starting")
        while True:
            try:
                message = self.message_queue.get_nowait()
                session_id = utils.md5("%s%s" % (message['user'], message['auser']))
                if session_id not in self.prepared_session:
                    for fn in self.prepare_reply:
                        fn(robot, session_id)
                    self.prepared_session.add(session_id)
                nick = message['nick']
                reply = self.get_message(message['msg'], session_id)
                if reply:
                    if nick:
                        # U+2005 (four-per-em space) separates the mention
                        # from the reply text.
                        reply = u'@%s\u2005%s' % (nick, reply)
                    robot.send(reply, message['user'])
                time.sleep(0.1)
            except queue.Empty:
                # Nothing to process right now; wait before polling again.
                time.sleep(0.1)
            except KeyboardInterrupt:
                break
            except Exception:
                self.error(traceback.format_exc())
                # Pause like the other paths do, so a failure that repeats on
                # every iteration does not turn into a busy loop.
                time.sleep(0.1)
コード例 #3
0
    def download(self, d, sd, file_urls):
        self.debug("download urls: %s" % file_urls)
        if isinstance(file_urls, (list, tuple)):
            return [self.download(d, sd, item) for item in file_urls]
        elif isinstance(file_urls, dict):
            return [self.download(d, sd, item) for _,item in file_urls.items()]
        else:
            headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36"}
            end = False
            retry = 0
            while end is False:
                try:
                    res = requests.get(file_urls, headers=headers, timeout=(30,30))
                    end = True
                except requests.exceptions.ConnectionError:
                    retry += 1
                    if retry > 5:
                        end = True
                except Exception:
                    end = True

            if res.status_code == BaseCrawler.STATUS_CODE_OK:
                ext = mimetypes.guess_extension(res.headers["Content-Type"])
                if ext is None:
                    ext = mimetypes.guess_extension(res.headers["Content-Type"], strict=False)
                file_name = "%s%s" % (utils.md5(file_urls), ext)
                full_file_name = os.path.join(d, file_name)
                short_file_name = os.path.join(sd, file_name)
                with open(full_file_name, "wb") as fp:
                    fp.write(res.content)
                    fp.close()
                res.close()
                return {"src": file_urls, "file": short_file_name}
            res.close()
            return {"src": file_urls, "file": ""}
コード例 #4
0
 def symbol(self):
     """
     Get the unique identifier.

     The identifier is ``md5`` applied to the username, password, host,
     port and db of this object, joined in a fixed layout.

     :return: the result of ``md5`` for the formatted string
     """
     fields = (self.username, self.password, self.host, self.port, self.db)
     return md5("%s:%s@%s:%s/%s" % fields)
コード例 #5
0
ファイル: LinksExtractor.py プロジェクト: loeyae/lspider
 def get_subdomain(self, link):
     """
     Record the subdomain that ``link`` points to in ``self.linksofsubdomain``.

     Links without a subdomain, or whose subdomain equals ``self.subdomain``,
     are ignored. Otherwise a host name is built from the last label of the
     link's subdomain plus ``self.domain``, and the entry is keyed by
     ``utils.md5`` of that host name:

     - unknown key: a new entry with the host's root URL and the link title
       is stored;
     - known key: the stored title is replaced only when the link has an
       empty path or the path ``/``.

     :param link: mapping with at least the keys ``url`` and ``title``
     :return: None
     """
     parsed = urlparse(link['url'])
     extracted = tldextract.extract(link['url'])
     sub = extracted.subdomain
     differs = sub != self.subdomain
     # Note: a 'www' subdomain is not filtered out here.
     if not (sub and differs):
         return

     host = "%s.%s" % (sub.split(".")[-1], self.domain)
     url = "%s://%s" % (parsed.scheme, host)
     key = utils.md5(host)
     known = self.linksofsubdomain
     if key not in known:
         known[key] = {"url": url, "title": link['title']}
     elif not parsed.path or parsed.path == '/':
         known[key]['title'] = link['title']
コード例 #6
0
 def symbol(self):
     """
     Get the unique identifier.

     The identifier is ``md5`` applied to the username, password, host,
     port and path of this object, joined in a fixed layout.

     :return: the result of ``md5`` for the formatted string
     """
     fields = (self.username, self.password, self.host, self.port, self.path)
     return md5("%s:%s@%s:%s/%s" % fields)