def build(self, obj):
    """Return the ``utils.md5`` digest of a string derived from ``obj``.

    The string that gets hashed depends on the type of ``obj``:

    * ``dict``  -- the result of ``utils.url_encode(obj)``;
    * ``list``  -- its elements concatenated with ``"".join`` (so every
      element must already be a string);
    * anything else -- ``str(obj)``.

    :param obj: value to derive the digest from.
    :return: whatever ``utils.md5`` returns for the derived string.
    """
    if isinstance(obj, dict):
        payload = utils.url_encode(obj)
    elif isinstance(obj, list):
        payload = "".join(obj)
    else:
        payload = str(obj)
    # Single exit point: every branch ends up hashed the same way.
    return utils.md5(payload)
def reply_fn(self, robot):
    """Poll ``self.message_queue`` forever and send automatic replies.

    Returns immediately when ``self.message_queue`` is falsy.  Otherwise
    each queued message is handled as follows:

    1. A session key is computed with ``utils.md5`` from the message's
       ``'user'`` and ``'auser'`` fields.
    2. The first time a session key is seen, every callable in
       ``self.prepare_reply`` is invoked with ``(robot, session_key)`` and
       the key is recorded in ``self.prepared_session``.
    3. ``self.get_message`` produces the reply; a non-empty reply is sent
       with ``robot.send`` to ``message['user']``, prefixed with
       ``@<nick>`` when the message carries a nick.

    The loop ends only on ``KeyboardInterrupt``.  Any other exception is
    logged through ``self.error`` and the loop continues.

    :param robot: object exposing ``send(text, user)``; it is also passed
        to the ``prepare_reply`` callables.
    :return: ``None``
    """
    if not self.message_queue:
        return

    self.info("auto reply is starting")
    while True:
        try:
            incoming = self.message_queue.get_nowait()
            session_key = utils.md5(
                "%s%s" % (incoming['user'], incoming['auser']))

            # Run the one-time preparation hooks for a new session.
            if uuid_is_new(self, session_key) if False else \
                    session_key not in self.prepared_session:
                for hook in self.prepare_reply:
                    hook(robot, session_key)
                self.prepared_session.add(session_key)

            sender_nick = incoming['nick']
            answer = self.get_message(incoming['msg'], session_key)
            if answer:
                # U+2005 (four-per-em space) separates the mention from
                # the reply text.
                text = (u'@%s\u2005%s' % (sender_nick, answer)
                        if sender_nick else answer)
                robot.send(text, incoming['user'])

            # Small pause between consecutive messages.
            time.sleep(0.1)
        except queue.Empty:
            # Nothing queued right now; back off briefly before polling again.
            time.sleep(0.1)
        except KeyboardInterrupt:
            return
        except Exception:
            # Best effort: log the failure and keep serving other messages.
            self.error(traceback.format_exc())
def download(self, d, sd, file_urls): self.debug("download urls: %s" % file_urls) if isinstance(file_urls, (list, tuple)): return [self.download(d, sd, item) for item in file_urls] elif isinstance(file_urls, dict): return [self.download(d, sd, item) for _,item in file_urls.items()] else: headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36"} end = False retry = 0 while end is False: try: res = requests.get(file_urls, headers=headers, timeout=(30,30)) end = True except requests.exceptions.ConnectionError: retry += 1 if retry > 5: end = True except Exception: end = True if res.status_code == BaseCrawler.STATUS_CODE_OK: ext = mimetypes.guess_extension(res.headers["Content-Type"]) if ext is None: ext = mimetypes.guess_extension(res.headers["Content-Type"], strict=False) file_name = "%s%s" % (utils.md5(file_urls), ext) full_file_name = os.path.join(d, file_name) short_file_name = os.path.join(sd, file_name) with open(full_file_name, "wb") as fp: fp.write(res.content) fp.close() res.close() return {"src": file_urls, "file": short_file_name} res.close() return {"src": file_urls, "file": ""}
def symbol(self):
    """Get the unique identifier.

    The identifier is ``md5`` applied to the string
    ``username:password@host:port/db`` built from this object's
    attributes.

    :return: the value returned by ``md5`` for that string.
    """
    fields = (self.username, self.password, self.host, self.port, self.db)
    return md5("%s:%s@%s:%s/%s" % fields)
def get_subdomain(self, link):
    """Record the subdomain of ``link`` in ``self.linksofsubdomain``.

    Nothing happens when the URL has no subdomain or when its subdomain
    equals ``self.subdomain``.  Otherwise the host ``<label>.<self.domain>``
    is formed from the last dot-separated label of the subdomain, and
    ``self.linksofsubdomain`` is updated under the key ``utils.md5(host)``:

    * unknown key -- a new entry ``{"url": "<scheme>://<host>", "title":
      link['title']}`` is stored;
    * known key -- only the title is refreshed, and only when the link
      points at the site root (empty path or ``/``).

    :param link: mapping with at least ``'url'`` and ``'title'`` keys.
    :return: ``None``
    """
    parsed = urlparse(link['url'])
    sub = tldextract.extract(link['url']).subdomain

    # NOTE: 'www' is not excluded here; only an empty subdomain or the
    # crawler's own subdomain is skipped.
    if not sub or sub == self.subdomain:
        return

    host = "%s.%s" % (sub.split(".")[-1], self.domain)
    key = utils.md5(host)

    if key not in self.linksofsubdomain:
        self.linksofsubdomain[key] = {
            "url": "%s://%s" % (parsed.scheme, host),
            "title": link['title'],
        }
        return

    # Already known: a link to the site root carries the best title.
    if not parsed.path or parsed.path == '/':
        self.linksofsubdomain[key]['title'] = link['title']
def symbol(self):
    """Return an identifier derived from this object's connection fields.

    The identifier is ``md5`` applied to the string
    ``username:password@host:port/path``.

    :return: the value returned by ``md5`` for that string.
    """
    template = "%s:%s@%s:%s/%s"
    location = template % (
        self.username,
        self.password,
        self.host,
        self.port,
        self.path,
    )
    return md5(location)