import asyncio
import os
import re
import threading
import time

import zhconv
from bs4 import BeautifulSoup

# HttpUtil, SystemUtil, TranslationUtil, ThreadPool, Constants, WxApi and s3
# are project-local helpers used by the snippets below.


def get_access_token(cls, appid, appsecret, code):
    """
    Exchange the OAuth code for a web-authorization access_token.
    :return: dict with keys: access_token, expires_in, refresh_token, openid, scope
    """
    params = {'appid': appid,
              'secret': appsecret,
              'code': code,
              'grant_type': 'authorization_code'}
    return HttpUtil.get(cls._ACCESS_TOKEN_URL, params)
def get_user_info(cls, openid, access_token, lang='zh_CN'):
    """
    Pull the user's profile; this step is not needed under the snsapi_base scope.
    :return: dict with the user info
    """
    params = {'access_token': access_token, 'openid': openid, 'lang': lang}
    return HttpUtil.get(cls._USERINFO_URL, params)
def refresh_access_token(cls, appid, refresh_token):
    """
    Refresh the OAuth access_token.
    :return: dict with keys: access_token, expires_in, refresh_token, openid, scope
    """
    params = {'appid': appid,
              'grant_type': 'refresh_token',
              'refresh_token': refresh_token}
    return HttpUtil.get(cls._REFRESH_TOKEN_URL, params)
def request_access_token(appid, appsecret):
    """
    Fetch the global access_token over the network.
    :param appid:
    :param appsecret:
    :return: {'access_token': '', 'expires_in': 3600}
    """
    url = WxApi.BASE_URL + "/cgi-bin/token?grant_type=client_credential"
    params = {'appid': appid, 'secret': appsecret}
    return HttpUtil.get(url, params)
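# Caching sketch (an assumption, not part of the source): the token returned by
# request_access_token expires after `expires_in` seconds, so callers normally
# cache it rather than hit /cgi-bin/token on every request. A minimal in-process
# cache; the names _token_cache and get_cached_access_token are hypothetical:
import time

_token_cache = {'access_token': None, 'expires_at': 0.0}

def get_cached_access_token(appid, appsecret):
    # Refresh 60 seconds early so a token never expires mid-request.
    if time.time() >= _token_cache['expires_at'] - 60:
        resp = request_access_token(appid, appsecret)
        _token_cache['access_token'] = resp['access_token']
        _token_cache['expires_at'] = time.time() + resp['expires_in']
    return _token_cache['access_token']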
def check_access_token(cls, openid, access_token):
    """
    Check whether the authorization credential (access_token) is still valid.
    """
    params = {'access_token': access_token, 'openid': openid}
    return HttpUtil.get(cls._CHECK_TOKEN_URL, params)
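# Usage sketch (hypothetical): ties the four OAuth helpers above together,
# assuming they live as classmethods on a class named WxAuth and that
# HttpUtil.get returns the parsed JSON dict. On failure WeChat responds with
# 'errcode'/'errmsg' instead of the documented keys, which is what the check
# below keys off (errcode 0 means the token is still valid).
def ensure_valid_token(appid, appsecret, code, token=None):
    if token is None:
        # First visit: exchange the redirect code for a token.
        token = WxAuth.get_access_token(appid, appsecret, code)
    check = WxAuth.check_access_token(token['openid'], token['access_token'])
    if check.get('errcode', 0) != 0:
        # access_token lapsed; trade the longer-lived refresh_token for a new one.
        token = WxAuth.refresh_access_token(appid, token['refresh_token'])
    return token

# token = ensure_valid_token(APPID, APPSECRET, code_from_redirect)
# user = WxAuth.get_user_info(token['openid'], token['access_token'])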
def _get(self, url, params=None):
    return HttpUtil.get(self._final_url(url), params)
# Module-level run counter shared by the downloaders below.
run_count = 0


def download_latest_images(page, directory):
    try:
        SystemUtil.restart_process(os.path.abspath(__file__))
        html = BeautifulSoup(
            HttpUtil.get("https://www.pexels.com/zh-cn/new-photos?page=" + str(page)).text,
            features="lxml")
        articles = html.find_all("article")
        pages_html = BeautifulSoup(
            str(html.find("div", {"class": "pagination"})),
            features="lxml").find_all("a")
        page_total = int(pages_html[len(pages_html) - 2].text)
        print(page, len(articles), page_total)
        if page > page_total:
            page = 1
            raise ValueError("page out of range")
        for article in articles:
            # Image id
            image_id = article["data-photo-modal-medium-id"]
            # Original image size
            # image_org_size = article["data-photo-modal-download-value-original"]
            # Image download link
            download_url = article["data-photo-modal-image-download-link"]
            image_name = f"pexels-photo-{image_id}.jpg"
            info_html = BeautifulSoup(
                HttpUtil.get("https://www.pexels.com/zh-cn/photo/" + image_id).text,
                features="lxml")
            tags = info_html.find("meta", {"name": "keywords"}).attrs["content"]
            if len(tags) > 0 and tags != "":
                # Drop the trailing site suffix, convert Traditional to Simplified Chinese
                tags = zhconv.convert(tags[:len(tags) - 7], 'zh-cn')
                tags = re.sub(r"[^a-z,\u4e00-\u9fa5]+|^,|,$", "", tags).replace(",,", ",")
            # Values are interpolated directly into the SQL; see the parameterized sketch below.
            s3.execute_commit(f"""
                INSERT OR IGNORE INTO images(image_id,suffix,url,type,page,tags)
                VALUES('{image_id}','{download_url[download_url.rfind(".") + 1:]}',
                '{download_url}','latest','{page}','{tags}')
                """)
            # dl = info_html.find(lambda tag: tag.has_attr('data-id') and tag.has_attr('href')).attrs["href"]
            # dl = info_html.find(lambda tag: tag.has_attr('data-id') and tag.has_attr('data-url')).attrs["data-url"]
            # Skip the download if the file already exists
            if not os.path.exists(os.path.join(directory, image_name)):
                # Download each image on its own thread
                # done = ThreadPool.pool.submit(HttpUtil.download_file, download_url, directory, image_name)
                # done.add_done_callback(ThreadPool.thread_call_back)
                asyncio.run(
                    HttpUtil.download_one_async(download_url, directory, image_name))
        global run_count
        run_count += 1
        # Recurse into the next page while pages remain and the run budget allows
        if page_total > 0 and page <= page_total and run_count <= 10:
            download_latest_images(page + 1, directory)
        else:
            if len(pages_html) > 0 and page <= page_total:
                page += 1
            if page > page_total:
                page = 1
            run_count = 0
    except Exception as e:
        print(e)
    finally:
        print("Active thread count:", threading.active_count())
        time.sleep(400)
        download_latest_images(page, directory)
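# Hardening sketch (assumptions: the store behind s3.execute_commit is SQLite;
# insert_image is a hypothetical helper). The f-string INSERT above splices
# scraped values straight into the SQL, so a single quote in a tag breaks the
# statement and opens it to injection; binding with `?` placeholders avoids both:
import sqlite3

def insert_image(conn: sqlite3.Connection, image_id, suffix, url, page, tags):
    conn.execute(
        "INSERT OR IGNORE INTO images(image_id,suffix,url,type,page,tags) "
        "VALUES(?,?,?,?,?,?)",
        (image_id, suffix, url, 'latest', page, tags))
    conn.commit()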
def lin_shi_you_xiang_get_mail(prefix, id):
    """Fetch the raw source of a message from the temp-mail mailbox."""
    url = Constants.LIN_SHI_YOU_XIANG + "/mailbox/" + prefix + "/" + id + "/source"
    return HttpUtil.get(url=url, data=None).text
def download_images(url, page, directory):
    """
    Download images.
    :param url: link
    :param page: page number
    :param directory: directory the files are stored in
    :return:
    """
    try:
        SystemUtil.restart_process(os.path.abspath(__file__))
        html = BeautifulSoup(HttpUtil.get(url + str(page)).text, features="lxml")
        figure = html.find_all("figure")
        # Grab every tag that carries an 'original-title' attribute
        page_all = html.find_all(lambda tag: tag.has_attr('original-title'))
        page_total = int(page_all[len(page_all) - 1].text)
        print(page, len(figure), page_total)
        if page > page_total:
            page = 1
            raise ValueError("page out of range")
        for label in figure:
            image_id = label.attrs["data-wallpaper-id"]
            # Image detail page
            info_html = BeautifulSoup(HttpUtil.get("https://wallhaven.cc/w/" + image_id).text,
                                      features="lxml")
            tags_html = info_html.find_all("a", {"class": "tagname", "rel": "tag"})
            # The image's tags
            tags = ",".join([tag_html.text for tag_html in tags_html]).replace("'", "")
            if len(tags) > 0 and tags != "":
                # Normalize the fullwidth commas the translation comes back with
                tags = TranslationUtil.translate_google(tags).replace("，", ",")
                tags = re.sub(r"[^a-z,\u4e00-\u9fa5]+|^,|,$", "", tags).replace(",,", ",")
            download_url = info_html.find("img", {"id": "wallpaper"}).attrs["src"]
            if len(download_url) <= 0 or download_url == "":
                raise ConnectionError("failed to obtain the download link")
            # Values are interpolated directly here as well; the parameterized sketch above applies equally.
            s3.execute_commit(f"""
                INSERT OR IGNORE INTO images(image_id,suffix,url,type,page,tags)
                VALUES('{image_id}','{download_url[download_url.rfind(".") + 1:]}',
                '{download_url}','latest','{page}','{tags}')
                """)
            image_name = download_url.split("/")
            image_name = image_name[len(image_name) - 1]
            # Skip the download if the file already exists
            # if not os.path.exists(name):
            if not os.path.isfile(os.path.join(directory, image_name)):
                # Download each image on its own thread
                # done = ThreadPool.pool.submit(HttpUtil.download_file, download_url, directory, image_name)
                # done.add_done_callback(ThreadPool.thread_call_back)
                asyncio.run(
                    HttpUtil.download_one_async(download_url, directory, image_name))
        global run_count
        run_count += 1
        # Recurse into the next page while pages remain and the run budget allows
        if len(page_all) > 0 and page <= page_total and run_count <= 10:
            download_images(url, page + 1, directory)
        else:
            if len(page_all) > 0:
                page += 1
            if page > page_total:
                page = 1
            run_count = 0
    except Exception as e:
        print(e)
    finally:
        print("Active thread count:", threading.active_count())
        time.sleep(400)
        download_images(url, page, directory)
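# Scheduling sketch (an assumption about intent): because `finally` re-enters
# download_images unconditionally, every 400-second cycle adds a stack frame and
# will eventually hit CPython's recursion limit (1000 frames by default). The
# same poll-forever behavior survives as a plain loop; download_page is a
# hypothetical helper that processes one page and returns the next page number:
import threading
import time

def poll_images_forever(url, directory, start_page=1, interval=400):
    page = start_page
    while True:
        try:
            page = download_page(url, page, directory)
        except Exception as e:
            print(e)
        print("Active thread count:", threading.active_count())
        time.sleep(interval)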