Example #1
def get_bing():
    """
    Fetch the Bing image-of-the-day URL.
    :return:
    """
    data = {'format': 'js', 'idx': 0, 'n': 1}
    try:
        response = HttpUtil.get_json(
            url='http://cn.bing.com/HPImageArchive.aspx', data=data)
        logging.debug(response)
    except Exception as e:
        logging.error("Network request error: %s", e)
        time.sleep(120)
        return get_bing()
    images = response["images"]
    url = "http://cn.bing.com" + images[0]["url"].split("&")[0]

    # Build the directory path
    directory = os.path.join(Constants.APP_DIRECTORY, "images")

    image_name = url.split("=")[1]
    # Build the absolute file path
    image_path = os.path.join(directory, image_name)
    # Download the image
    HttpUtil.download_file(url, directory, image_name)
    # Split off the file extension; if it is not .bmp, convert the image
    if os.path.splitext(image_name)[1] != ".bmp":
        image_path = FileUtil.image_to_bmp(image_path)
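These examples rely on a project-local HttpUtil helper whose source is not part of this listing. As a rough sketch of what HttpUtil.get_json might look like, assuming it is a thin wrapper around the requests library (the name and signature are taken from the calls above; everything else is an assumption):

# Hypothetical sketch of HttpUtil.get_json, assuming a requests-based wrapper.
import requests


def get_json(url, data=None, timeout=30):
    """GET `url` with `data` as query parameters and return the decoded JSON body."""
    response = requests.get(url, params=data, timeout=timeout)
    response.raise_for_status()  # surface HTTP errors instead of returning a bad body
    return response.json()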
Example #2
def download_chromedriver():
    """
    Download chromedriver.
    http://chromedriver.storage.googleapis.com/index.html
    :return:
    """
    # Get the list of version prefixes
    url = "http://chromedriver.storage.googleapis.com/"
    result = BeautifulSoup(HttpUtil.get(url, {
        "delimiter": "/",
        "prefix": ""
    }).text,
                           features="lxml")
    prefix = result.find_all("prefix")
    # Filter
    # info = [s.extract() for s in prefix('prefix')]

    local_version = get_local_version(prefix)

    # Get the list of files under that version
    driver_list = BeautifulSoup(HttpUtil.get(url, {
        "delimiter": "/",
        "prefix": local_version
    }).text,
                                features="lxml")
    filename_list = driver_list.find_all("key")

    for s in filename_list:
        s = s.text
        # If the platform name appears in the filename
        if s.find(sys.platform) != -1:
            filename = s[len(local_version):]
            # Download the file
            HttpUtil.download_file(url + s, None, filename)
            FileUtil.zip_extract(filename, None)
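The filename filter above works because sys.platform is a substring of most chromedriver archive names ('win32' matches chromedriver_win32.zip, 'linux' matches chromedriver_linux64.zip), although 'darwin' would not match chromedriver_mac64.zip. A small, hypothetical mapping makes the selection explicit (this helper is not part of the original project):

# Hypothetical platform-to-archive mapping for picking the right chromedriver zip.
import sys

PLATFORM_FRAGMENT = {
    "win32": "win32",
    "linux": "linux64",
    "darwin": "mac64",
}


def archive_matches(filename):
    """Return True if `filename` looks like the chromedriver archive for this platform."""
    fragment = PLATFORM_FRAGMENT.get(sys.platform, sys.platform)
    return fragment in filename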
Example #3
def download_taobao_chromedriver():
    """
    Download chromedriver from the Taobao mirror.
    http://npm.taobao.org/mirrors/chromedriver
    :return:
    """
    # Get the list of versions
    url = "http://npm.taobao.org/mirrors/chromedriver/"
    result = BeautifulSoup(HttpUtil.get(url).text, features="lxml")
    prefix = result.find("pre").find_all("a")
    # Filter
    # info = [s.extract() for s in prefix('prefix')]

    local_version_url = url + get_local_version(prefix)

    # Get the list of files under that version
    driver_list = BeautifulSoup(HttpUtil.get(local_version_url).text,
                                features="lxml")
    filename_list = driver_list.find_all("a")

    for s in filename_list:
        s = s.text
        # If the platform name appears in the filename
        if s.find(sys.platform) != -1:
            # Download the file
            HttpUtil.download_file(local_version_url + s, None, s)
            FileUtil.zip_extract(s, None)
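FileUtil.zip_extract(filename, None) is another local helper; from the way it is called, passing None appears to mean "extract into the current directory". A minimal sketch using the standard zipfile module, under that assumption:

# Hypothetical sketch of FileUtil.zip_extract; path=None extracts into the working directory.
import zipfile


def zip_extract(filename, path=None):
    """Extract every member of the zip archive `filename` into `path`."""
    with zipfile.ZipFile(filename) as archive:
        archive.extractall(path=path)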
Example #4
 def search(self):
     url = 'http://www.wyl.cc/'
     home_soup = BeautifulSoup(HttpUtil.request(url))
     target_url = home_soup.find(attrs={"class": "entry-title"}).find('a')['href'].encode('utf-8')
     target_soup = BeautifulSoup(HttpUtil.request(target_url))
     content_soup = target_soup.find(attrs={"class": "single-content"}).findAll('p')
     text = content_soup[0].text
     url = content_soup[1].find('a')['href'].encode('utf-8')
     self.__send(text, url)
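HttpUtil.request is also project-local; judging by its use here (and in the image-download examples further down, where its result is written straight to disk), it returns the raw response body. A sketch assuming a requests-based implementation:

# Hypothetical sketch of HttpUtil.request: fetch a URL and return the raw response body.
import requests


def request(url, timeout=30):
    """GET `url` and return the body as bytes, suitable for BeautifulSoup or file writes."""
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.content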
Example #5
 def sample(self):
     page = 1
     url = 'http://search.51job.com/list/070200%252C00,000000,0000,00,9,99,java,0,1.html?lang=c&stype=2&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&list_type=0&confirmdate=9&dibiaoid=0'
     opener = HttpUtil.init_opener()
     HttpUtil.opener_request(url, opener)
     while page < 50:
         print 'Page %s' % page
         self.__research(page, opener)
         page = page + 1
Example #6
 def space_code_provision(self, new_space_code):
     r = """/vb.htm?IntelligentInfo={"version":1.1,"method":"get","devicetype":1}"""
     for host in self.ssh.hosts:
         data = HttpUtil.httpget(host[0], r)
         json = data.split(" ")[-1]
         print("get intelligentinfo = %s" % json)
         output = re.sub(""""pointcode":"[0-9]*""", '''"pointcode":"''' + new_space_code, json)
         newdata = "/vb.htm?language=ie&IntelligentInfo=" + urllib.parse.quote(output)
         HttpUtil.httpget(host[0], newdata)
Example #7
    def install_openvpn(self, client_key, client_crt, firmware=''):
        if firmware == '':
            self.upgrade('openvpn_patch_20160426113637_A1.bin')
        else:
            self.upgrade('openvpn_patch_20160426113637_A1.bin')

        self.ssh.do_scp_put(client_key, '/opt/ipnc/openvpn/client.key')
        self.ssh.do_scp_put(client_crt, '/opt/ipnc/openvpn/client.crt')
        for host in self.ssh.hosts:
            HttpUtil.httpget(host[0], '/cgi-bin/net_adv.cgi?openvpn_enable=1')
Example #8
    def install_openvpn(self, client_key, client_crt, firmware=""):
        if firmware == "":
            self.upgrade("openvpn_patch_20160426113637_A1.bin")
        else:
            self.upgrade("openvpn_patch_20160426113637_A1.bin")

        self.ssh.do_scp_put(client_key, "/opt/ipnc/openvpn/client.key")
        self.ssh.do_scp_put(client_crt, "/opt/ipnc/openvpn/client.crt")
        for host in self.ssh.hosts:
            HttpUtil.httpget(host[0], "/cgi-bin/net_adv.cgi?openvpn_enable=1")
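HttpUtil.httpget(host, path) appears to issue a plain HTTP GET against a device host and hand back the body as text (Example #6 splits it on spaces). A sketch under that assumption:

# Hypothetical sketch of HttpUtil.httpget: GET http://<host><path> and return the body text.
import requests


def httpget(host, path, timeout=10):
    """Issue a GET request to the device at `host` and return the response text."""
    response = requests.get("http://" + host + path, timeout=timeout)
    response.raise_for_status()
    return response.text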
Example #9
 def set_ntp(self, ntp_server, ntp_interval, ntp_type):
     for host in self.ssh.hosts:
         HttpUtil.httpcfg(
             host[0],
             "language=ie&datestampenable3="
             + str(ntp_type)
             + "&sntpip="
             + ntp_server
             + "&ntpinterval="
             + str(ntp_interval),
         )
Example #10
 def space_code_provision(self, new_space_code):
     r = '''/vb.htm?IntelligentInfo={"version":1.1,"method":"get","devicetype":1}'''
     for host in self.ssh.hosts:
         data = HttpUtil.httpget(host[0], r)
         json = data.split(' ')[-1]
         print('get intelligentinfo = %s' % json)
         output = re.sub('''"pointcode":"[0-9]*''',
                         '''"pointcode":"''' + new_space_code, json)
         newdata = '/vb.htm?language=ie&IntelligentInfo=' + urllib.parse.quote(
             output)
         HttpUtil.httpget(host[0], newdata)
Example #11
 def search(self):
     url = 'http://www.wyl.cc/'
     home_soup = BeautifulSoup(HttpUtil.request(url))
     target_url = home_soup.find(attrs={
         "class": "entry-title"
     }).find('a')['href'].encode('utf-8')
     target_soup = BeautifulSoup(HttpUtil.request(target_url))
     content_soup = target_soup.find(attrs={
         "class": "single-content"
     }).findAll('p')
     text = content_soup[0].text
     url = content_soup[1].find('a')['href'].encode('utf-8')
     self.__send(text, url)
Example #12
def get_360_image(images):
    for image in images:
        url = image["url"]
        # Build the directory path
        directory = os.path.join(Constants.APP_DIRECTORY, "images")
        # Create the directory if it does not exist
        if not os.path.exists(directory):
            os.makedirs(directory)

        urls = url.split("/")
        # Build the absolute file path
        image_path = os.path.join(directory, urls[len(urls) - 1])

        HttpUtil.download_file(image_path, url)
Example #13
def get_myssl_ip():
    """
    Look up DNS records via myssl.
    :return:
    """

    new_hosts = delete_dns(Constants.GITHUB_DOMAIN)

    for domain in Constants.GITHUB_DOMAIN:
        try:
            data = {"qtype": 1, "host": domain, "qmode": -1}
            response = HttpUtil.get(url=Constants.MYSSL_DNS, data=data)
            jsp = json.loads(response.text)
            if jsp["code"] == 0 and jsp["error"] is None:
                result_data = jsp["data"]
                addr_us = result_data["01"][0]["answer"]["records"]
                # addr_hk = result_data["852"][0]["answer"]["records"]
                # addr_cn = result_data["86"][0]["answer"]["records"]

                # Assemble hosts entries
                for us in addr_us:
                    new_hosts.append(us["value"] + " " + domain + "\n")
        except Exception as e:
            print("Error:", e)

    update_hosts(new_hosts)
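delete_dns and update_hosts are project helpers for rewriting a hosts file and are not shown in this listing. A simplified sketch of update_hosts, assuming it simply writes the assembled "<ip> <domain>" lines to a hosts file whose path is passed in (the default path is a placeholder):

# Hypothetical sketch of update_hosts; the hosts file path is a placeholder parameter.
def update_hosts(new_hosts, hosts_path="/etc/hosts"):
    """Write the assembled '<ip> <domain>' lines back to the hosts file."""
    with open(hosts_path, "w", encoding="utf-8") as f:
        f.writelines(new_hosts)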
Example #14
 def get_user_info(cls, openid, access_token, lang='zh_CN'):
     """
     Fetch user info; this step is not needed for the snsapi_base scope.
     :return: user info dict
     """
     params = {'access_token': access_token, 'openid': openid, 'lang':lang}
     return HttpUtil.get(cls._USERINFO_URL, params)
Example #15
 def refresh_access_token(cls, appid, refresh_token):
     """
     Refresh the OAuth access_token.
     :return: dict with keys: access_token, expires_in, refresh_token, openid, scope
     """
     params = {'appid':appid, 'grant_type':'refresh_token', 'refresh_token':refresh_token}
     return HttpUtil.get(cls._REFRESH_TOKEN_UEL, params)
Example #16
def get_360_category():
    data = {'c': 'WallPaper', 'a': 'getAllCategoriesV2', 'from': '360chrome'}
    category = HttpUtil.get(url='http://wallpaper.apc.360.cn/index.php',
                            data=data)
    if category["errno"] != "0":
        raise MsgException('API request failed', category["errmsg"])
    return category["data"]
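A quick usage sketch for the helper above; the id and name keys are assumptions about the 360 wallpaper API's response, which is not documented in this listing:

# Hypothetical usage: list the wallpaper categories returned by get_360_category().
for item in get_360_category():
    print(item.get("id"), item.get("name"))  # assumed field names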
Example #17
def jian_shu_article_retrive(url):
    html = HttpUtil.request(url)
    content_soup = BeautifulSoup(
        html, fromEncoding="utf-8").find(attrs={"class": "article"})
    title = content_soup.find(attrs={"class": "title"}).text.encode('utf-8')
    content = content_soup.find(attrs={"class": "show-content"})
    return (title, content)
Example #18
 def get_access_token(cls, appid, appsecret, code):
     """
     Exchange the authorization code for a web-authorization access_token.
     :return: dict with keys: access_token, expires_in, refresh_token, openid, scope
     """
     params = {'appid': appid, 'secret': appsecret, 'code':code, 'grant_type':'authorization_code'}
     return HttpUtil.get(cls._ACCESS_TOKEN_URL, params)
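Taken together, the OAuth helpers above follow the usual WeChat web-authorization flow: exchange the redirect code for an access_token, then pull the user profile (refreshing the token when it expires). A hedged usage sketch; WxAuthApi and all literal values are placeholders for whatever class and credentials these classmethods belong to:

# Hypothetical usage of the OAuth helpers above; class name and values are placeholders.
token = WxAuthApi.get_access_token(appid='APPID', appsecret='APPSECRET', code='CODE_FROM_REDIRECT')
if 'access_token' in token:
    user = WxAuthApi.get_user_info(openid=token['openid'], access_token=token['access_token'])
    print(user.get('nickname'))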
Example #19
def lin_shi_you_xiang_list(prefix):
    """
    Get the list of messages in the mailbox.
    :param prefix: mailbox prefix
    :return:
    """
    url = Constants.LIN_SHI_YOU_XIANG + "/api/v1/mailbox/" + prefix
    return HttpUtil.get_json(url=url, data=None)
Example #20
 def chick_search(self):
     url = 'http://www.59xihuan.cn/'
     home_soup = BeautifulSoup(HttpUtil.request(url))
     content_soup = home_soup.find(attrs={"class": "pic_text1"})
     text = content_soup.find('p').text
     url = url + content_soup.find('img')['bigimagesrc'].encode('utf-8')
     self.__send(text, url)
     print ''
Example #21
def query_view_by_url(author_url, blog_url):
    url = 'http://www.jianshu.com/' + author_url
    soup = BeautifulSoup(HttpUtil.request(url))
    blogs_soup = soup.find(attrs={"class": "article-list latest-notes"}).findAll('li')
    for blog_soup in blogs_soup:
        blog = service.parse_blog(blog_soup)
        if blog.url == blog_url:
            return blog
Example #22
def short_time_mail_list(last_id):
    """
    Query the mail list.
    :param last_id:
    :return:
    """
    url = Constants.SHORT_TIME_MAIL + "/mail/list"
    return HttpUtil.get_json(url=url, data={"last_id": last_id})
Example #23
def short_time_get_mail(id):
    """
    Query the content of a mail.
    :param id: mail id
    :return:
    """
    url = Constants.SHORT_TIME_MAIL + "/zh-Hans/mail/detail"
    return HttpUtil.get_json(url=url, data={"id": id})
Example #24
 def chick_search(self):
     url = 'http://www.59xihuan.cn/'
     home_soup = BeautifulSoup(HttpUtil.request(url))
     content_soup = home_soup.find(attrs={"class": "pic_text1"})
     text = content_soup.find('p').text
     url = url + content_soup.find('img')['bigimagesrc'].encode('utf-8')
     self.__send(text, url)
     print ''
Example #25
def lin_shi_you_xiang_apply(prefix):
    """
    Apply for (or keep alive) a mailbox.
    :param prefix: mailbox prefix
    :return:
    """
    url = Constants.LIN_SHI_YOU_XIANG + "/api/v1/mailbox/keepalive"
    data = {"force_change": 1, "mailbox": prefix, "_ts": round(time.time() / 1000)}
    return HttpUtil.get_json(url=url, data=data)
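A typical way to combine the lin_shi_you_xiang helpers is to claim a mailbox and then poll its message list; a sketch assuming the list call returns an iterable of messages:

# Hypothetical polling loop built from the helpers above.
prefix = "example123"                 # mailbox prefix (placeholder)
lin_shi_you_xiang_apply(prefix)       # claim / keep alive the mailbox
for _ in range(10):                   # poll for roughly five minutes
    messages = lin_shi_you_xiang_list(prefix)
    if messages:
        print("got %d message(s)" % len(messages))
        break
    time.sleep(30)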
Example #26
def send_weibo_pics(text, urls, browser):
    browser.get('http://m.weibo.cn/mblog')
    time.sleep(3)
    browser.find_element_by_id('txt-publisher').send_keys(text.decode('utf-8'))
    for i, url in enumerate(urls):
        if i == 0:
            open(r'/home/1.jpg', 'wb').write(HttpUtil.request(url))
            pic_browser = browser.find_element_by_class_name('picupload')
            pic_browser.send_keys(r'/home/1.jpg')
            time.sleep(8)
        else:
            open(r'/home/1.jpg', 'wb').write(HttpUtil.request(url))
            pic_browser = browser.find_element_by_name('pic')
            pic_browser.send_keys(r'/home/1.jpg')
            time.sleep(8)
    os.remove(r'/home/1.jpg')
    browser.find_element_by_link_text('发送').click()
    print 'Sent successfully'
Example #27
def send_weibo_pics(text, urls, browser):
    browser.get('http://m.weibo.cn/mblog')
    time.sleep(3)
    browser.find_element_by_id('txt-publisher').send_keys(text.decode('utf-8'))
    for i, url in enumerate(urls):
        if i == 0:
            open(r'/home/1.jpg', 'wb').write(HttpUtil.request(url))
            pic_browser = browser.find_element_by_class_name('picupload')
            pic_browser.send_keys(r'/home/1.jpg')
            time.sleep(8)
        else:
            open(r'/home/1.jpg', 'wb').write(HttpUtil.request(url))
            pic_browser = browser.find_element_by_name('pic')
            pic_browser.send_keys(r'/home/1.jpg')
            time.sleep(8)
    os.remove(r'/home/1.jpg')
    browser.find_element_by_link_text('发送').click()
    print 'Sent successfully'
Example #28
def publish_articles():
    url = 'http://www.jianshu.com'
    html = BeautifulSoup(HttpUtil.request(url))
    articles_soup = [ parse_article_soup(article)  for article in html.find(attrs={"class": "article-list thumbnails"}).findAll('li')]
    sorted_articles = sorted(articles_soup, key=lambda articl_soup:articl_soup[0], reverse=True)
    article_body = service.jian_shu_article_retrive(url + sorted_articles[0][1])
    browser = LoginController.get_browser()
    passwd_dict = PasswdUtil.get_passwd_dict()
    send_text = '《%s》 又到每天推荐文章的时候到了,这些都是精选的枕边读物哦,希望大家喜欢@当幸福敲不开门[害羞][害羞][害羞] ' % article_body[0]
    service.send_weibo(send_text, service.txt_to_pic(article_body[1], browser),  LoginController.mobile_login(browser, passwd_dict[Const.WEIBO_USERNAME], passwd_dict[Const.WEIBO_PASSWD]))
Example #29
def lin_shi_you_xiang_delete(prefix, id):
    """
    Delete a mail (DELETE request).
    :param id: mail id
    :param prefix: mailbox prefix
    :return:
    """
    url = Constants.LIN_SHI_YOU_XIANG + "/api/v1/mailbox/" + prefix + "/" + id
    res = HttpUtil.delete(url=url, data=None)
    res_json = json.loads(res.text)
    return res_json
Example #30
 def request_access_token(appid, appsecret):
     """
     Fetch the access_token via a network request.
     :param appid:
     :param appsecret:
     :return: {'access_token':'', 'expires_in':3600}
     """
     url = WxApi.BASE_URL + "/cgi-bin/token?grant_type=client_credential"
     params = {'appid': appid, 'secret': appsecret}
     return HttpUtil.get(url, params)
Example #31
def short_time_mail_apply():
    """
    Apply for a random shorttimemail.com mailbox.
    :return: mailbox address
    """
    prefix = StringUtil.random_lowercase_alphanumeric(9)
    suffix = "@shorttimemail.com"
    data = {"prefix": prefix, "suffix": suffix}
    # POST to renew for 30 minutes: /mail/continue
    # POST to destroy: /mail/destory
    # POST to delete mails: /mail/delete, params: { ids: ids.join('|') } (ids joined with '|')
    res = HttpUtil.get_json(url=Constants.SHORT_TIME_MAIL + "/mail/apply", data=data)
    if res.code != 200:
        raise MsgException(res.msg)
    return prefix + suffix
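StringUtil.random_lowercase_alphanumeric(9) is another local helper; its name suggests a 9-character random string of lowercase letters and digits. A minimal sketch under that assumption:

# Hypothetical sketch of StringUtil.random_lowercase_alphanumeric.
import random
import string


def random_lowercase_alphanumeric(length):
    """Return `length` random characters drawn from a-z and 0-9."""
    alphabet = string.ascii_lowercase + string.digits
    return "".join(random.choice(alphabet) for _ in range(length))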
Example #32
    def _common_post(self, url, ddata, verify=None):
        """
        :return: (ret_code, dict_data/err_msg)
        """
        if self.BASE_URL not in url:
            url = self.BASE_URL + url
        results = HttpUtil.post(url, ddata, ctype='xml', verify=verify)

        if results.get('return_code', '') == 'SUCCESS':
            assert results.get('sign') == self._generate_sign(**results), 'sign error, not from wechat pay server'
            if results.get('result_code', '') == 'SUCCESS':
                return 0, results
            else:
                return 1, results.get('err_code_des', '')
        else:
            return 1, results.get('return_msg', '')
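_generate_sign is not shown here; WeChat Pay's documented MD5 signature sorts the parameters by key, joins them as key=value pairs, appends the merchant API key, and upper-cases the MD5 hex digest. A sketch of that algorithm (the function name and the api_key argument are assumptions about this particular class):

# Hypothetical sketch of the WeChat Pay MD5 signature; api_key is the merchant key.
import hashlib


def generate_sign(params, api_key):
    """Sign `params` the way WeChat Pay documents for sign_type=MD5."""
    items = sorted((k, v) for k, v in params.items() if k != 'sign' and v not in (None, ''))
    raw = '&'.join('%s=%s' % (k, v) for k, v in items) + '&key=' + api_key
    return hashlib.md5(raw.encode('utf-8')).hexdigest().upper()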
Example #33
def user_info(user_url):
    url = 'http://www.jianshu.com' + user_url
    soup = BeautifulSoup(HttpUtil.request(url))
    user = User()
    base_info_soup = soup.find(attrs={"class": "basic-info"})
    user.name = base_info_soup.find('h3').text.encode('utf-8')
    signed_soup = base_info_soup.find(attrs={"class": "signed_author"})
    if signed_soup:
        user.is_signed = 0
    nums_soup = soup.find(attrs={"class": "user-stats"}).findAll('b')
    user.focus = int(nums_soup[0].text.encode('utf-8'))
    user.fans = int(nums_soup[1].text.encode('utf-8'))
    user.blog_nums = int(nums_soup[2].text.encode('utf-8'))
    user.word_nums = int(nums_soup[3].text.encode('utf-8'))
    user.like_nums = int(nums_soup[4].text.encode('utf-8'))
    return user
Example #34
    def _common_post(self, url, ddata, cert=None):
        """
        :return: (ret_code, dict_data/err_msg)
        """
        if self.BASE_URL not in url:
            url = self.BASE_URL + url
        results = HttpUtil.post(url, ddata, ctype='xml', cert=cert)

        if results.get('return_code', '') == 'SUCCESS':
            assert results.get('sign') == self._generate_sign(**results), 'sign error, not from wechat pay server'
            if results.get('result_code', '') == 'SUCCESS':
                return 0, results
            else:
                return 1, results.get('err_code_des', '')
        else:
            return 1, results.get('return_msg', '')
Example #35
def get_360_update_image():
    # order: sort field; start: index of the first image (for paging); count: images per request, max 200
    data = {
        'c': 'WallPaper',
        'a': 'getAppsByOrder',
        'order': 'create_time',
        'start': 0,
        'count': 200,
        'from': '360chrome'
    }
    response = HttpUtil.get(url='http://wallpaper.apc.360.cn/index.php',
                            data=data)
    if response["errno"] != "0":
        raise MsgException('API request failed', response["errmsg"])

    images = response["data"]
    get_360_image(images)
Example #36
def get_360_category_image():
    category = get_360_category()
    logging.debug(category)
    # cid: category ID; start: index of the first image (for paging); count: images per request, max 200
    data = {
        'c': 'WallPaper',
        'a': 'getAppsByCategory',
        'cid': '36',
        'start': 0,
        'count': 200,
        'from': '360chrome'
    }
    response = HttpUtil.get(url='http://wallpaper.apc.360.cn/index.php',
                            data=data)
    if response["errno"] != "0":
        raise MsgException('API request failed', response["errmsg"])

    images = response["data"]
    get_360_image(images)
    logging.debug(images)
Example #37
def get_short_time_mail_dns():
    """
    Look up DNS records via shorttimemail.com.
    :return:
    """
    new_hosts = delete_dns(Constants.GITHUB_DOMAIN)

    for domain in Constants.GITHUB_DOMAIN:
        try:
            data = {"server": "8.8.8.8", "rrtype": "A", "domain": domain}
            response = HttpUtil.get(url=Constants.SHORT_TIME_MAIL_DNS, data=data)
            jsp = json.loads(response.text)
            if jsp["code"] == 0:
                # Assemble hosts entries
                for us in jsp["data"]:
                    new_hosts.append(us["value"] + " " + domain + "\n")
        except Exception as e:
            print("Error:", e)

    update_hosts(new_hosts)
Example #38
def txt_to_pic(txt, browser):
    url = 'http://www.changweibo.com/'
    js = """document.getElementById('ueditor_0').contentWindow.document.getElementsByClassName('view')[0].innerHTML='%s';
    """ % str(txt).replace('\n', '<br>').replace('\'', '"')
    browser.get(url)
    time.sleep(5)
    browser.execute_script(js)
    time.sleep(3)
    browser.find_element_by_xpath(
        "//a[@class='btn btn-success btn-lg']").click()
    time.sleep(10)
    browser.switch_to_frame('ueditor_0')
    html = browser.page_source
    soup = BeautifulSoup(html)
    data = {"reserve_check": 1, "text": "", "html": soup.find('body')}
    url = 'http://www.changweibo.com/convert_changweibo_com.php'
    response = HttpUtil.request_post(url, data)
    img_url = json.loads(response)['image_url']
    print(img_url)
    return img_url
Example #39
def get_tag(page):
    """
    Fetch tags.
    :param page: page number
    :return:
    """
    html = BeautifulSoup(
        HttpUtil.get(f"https://wallhaven.cc/tags?page={str(page)}").text,
        features="lxml")
    tags_html = html.find_all("a", {"class": "sfw"})
    for tag_html in tags_html:
        url = tag_html.attrs["href"]
        tag_id = url[url.rfind("/") + 1:]
        tag_text = tag_html.text
        print(tag_id, tag_text)

    # Get all tags that carry the given attribute
    page_all = html.find_all(lambda tag: tag.has_attr('original-title'))
    page_total = int(page_all[len(page_all) - 1].text)
    # If this is not the last page, continue with the next page
    if page < page_total:
        get_tag(page + 1)
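Because get_tag calls itself once per page, a site with thousands of tag pages would eventually hit Python's recursion limit. An equivalent iterative loop, sketched with the same HttpUtil and BeautifulSoup calls used above:

# Hypothetical iterative variant of get_tag: walk all pages without recursion.
def get_tags_iterative(start_page=1):
    page = start_page
    while True:
        html = BeautifulSoup(
            HttpUtil.get(f"https://wallhaven.cc/tags?page={page}").text,
            features="lxml")
        for tag_html in html.find_all("a", {"class": "sfw"}):
            url = tag_html.attrs["href"]
            print(url[url.rfind("/") + 1:], tag_html.text)
        page_all = html.find_all(lambda tag: tag.has_attr("original-title"))
        if not page_all or page >= int(page_all[-1].text):
            break
        page += 1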
Example #40
def publish_articles():
    url = 'http://www.jianshu.com'
    html = BeautifulSoup(HttpUtil.request(url))
    articles_soup = [
        parse_article_soup(article)
        for article in html.find(attrs={
            "class": "article-list thumbnails"
        }).findAll('li')
    ]
    sorted_articles = sorted(articles_soup,
                             key=lambda articl_soup: articl_soup[0],
                             reverse=True)
    article_body = service.jian_shu_article_retrive(url +
                                                    sorted_articles[0][1])
    browser = LoginController.get_browser()
    passwd_dict = PasswdUtil.get_passwd_dict()
    send_text = '《%s》 又到每天推荐文章的时候到了,这些都是精选的枕边读物哦,希望大家喜欢@当幸福敲不开门[害羞][害羞][害羞] ' % article_body[
        0]
    service.send_weibo(
        send_text, service.txt_to_pic(article_body[1], browser),
        LoginController.mobile_login(browser,
                                     passwd_dict[Const.WEIBO_USERNAME],
                                     passwd_dict[Const.WEIBO_PASSWD]))
Example #41
def txt_to_pic(txt, browser):
    url = 'http://www.changweibo.com/'
    js = """document.getElementById('ueditor_0').contentWindow.document.getElementsByClassName('view')[0].innerHTML='%s';
    """ % str(txt).replace('\n', '<br>').replace('\'', '"')
    browser.get(url)
    time.sleep(5)
    browser.execute_script(js)
    time.sleep(3)
    browser.find_element_by_xpath("//a[@class='btn btn-success btn-lg']").click()
    time.sleep(10)
    browser.switch_to_frame('ueditor_0')
    html = browser.page_source
    soup = BeautifulSoup(html)
    data = {
        "reserve_check": 1,
        "text": "",
        "html": soup.find('body')
    }
    url = 'http://www.changweibo.com/convert_changweibo_com.php'
    response = HttpUtil.request_post(url, data)
    img_url =  json.loads(response)['image_url']
    print(img_url)
    return img_url
Example #42
def find_blogs(url):
    soup = BeautifulSoup(HttpUtil.request(url))
    blogs = map(lambda blog_soup:parse_blog(blog_soup), soup.find(attrs={"class": "article-list thumbnails"}).findAll('li'))
    return filter(lambda blog : blog is not None, blogs)
Example #43
 def __research(self, page, opener):
     html = 'http://search.51job.com/jobsearch/search_result.php?jobarea=070200%2C00&keyword=java&curr_page=' + str(page)
     response = HttpUtil.opener_request(html, opener)
     soup = BeautifulSoup(response)
     job_list = soup.find(id='resultList').findAll('div', attrs={"class": "el"})[1:]
     self.__insert_jobs(map(self.__get_51job_detail, job_list))
Example #44
 def set_ntp(self, ntp_server, ntp_interval, ntp_type):
     for host in self.ssh.hosts:
         HttpUtil.httpcfg(
             host[0], 'language=ie&datestampenable3=' + str(ntp_type) +
             '&sntpip=' + ntp_server + '&ntpinterval=' + str(ntp_interval))
Example #45
def lin_shi_you_xiang_get_mail(prefix, id):
    url = Constants.LIN_SHI_YOU_XIANG + "/mailbox/" + prefix + "/" + id + "/source"
    return HttpUtil.get(url=url, data=None).text
Example #46
def download_images(url, page, directory):
    """
    Download images.
    :param url: link
    :param page: page number
    :param directory: directory where the files are stored
    :return:
    """
    try:
        SystemUtil.restart_process(os.path.abspath(__file__))

        html = BeautifulSoup(HttpUtil.get(url + str(page)).text,
                             features="lxml")
        figure = html.find_all("figure")
        # Get all tags that carry the given attribute
        page_all = html.find_all(lambda tag: tag.has_attr('original-title'))
        page_total = int(page_all[len(page_all) - 1].text)

        print(page, len(figure), page_total)
        if page > page_total:
            page = 1
            raise ValueError("page out of range")

        for label in figure:
            image_id = label.attrs["data-wallpaper-id"]

            # Image detail page
            info_html = BeautifulSoup(HttpUtil.get("https://wallhaven.cc/w/" +
                                                   image_id).text,
                                      features="lxml")
            tags_html = info_html.find_all("a", {
                "class": "tagname",
                "rel": "tag"
            })
            # Tags of the image
            tags = ",".join([tag_html.text
                             for tag_html in tags_html]).replace("'", "")
            if len(tags) > 0 and tags != "":
                tags = TranslationUtil.translate_google(tags).replace(",", ",")
                tags = re.sub(r"[^a-z,\u4e00-\u9fa5]+|^,|,$", "",
                              tags).replace(",,", ",")

            download_url = info_html.find("img", {
                "id": "wallpaper"
            }).attrs["src"]
            if len(download_url) <= 0 or download_url == "":
                raise ConnectionError("获取下载链接失败")

            s3.execute_commit(f"""
            INSERT OR IGNORE INTO images(image_id,suffix,url,type,page,tags) 
            VALUES('{image_id}','{download_url[download_url.rfind(".") + 1:]}',
            '{download_url}','latest','{page}','{tags}')
            """)

            image_name = download_url.split("/")
            image_name = image_name[len(image_name) - 1]
            # Check whether the file already exists
            # if not os.path.exists(name):
            if not os.path.isfile(os.path.join(directory, image_name)):
                # Download each image in its own thread
                # done = ThreadPool.pool.submit(HttpUtil.download_file, download_url, directory, image_name)
                # done.add_done_callback(ThreadPool.thread_call_back)
                asyncio.run(
                    HttpUtil.download_one_async(download_url, directory,
                                                image_name))
        global run_count
        run_count += 1

        # If pages were found, this is not the last page, and no more than 10 passes have run
        if len(page_all) > 0 and page <= page_total and run_count <= 10:
            download_images(url, page + 1, directory)
        else:
            if len(page_all) > 0:
                page += 1
            if page > page_total:
                page = 1
            run_count = 0

    except Exception as e:
        print(e)
    finally:
        print("Active thread count:", threading.activeCount())
        time.sleep(400)
        download_images(url, page, directory)
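Re-invoking download_images from the finally block keeps the crawler alive, but every cycle adds a stack frame, so a long run will eventually exceed Python's recursion limit. The same "retry forever" behaviour can be had with a plain loop; this sketch assumes the body above is refactored into a function that returns the next page instead of calling itself:

# Hypothetical driver loop: repeat forever without growing the call stack.
# download_one_pass is an assumed refactoring of the body above that returns the next page.
def run_forever(url, directory, start_page=1, interval=400):
    page = start_page
    while True:
        try:
            page = download_one_pass(url, page, directory)
        except Exception as e:
            print(e)
        time.sleep(interval)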
Example #47
 def _get(self, url, params=None):
     return HttpUtil.get(self._final_url(url), params)
Example #48
def jian_shu_article_retrive(url):
    html = HttpUtil.request(url)
    content_soup = BeautifulSoup(html, fromEncoding="utf-8").find(attrs={"class": "article"})
    title = content_soup.find(attrs={"class": "title"}).text.encode('utf-8')
    content = content_soup.find(attrs={"class": "show-content"})
    return (title, content)
Example #49
 def _post(self, url, ddata):
     final_url = self._final_url(url)
     return HttpUtil.post(final_url, ddata, ctype='json')
Example #50
 def _final_url(self, url):
     new_url = WxApi.BASE_URL + url
     final_url = HttpUtil.url_update_query(new_url, access_token=self.access_token)
     return final_url
Example #51
 def check_access_token(cls, openid, access_token):
     """
     Check whether the authorization credential (access_token) is still valid.
     """
     params = {'access_token':access_token, 'openid': openid}
     return HttpUtil.get(cls._CHECK_TOKEN_URL, params)
Example #52
def download_latest_images_selenium(page, directory):
    """
    Fetch pages using selenium.
    :param page:
    :param directory:
    :return:
    """
    SystemUtil.restart_process(os.path.abspath(__file__))
    driver = ReptileUtil.selenium_driver(
        "https://www.pexels.com/new-photos?page=" + str(page))
    try:
        articles = driver.find_elements_by_tag_name("article")
        next_page = True
        try:
            driver.find_element_by_xpath(
                "/html/body/section/div[4]/div/a[@rel='next']")
        except Exception as e:
            next_page = False
        # Remember the current window handle (window A vs. window B)
        main_window = driver.current_window_handle
        print(articles)
        for article in articles:
            # Image id
            image_id = article.get_attribute("data-photo-modal-medium-id")
            info_url = "https://www.pexels.com/photo/" + image_id
            # Open a new tab via JavaScript and visit the url
            driver.execute_script(f"window.open('{info_url}')")
            driver.switch_to.window(driver.window_handles[-1])
            tags = ""
            if driver.title.find("500") == -1:
                tags = driver.find_element_by_xpath(
                    "//meta[@name='keywords']").get_attribute("content")
                tags = TranslationUtil.translate_google(tags).replace(",", ",")
                tags = re.sub(r"[^a-z,\u4e00-\u9fa5]+|^,|,$", "",
                              tags).replace(",,", ",")
            # Close the current window.
            driver.close()
            # After closing the new tab, switch back to the main window; skipping this step raises an error
            driver.switch_to.window(main_window)
            # Image download link
            download_url = f"https://images.pexels.com/photos/{image_id}/pexels-photo-{image_id}.jpeg?dl={image_id}.jpg"
            s3.execute_commit(f"""
            INSERT OR IGNORE INTO images(image_id,suffix,url,type,page,tags) 
            VALUES('{image_id}','jpg','{download_url}','latest','{page}','{tags}')
            """)
            image_name = f"pexels-photo-{image_id}.jpg"
            # Check whether the file already exists
            if not os.path.exists(os.path.join(directory, image_name)):
                asyncio.run(
                    HttpUtil.download_one_async(download_url, directory,
                                                image_name))
        global run_count
        run_count += 1

        # If there is a next page and no more than 10 passes have run
        if next_page and run_count <= 10:
            download_latest_images(page + 1, directory)
        else:
            if next_page:
                page += 1
            else:
                page = 1
            run_count = 0

    except Exception as e:
        print(e)
    finally:
        # Close the current window.
        driver.close()
        # Close the browser and shut down the chromedriver process
        driver.quit()
        print("Active thread count:", threading.activeCount())
        time.sleep(400)
        download_latest_images(page, directory)
Example #53
def download_latest_images(page, directory):
    try:
        SystemUtil.restart_process(os.path.abspath(__file__))

        html = BeautifulSoup(
            HttpUtil.get("https://www.pexels.com/zh-cn/new-photos?page=" +
                         str(page)).text,
            features="lxml")
        articles = html.find_all("article")
        pages_html = BeautifulSoup(str(
            html.find("div", {"class": "pagination"})),
                                   features="lxml").find_all("a")
        page_total = int(pages_html[len(pages_html) - 2].text)

        print(page, len(articles), page_total)
        if page > page_total:
            page = 1
            raise ValueError("page out of range")

        for article in articles:
            # Image id
            image_id = article["data-photo-modal-medium-id"]
            # Original image size
            # image_org_size = article["data-photo-modal-download-value-original"]
            # Image download link
            download_url = article["data-photo-modal-image-download-link"]
            image_name = f"pexels-photo-{image_id}.jpg"

            info_html = BeautifulSoup(
                HttpUtil.get("https://www.pexels.com/zh-cn/photo/" +
                             image_id).text,
                features="lxml")
            tags = info_html.find("meta", {
                "name": "keywords"
            }).attrs["content"]
            if len(tags) > 0 and tags != "":
                # Convert Traditional to Simplified Chinese
                tags = zhconv.convert(tags[:len(tags) - 7], 'zh-cn')
                tags = re.sub(r"[^a-z,\u4e00-\u9fa5]+|^,|,$", "",
                              tags).replace(",,", ",")
            s3.execute_commit(f"""
            INSERT OR IGNORE INTO images(image_id,suffix,url,type,page,tags) 
            VALUES('{image_id}','{download_url[download_url.rfind(".") + 1:]}',
            '{download_url}','latest','{page}','{tags}')
            """)
            # dl = info_html.find(lambda tag: tag.has_attr('data-id') and tag.has_attr('href')).attrs["href"]
            # dl = info_html.find(lambda tag: tag.has_attr('data-id') and tag.has_attr('data-url')).attrs["data-url"]

            # Check whether the file already exists
            if not os.path.exists(os.path.join(directory, image_name)):
                # Download each image in its own thread
                # done = ThreadPool.pool.submit(HttpUtil.download_file, download_url, directory, image_name)
                # done.add_done_callback(ThreadPool.thread_call_back)
                asyncio.run(
                    HttpUtil.download_one_async(download_url, directory,
                                                image_name))

        global run_count
        run_count += 1

        # If pages were found and this is not the last page
        if page_total > 0 and page <= page_total and run_count <= 10:
            download_latest_images(page + 1, directory)
        else:
            if len(pages_html) > 0 and page <= page_total:
                page += 1
            if page > page_total:
                page = 1
            run_count = 0

    except Exception as e:
        print(e)
    finally:
        print("Active thread count:", threading.activeCount())
        time.sleep(400)
        download_latest_images(page, directory)