Example 1
 def get_ynote_web_header(self, mode: int = 0):
     headers = {
         "Content-Type": get_content_type(),
         "Cookie": self.cookie,
         "Host": self.Y_URL.split("/")[2],
         "Origin": self.Y_URL,
         "Referer": self.WEB_URL,
     }
     if mode:
         headers["Accept"] = get_accept("xhr")
     else:
         headers["Accept"] = get_accept("html")
     return headers
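
All of the examples on this page call small header helpers such as get_accept and get_content_type. A minimal sketch of what they might return, inferred only from how they are used in these examples (the project's real implementations may differ), could look like this:

def get_accept(mode: str = "html") -> str:
    # Assumed mapping from a short mode name to an Accept header value.
    accepts = {
        "html": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "xhr": "application/json, text/javascript, */*; q=0.01",
        "*": "*/*",
    }
    return accepts.get(mode, accepts["html"])

def get_content_type(charset: str = "UTF-8") -> str:
    # Assumed behavior: a form Content-Type, with an optional charset suffix.
    base = "application/x-www-form-urlencoded"
    return "{}; charset={}".format(base, charset) if charset else base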
Example 2
 def get_login_headers(self, mode: int = 0, cookie: dict = None):
     headers = {
         'Referer': self.LOGIN_URL,
     }
     if mode != 3:
         headers['Accept'] = get_accept('*') if mode == 2 else get_accept('xhr')
     if mode == 1:
         headers['Content-Type'] = get_content_type('')
     elif mode == 2:
         headers['X-Requested-With'] = 'XMLHttpRequest'
     if cookie:
         headers['Cookie'] = encoder_cookie(cookie)
     return headers
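
Example 2 also relies on an encoder_cookie helper. A plausible sketch, assuming it simply serializes a cookie dict into a standard "name=value; name=value" header string:

def encoder_cookie(cookie: dict) -> str:
    # Assumed behavior: join key/value pairs into a Cookie header string.
    return "; ".join("{}={}".format(k, v) for k, v in cookie.items())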
Example 3
 def __init__(self):
     self.default_hotel_id = 4889292
     self.header = {
         'Cookie': '',
         'Accept': get_accept('html'),
         'Content-Type': get_content_type(),
     }
Example 4
    def load_gather(self):
        """
        load gather proxy pool text
        If failured, you should reactive the cookie.
        """
        headers = {
            "Host": "www.gatherproxy.com",
            "Origin": "http://www.gatherproxy.com",
            "Referer": "http://www.gatherproxy.com/proxylist/anonymity/?t=Transparent",
            "Cookie": "_lang=en-US; _ga=GA1.2.1084455496.1548351129; _gid=GA1.2.1515017701.1552361687; ASP.NET_SessionId=ckin3pzyqyoyt3zg54zrtrct; _gat=1; arp_scroll_position=57",
            "Content-Type": get_content_type(),
            "Accept": get_accept("html"),
        }
        url = "http://www.gatherproxy.com/subscribe/infos"
        try:
            sid_url_req = requests.get(url, headers=headers, verify=False, timeout=10)
        except requests.exceptions.RequestException:
            return
        sid_url_html = BeautifulSoup(sid_url_req.text, "html.parser")
        sid_url = sid_url_html.find_all("div", class_="wrapper")[1].find_all("a")[0][
            "href"
        ]
        if len(sid_url.split("sid=")) < 2:
            echo("0|warning", "cookie error")
            self.get_cookie()
            self.load_gather()
            return
        sid = sid_url.split("sid=")[1]
        sid_url = "http://www.gatherproxy.com" + sid_url

        data = {"ID": sid, "C": "", "P": "", "T": "", "U": "0"}
        gatherproxy = requests.post(sid_url, headers=headers, data=data, verify=False)
        with codecs.open(data_dir + "gatherproxy", "w", encoding="utf-8") as f:
            f.write(gatherproxy.text)
Example 5
 def get_api_headers(self, bv_id: str, types: int = 0) -> dict:
     if isinstance(bv_id, int):
         bv_id = "av{}".format(bv_id)
     if types == 0:
         return {"Accept": "*/*", "Referer": self.BASIC_BV_URL % bv_id}
     if types == 1:
         return {"Accept": get_accept("html"), "Host": self.BILIBILI_URL}
Example 6
 def search_goods_once(self, goods_name, index):
     if not os.path.exists('%scookie_alimama' % data_dir):
         print('alimama cookie not exist!!!')
         return
     with codecs.open('%scookie_alimama' % data_dir, 'r',
                      encoding='utf-8') as f:
         cookie = f.readlines()
     url_list = [
         'https://pub.alimama.com/items/search.json?auctionTag=&perPageSize=50&shopTag=&_tb_token_=',
         cookie[1][:-1], '&pvid=', cookie[2][:-1], '&t=',
         str(int(round(time.time() * 1000))), '&_t=',
         str(int(round(time.time() * 1000))), '&q=', goods_name
     ]
     headers = {
         'X-Requested-With': 'XMLHttpRequest',
         'Cookie': '',
         'Content-Type': get_content_type(),
         'Accept': get_accept('xhr'),
     }
     headers['Cookie'] = cookie[0][:-1]
     ca = basic_req(''.join(url_list), 2, header=headers)
     if ca.status_code != 200 or 'data' not in ca.json():
         if can_retry(''.join(url_list)):
             self.search_goods_once(goods_name, index)
         return
     page_list = ca.json()['data']['pageList']
     title = '||'.join([
         str(page_list[0]['auctionId']), goods_name,
         str(page_list[0]['zkPrice'])
     ])
     self.goods_name[index] = title
     print(title)
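
For reference, the request URL assembled from url_list in Example 6 has the following shape (token, pvid and the timestamps are placeholders read from the cookie file at runtime):

# https://pub.alimama.com/items/search.json?auctionTag=&perPageSize=50&shopTag=
#     &_tb_token_=<token>&pvid=<pvid>&t=<ms timestamp>&_t=<ms timestamp>&q=<goods_name>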
Example 7
    def have_places_once(self):
        """
        have places
        """
        url = 'http://elective.pku.edu.cn/elective2008/edu/pku/stu/elective/controller/supplement/refreshLimit.do'
        if not os.path.exists('%scookie' % data_path):
            print('Brush Cookie not exist!!!')
            return
        with open('%scookie' % data_path, 'r') as f:
            cookie = f.readlines()
        headers = {
            'X-Requested-With': 'XMLHttpRequest',
            'Cookie': '',
            'Content-Type': get_content_type(),
            'Accept': get_accept('xhr'),
            "Origin": "http://elective.pku.edu.cn",
            "Referer": "http://elective.pku.edu.cn/elective2008/edu/pku/stu/elective/controller/supplement/SupplyCancel.do",
        }
        headers['Cookie'] = cookie[0][:-1]

        data = {
            "index": '10',
            "seq": 'yjkc20141100016542',
        }

        ca = proxy_req(url, 11, data, header=headers)

        if not ca:
            if round(time.time()) - self.laster_timestamp > 60:
                send_email("Cookie failure", "Cookie failure")
            return False
        print(ca['electedNum'])
        self.laster_timestamp = round(time.time())
        return int(ca['electedNum']) < 120
Example 8
    def getJianshuViews(self):
        ''' get jianshu views '''
        header = {'accept': get_accept('html')}

        for rounds in range(1, 4):
            url = self.JIANSHU_URL
            if rounds > 1:
                url += '?order_by=shared_at&page={}'.format(rounds)
            echo('1|debug', 'jianshu req url:', url)
            html = self.get_request(
                url, 0,
                lambda i: not i or not len(i.find_all('div', class_='content')),
                header)
            if html is None:
                echo(0, 'None')
                return
            for index in html.find_all('li', class_=["", 'have-img']):
                if len(index.find_all('i')) < 3:
                    continue
                title = index.find_all('a', class_='title')[0].text.replace('`', '')
                jianshu_id = int(index['data-note-id'])
                jianshu_count = int(index.find_all('a')[-2].text)
                if title in self.title2slug:
                    temp_slug = self.title2slug[title]
                    self.jianshu_id[temp_slug] = jianshu_id
                    self.jianshu_views[temp_slug] = jianshu_count
                elif jianshu_id in self.jianshu_id_map:
                    temp_slug = self.jianshu_id_map[jianshu_id]
                    self.jianshu_id[temp_slug] = jianshu_id
                    self.jianshu_views[temp_slug] = jianshu_count
                else:
                    echo(1, title)
Example 9
 def get_tb_headers(self, url: str = "", refer_url: str = "") -> dict:
     headers = {"Accept": get_accept("html"), "User-Agent": get_use_agent()}
     if url != "":
         headers["Host"] = url.split("/")[2]
     if refer_url != "":
         headers["referer"] = refer_url
     return headers
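
A hypothetical call to get_tb_headers (the URLs below are placeholders, not endpoints taken from these examples), assuming spider is an instance of the class above:

headers = spider.get_tb_headers(
    url="https://item.example.com/detail?id=1",
    refer_url="https://list.example.com/")
# headers now holds Accept, User-Agent, Host="item.example.com"
# and referer="https://list.example.com/".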
Example 10
    def test_change_youdaoyun(self, article_id, body, article_name):
        """
        Demo: modify a Youdao Note (youdaoyun) article.
        @param 'buildmd/data/cookie': cookie copied from the Youdao Note web client
        @param article_id: index of the article to modify
        @param body: new article body
        @param article_name: new article name
        """
        url = 'https://note.youdao.com/yws/api/personal/sync?method=push&keyfrom=web&cstk=E3CF_lx8'
        headers = {
            'Cookie': '',
            'Content-Type': get_content_type(),
            'Accept': get_accept('xhr'),
            'Origin': 'https://note.youdao.com',
            'Referer': 'https://note.youdao.com/web'
        }
        if not os.path.exists('%scookie' % data_dir):
            print('Youdao Note cookie not exist!!!')
            return

        with codecs.open('%scookie' % data_dir, 'r', encoding='utf-8') as f:
            cookie = f.readline()
        headers['Cookie'] = cookie[:-1]
        headers['Host'] = url.split('/')[2]

        file_list_url = 'https://note.youdao.com/yws/api/personal/file?method=listRecent&offset=0&limit=30&keyfrom=web&cstk=E3CF_lx8'
        file_data = {'cstk': 'E3CF_lx8'}
        ca = basic_req(file_list_url, 11, data=file_data, header=headers)
        if not len(ca):
            print('List Error')
            return
        change_data_origin = ca[article_id]['fileEntry']
        body_string = [
            '<?xml version="1.0"?><note xmlns="http://note.youdao.com" schema-version="1.0.3" file-version="0"><head/><body><para><coId>12-1550424181958</coId><text>',
            body, '</text><inline-styles/><styles/></para></body></note>'
        ]
        change_data = {
            'name': article_name,
            'fileId': change_data_origin['id'],
            'parentId': change_data_origin['parentId'],
            'domain': change_data_origin['domain'],
            'rootVersion': -1,
            'sessionId': '',
            'modifyTime': int(round(time.time())),
            'bodyString': "".join(body_string),
            'transactionId': change_data_origin['id'],
            'transactionTime': int(round(time.time())),
            'orgEditorType': change_data_origin['orgEditorType'],
            'tags': change_data_origin['tags'],
            'cstk': 'E3CF_lx8'
        }
        print(change_data)
        cb = basic_req(url, 12, data=change_data, header=headers)
        return cb
Example 11
 def get_item_basic(self, item_id: int, url: str = ""):
     url = self.ITEM_URL % item_id if url == "" else url
     headers = {"Accept": get_accept("html")}
     req = proxy_req(url, 2, header=headers, config={"allow_redirects": False})
     if req is None:
         if can_retry(url):
             return self.get_item_basic(item_id, url)
         return
     if req.status_code != 200:
         return self.get_item_basic(item_id, req.headers["Location"])
     return req
Example 12
 def get_m_html(self, bv_id: str) -> str:
     url = self.M_BILIBILI_URL % bv_id
     headers = {
         "Accept": get_accept("html"),
         "Host": url.split("/")[2],
         "User-Agent": get_use_agent("mobile"),
     }
     m_html = proxy_req(url, 3, header=headers)
     if len(m_html) < 1000:
         if can_retry(url):
             return self.get_m_html(bv_id)
         else:
             return ""
     return m_html
Example 13
def get_score(cookie: str):
    SCORE_URL = 'https://portal.w.pku.edu.cn/portal2017/bizcenter/score/retrScores.do'
    headers = {
        'Accept': get_accept('xhr'),
        'Host': 'portal.w.pku.edu.cn',
        'Origin': 'https://portal.w.pku.edu.cn',
        'Referer': 'https://portal.w.pku.edu.cn/portal2017/',
        'Cookie': cookie,
    }
    req = basic_req(SCORE_URL, 11, header=headers)
    if req is None or list(req.keys()) != ['success', 'xslb', 'xh', 'xm', 'scoreLists']:
        if can_retry(SCORE_URL):
            return get_score(cookie)
        else:
            return
    return req
Example 14
    def get_cookie(self):
        """
        make cookie login
        PS: Though cookie expired time is more than 1 year,
            but It will be break when the connect close.
            So you need reactive the cookie by this function.
        """
        headers = {
            "Cookie":
            "_lang=en-US; _ga=GA1.2.1084455496.1548351129; _gid=GA1.2.1515017701.1552361687; ASP.NET_SessionId=ckin3pzyqyoyt3zg54zrtrct; _gat=1; arp_scroll_position=57",
            "Accept": get_accept("html") + ";q=0.9",
        }
        login_url = "http://www.gatherproxy.com/subscribe/login"

        cookie_html = basic_req(login_url, 3, header=headers)
        try:
            verify_text = re.findall('<span class="blue">(.*?)</span>',
                                     cookie_html)[0]
        except (IndexError, TypeError):
            return
        verify_list = verify_text.replace("= ", "").strip().split()
        num_map = {
            "Zero": 0,
            "One": 1,
            "Two": 2,
            "Three": 3,
            "Four": 4,
            "Five": 5,
            "Six": 6,
            "Seven": 7,
            "Eight": 8,
            "Nine": 9,
            "Ten": 10,
        }
        verify_num = [verify_list[0], verify_list[2]]
        for index, num in enumerate(verify_num):
            if num.isdigit():
                verify_num[index] = int(num)
            elif num in num_map:
                verify_num[index] = num_map[num]
            else:
                echo("0|error", "Error", num)
                # return False
        verify_code = 0
        error = True

        operation = verify_list[1]
        if operation in ("+", "plus", "add", "multiplied"):
            verify_code = verify_num[0] + verify_num[1]
            error = False
        if operation in ("-", "minus"):
            verify_code = verify_num[0] - verify_num[1]
            error = False
        if operation in ("X", "multiplication"):
            verify_code = verify_num[0] * verify_num[1]
            error = False
        if error:
            echo("0|error", "Error", operation)
        if not os.path.exists("%spassage" % data_dir):
            echo("0|warning", "gather passage not exist!!!")
            return
        with codecs.open("%spassage" % data_dir, "r", encoding="utf-8") as f:
            passage = [index[:-1] for index in f.readlines()]
        data = {
            "Username": passage[0],
            "Password": passage[1],
            "Captcha": str(verify_code),
        }
        time.sleep(2.163)
        r = requests.session()
        r.cookies = cj.LWPCookieJar()
        login_req = r.post(login_url, headers=headers, data=data, verify=False)
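
To make the captcha arithmetic in get_cookie concrete, here is a worked example with a hypothetical prompt (the real page wording may differ):

# Suppose the login page shows:  <span class="blue">Three + Four = </span>
# verify_text -> "Three + Four = "
# verify_list -> ["Three", "+", "Four"]   (after replace("= ", "").strip().split())
# verify_num  -> [3, 4] via num_map; operation -> "+"
# verify_code -> 7, which is posted back as the "Captcha" form field.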
Example 15
    def match_goods(self):

        self.headers = {
            'X-Requested-With': 'XMLHttpRequest',
            'Cookie': '',
            'Content-Type': get_content_type(),
            'Accept': get_accept('xhr')
        }

        version = begin_time()
        changeHtmlTimeout(30)
        block_size = 10
        if not os.path.exists('%sgoods' % data_dir):
            print('goods file not exist!!!')
            return
        with codecs.open('%sgoods' % data_dir, 'r', encoding='utf-8') as f:
            wait_goods = f.readlines()
        goods_url = [
            re.findall('http.* ', index)[0].strip().replace('https', 'http')
            if 'http' in index and '【' not in index else False
            for index in wait_goods
        ]

        if not os.path.exists('%scollect_wyy' % data_dir):
            print('collect file not exist!!!')
            return
        with codecs.open('%scollect_wyy' % data_dir, 'r',
                         encoding='utf-8') as f:
            collect = f.readlines()
        self.title2map = {
            index.split("||")[1]: index.split("||")[0]
            for index in collect
        }

        threadings = []
        for index, url in enumerate(goods_url):
            if url is False:
                continue
            work = threading.Thread(target=self.get_goods_id_first,
                                    args=(url, index))
            threadings.append(work)
        url_len = len(threadings)
        for index in range((url_len - 1) // block_size + 1):
            begin_id = index * block_size
            end_id = min(url_len, (index + 1) * block_size)
            threadings_block = threadings[begin_id:end_id]

            for work in threadings_block:
                work.start()
            for work in threadings_block:
                work.join()

            time.sleep(random.randint(0, 9))

        write_body = [
            ' '.join([self.goods_map[index], body]) if index in self.goods_map
            else (' '.join([self.url2goods[goods_url[index]], body])
                  if goods_url[index] in self.url2goods else body)
            for index, body in enumerate(wait_goods)
        ]
        with codecs.open('%sgoods_one' % data_dir, 'w', encoding='utf-8') as f:
            f.write(''.join(write_body))
        end_time(version)
Example 16
    def bulk_import_alimama(self):
        """
        bulk import alimama
        """

        version = begin_time()
        if not os.path.exists('%scollect_wyy' % data_dir):
            print('Collect File not exist!!!')
            return
        with codecs.open('%scollect_wyy' % data_dir, 'r',
                         encoding='utf-8') as f:
            goods = f.readlines()
        self.goods_candidate = [index.split('||')[0] for index in goods]
        goods_len = len(self.goods_candidate)

        self.headers = {
            'X-Requested-With': 'XMLHttpRequest',
            'Cookie': '',
            'Content-Type': get_content_type(),
            'Accept': get_accept('xhr'),
            'Origin': 'http://pub.alimama.com',
            'Referer': 'http://pub.alimama.com/promo/search/index.htm?q=%E7%AC%AC%E5%9B%9B%E5%8D%81%E4%B9%9D%E5%A4%A9%2019%E6%98%A5%E5%AD%A3&_t=1550891362391'
        }
        if not os.path.exists('%scookie_alimama' % data_dir):
            print('alimama cookie not exist!!!')
            return
        with codecs.open('%scookie_alimama' % data_dir, 'r',
                         encoding='utf-8') as f:
            cookie = f.readlines()
        url_list = [
            'https://pub.alimama.com/favorites/group/newList.json?toPage=1&perPageSize=40&keyword=&t=',
            str(int(round(time.time() * 1000))), '&_tb_token_=',
            cookie[1][:-1], '&pvid=', cookie[2][:-1]
        ]
        url = ''.join(url_list)
        self.headers['Cookie'] = cookie[0][:-1]
        self.headers['Host'] = url.split('/')[2]

        group_list = basic_req(url, 2, header=self.headers)

        if group_list.status_code != 200 or group_list.json()['info']['message'] == 'nologin':
            print('group_list error')
            return
        group_list = group_list.json()['data']['result']
        group_list = [index['id'] for index in group_list]

        print(group_list)

        assert len(group_list) > (goods_len - 1) // 200

        threadings = []
        for index in range((goods_len - 1) // 200 + 1):
            work = threading.Thread(target=self.bulk_import_alimama_once,
                                    args=(index, group_list[index]))
            threadings.append(work)
        for work in threadings:
            work.start()
        for work in threadings:
            work.join()
        end_time(version)