Esempio n. 1
0
def get_movie_data(location):
    """Fetch the BookMyShow QUICKBOOK movie payload for *location*.

    The getData endpoint itself is not location-specific; the city
    context travels via the cookies returned by get_cookies(location).

    Args:
        location: City slug, e.g. "bengaluru".

    Returns:
        dict: Parsed JSON response from the QUICKBOOK endpoint.

    Raises:
        requests.HTTPError: If the endpoint responds with an error status.
    """
    # Removed the unused `website` local the original built and dropped.
    cookies = get_cookies(location)
    url = "https://in.bookmyshow.com/serv/getData?cmd=QUICKBOOK&type=MT"
    response = requests.get(url, cookies=cookies)
    response.raise_for_status()
    # Let requests decode the body instead of json.loads(response.text).
    return response.json()
Esempio n. 2
0
 def run(self):
     """Entry point: classify the configured URLs, then crawl each group."""
     # select_url presumably partitions self.url_list into the two
     # bucket lists consumed below — confirm against the class body.
     self.select_url(self.url_list)
     # Yupoo-hosted albums: gather album links, then fetch image pages.
     for yupoo_url in self.new_yupoo_list:
         href_list = self.yupoo_spider(yupoo_url, [])
         self.get_ypimg_page(href_list)
     # Everything else: resolve cookies and base URL, then scrape.
     for other_url in self.other_url_list:
         cookies, base_url = get_cookies.get_cookies(other_url)
         self.get_other_msg(cookies, base_url)
Esempio n. 3
0
 def setUpClass(cls) -> None:
     """Run once before the tests in this class.

     Original notes (translated): option 1 — fetch cookies; option 2 —
     log in successfully and persist the session state. This version
     stores the helper's return value as the shared request header.
     """
     cls.my_header = get_cookies.get_cookies()
     # Echo the header so a failed login is visible in the test output.
     print(cls.my_header)
Esempio n. 4
0
def get_movie_data(location):
    """Fetch the BookMyShow QUICKBOOK movie payload for *location*.

    Args:
        location: City slug used to obtain location-scoped cookies.

    Returns:
        dict: Parsed JSON response.

    Raises:
        requests.HTTPError: On a non-2xx response.
    """
    # Reformatted from tabs to PEP 8 spacing; dropped the unused
    # `website` local the original computed and never read.
    cookies = get_cookies(location)
    url = "https://in.bookmyshow.com/serv/getData?cmd=QUICKBOOK&type=MT"
    response = requests.get(url, cookies=cookies)
    response.raise_for_status()
    # response.json() replaces the json.loads(response.text) round-trip.
    return response.json()
Esempio n. 5
0
def main():
    """Analyse one Douban book page: sentiment first, then a word cloud."""
    # Each book has its own subject URL on book.douban.com.
    book_url = 'https://book.douban.com/subject/1826007/'
    session_cookies = get_cookies()

    # Run both analyses against the same page with the same cookies.
    emotion_analysis(book_url, session_cookies)
    book_wordcloud(book_url, session_cookies)
Esempio n. 6
0
def attach_movie_url(location):
    """Attach an absolute BookMyShow URL to each movie dict in the listing.

    Scrapes the city listing page for (title, href) pairs and, for each
    movie from get_movie_list(location) whose 'Title' matches a scraped
    name, stores the absolute page URL under movie['url'].

    Args:
        location: City slug used in the listing URL and for cookies.

    Returns:
        list[dict]: The movie list, mutated in place with 'url' keys added.

    Raises:
        requests.HTTPError: On a non-2xx response from the listing page.
    """
    website = "https://in.bookmyshow.com"
    website_with_location = "https://in.bookmyshow.com/" + location + "/movies"
    cookies = get_cookies(location)
    # BUG FIX: the cookies were fetched but never sent with the request,
    # so the location context never reached the server.
    response = requests.get(website_with_location, cookies=cookies)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    mlist = get_movie_list(location)
    # Build a title -> href map once instead of rescanning every page
    # anchor for each movie (was O(anchors * movies)). Last duplicate
    # title wins, matching the original overwrite behaviour.
    href_by_name = {}
    for div in soup.findAll('div', {'class': '__name'}):
        anchor = div.findAll('a')[0]
        href_by_name[anchor.text.strip()] = anchor.attrs['href']
    for movie in mlist:
        href = href_by_name.get(movie['Title'])
        if href is not None:
            movie['url'] = website + href
    return mlist
Esempio n. 7
0
def attach_movie_url(location):
    """Attach an absolute BookMyShow URL to each movie dict in the listing.

    Args:
        location: City slug used in the listing URL and for cookies.

    Returns:
        list[dict]: The movie list, mutated in place with 'url' keys added.

    Raises:
        requests.HTTPError: On a non-2xx response from the listing page.
    """
    # Reformatted from tabs to PEP 8 spacing.
    website = "https://in.bookmyshow.com"
    website_with_location = "https://in.bookmyshow.com/" + location + "/movies"
    cookies = get_cookies(location)
    # BUG FIX: cookies were obtained but never attached to the request.
    response = requests.get(website_with_location, cookies=cookies)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    mlist = get_movie_list(location)
    # One pass over the page builds a title -> href lookup; a second pass
    # over the movies applies it (replaces the O(anchors * movies) scan).
    href_by_name = {}
    for div in soup.findAll('div', {'class': '__name'}):
        anchor = div.findAll('a')[0]
        href_by_name[anchor.text.strip()] = anchor.attrs['href']
    for movie in mlist:
        href = href_by_name.get(movie['Title'])
        if href is not None:
            movie['url'] = website + href
    return mlist
Esempio n. 8
0
 def other_url_handl(self, url, store_id):
     """Resolve a shop id from the homepage redirect and scrape its data.

     :param url: shop homepage address
     :param store_id: store ID
     :return: None
     """
     other_headers = None
     response = request_url(url, headers=self.headers, proxies_list=self.ip_pool)
     # Guard up front: request_url may fail. The original tested for
     # None only after already dereferencing response.url.
     if response is None:
         return
     try:
         shop_id = re.search(r"/shop_detail/(\w\d+)", response.url).group(1)
     except AttributeError:
         # Narrowed from a bare except: the only expected failure here is
         # calling .group() on a non-matching (None) search result.
         print(response.url)
     else:
         # BUG FIX: `ip_pool` was referenced as an undefined bare name;
         # the pool lives on the instance (see the request_url call above).
         cookies, base_url = get_cookies.get_cookies(url, self.ip_pool)
         url = parse_url(base_url.lower(), self.server_url, "")
         # NOTE(review): only the last cookie in the jar ends up in the
         # header — matches the sibling implementation, but verify intent.
         for item in cookies:
             other_headers = {
                 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36",
                 "cookie": "%s=%s" % (item.name, item.value)
             }
         self.get_other_msg(other_headers, url, store_id, shop_id)
Esempio n. 9
0
 def other_url_handl(self, url, store_id):
     """Get the shop_id, build the JSON data URL and prepare request headers.

     (Docstring translated from Chinese.)

     :param url: homepage address
     :param store_id: store ID
     :return: None
     """
     other_headers = None
     response = request_url(url,
                            headers=self.headers,
                            proxies_list=self.ip_pool)
     # Re-request URLs that were redirected to the known bad host.
     # BUG FIX: return after the retry — the original fell through and
     # kept processing the poisoned response as well.
     # NOTE(review): recursion is unbounded if the bad redirect persists.
     if "b.oijgvrq.cn" in response.url:
         self.other_url_handl(url, store_id)
         return
     # The homepage redirect URL embeds the shop id used to build the
     # JSON endpoint URL later on.
     try:
         shop_id = re.search(r"/shop_detail/(\w\d+)", response.url).group(1)
     except AttributeError:
         # Narrowed from a bare except: the expected failure is .group()
         # on a non-matching (None) search result.
         print(response.url)
     else:
         if response is not None:
             cookies = get_cookies.get_cookies(url)
             url = parse_url(response.url.lower(), self.server_url, "")
             # Build the headers from the token cookie the server set on
             # the homepage request. Only the last cookie in the jar
             # survives this loop (matches the sibling implementation).
             for item in cookies:
                 other_headers = {
                     "User-Agent":
                     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36",
                     "cookie": "%s=%s" % (item.name, item.value)
                 }
             # Some shops never receive a server-set token, so build one
             # by hand. BUG FIX: guard against an empty cookie jar, which
             # left other_headers as None and crashed the "token" lookup.
             if other_headers is None or "token" not in other_headers["cookie"]:
                 other_headers = {
                     "User-Agent":
                     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36",
                     "cookie":
                     "token=Mzk4MDk3Q0E5RTZCN0I1MkYwMTYwNDlCQUNFNkQ5QzVFOEZCOTI1OEEwOTA2MDc0QzUzRTVCNDVDMTg1RTgzRTZBNTY1MTZDQTNFNDFCRkI2ODZGRTgxRjQxRDU3MEZD;"
                 }
             self.get_other_msg(other_headers, url, store_id, shop_id)
#!/bin/python2.7
import urllib2
import json
from get_cookies import get_cookies
from StringIO import StringIO
from ProgressBar.progress_bar import ProgressBar

# Session cookies fetched once at import time; COOKIE_2 is the alternate
# session used when get_page() is called with use_cookie_2=True.
COOKIE, COOKIE_2 = get_cookies()
CLASS_SLUG       = "startup-001"  # Coursera class short name in the API URLs
CALLBACK         = "some"  # JSONP callback name appended to the user API URL
OUT_FILE         = "user_data.json"  # destination file for the scraped data

def get_page(url, use_cookie_2 = False):
    """Open *url* with urllib2, attaching one of the two session cookies.

    Args:
        url: Absolute URL to fetch.
        use_cookie_2: When True, send COOKIE_2 instead of COOKIE.

    Returns:
        The file-like response object from urllib2's opener.
    """
    chosen_cookie = COOKIE_2 if use_cookie_2 else COOKIE
    opener = urllib2.build_opener()
    opener.addheaders.append(('Cookie', chosen_cookie))
    return opener.open(url)

# %d -> forum thread id, requested sequentially starting from 1.
forum_thread_template = "https://class.coursera.org/" + CLASS_SLUG + "/api/forum/threads/%d"
# %d -> user id; a JSONP endpoint, hence the callback query parameter.
user_template = "https://www.coursera.org/maestro/api/user/profiles?user-ids=%d&callback=" + CALLBACK

uids = set()  # unique user ids harvested from the forum threads
thread_count = 1  # id of the next thread to request
number_of_threads = 0  # total thread count, discovered by the probe below

# figure out total number of threads
Esempio n. 11
0
]
headers = {
    "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36"
}
with open("./ip_pool", "r") as f:
    content = f.read()
ip_pool = json.loads(content)
server_url = "service/album/get_album_themes_list.jsp"
for url in url_list:
    response = requests.get(url,
                            proxies=random.choices(ip_pool)[0],
                            headers=headers)
    shop_id = re.search(r"/shop_detail/(.*)", response.url).group(1)
    if response is not None:
        cookies, base_url = get_cookies(url, ip_pool)
        url = parse_url(base_url.lower(), server_url, "")
        for item in cookies:
            other_headers = {
                "User-Agent":
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36",
                # "cookie": "%s=%s" % (item.name, item.value)
                "cookie":
                "UM_distinctid=172fef76482421-09a957e7ea6b66-4353761-144000-172fef764838c7; "
                "sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22A202007011648174290197177%22%2C%22first_id%22%3A%22172fef75cf638b-0c1df4c5ab5928-4353761-1327104-172fef75cf78d9%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%7D%2C%22%24device_id%22%3A%22172fef75cf638b-0c1df4c5ab5928-4353761-1327104-172fef75cf78d9%22%7D; "
                "token=Mzk4MDk3Q0E5RTZCN0I1MkYwMTYwNDlCQUNFNkQ5QzVFOEZCOTI1OEEwOTA2MDc0QzUzRTVCNDVDMTg1RTgzRTZBNTY1MTZDQTNFNDFCRkI2ODZGRTgxRjQxRDU3MEZD; "
                "CNZZDATA1275056938=120497081-1594357916-%7C1594363316; "
                "JSESSIONID=B74EC8A017C3DDD861F3E6E17F3D6C3A"
            }
        params = {
            "page_index": 1,
Esempio n. 12
0
import requests
import time
import threading
import pandas as pd
import get_cookies
'''
cookies = {'_vis_opt_exp_67_combi': '2', '_vis_opt_exp_81_exclude': '1', '_ga': 'GA1.2.1502718080.1561463733', '_gid': 'GA1.2.1646304843.1561463733', '_vis_opt_s': '1%7C', 'SNLB2': '12-001', '_vwo_sn': '0%3A1', '_vis_opt_test_cookie': '1', '_vwo_ds': '3%3Aa_0%2Ct_0%3A0%241561463723%3A23.3810494%3A%3A%3A%3A1', 'D_ZID': 'B7582915-2342-3B54-9343-BC1DCE799E8E', 'satisfaction-survey-chance': '0.0403899275885848', 'fonts-loaded': 'true', 'INLB': '01-004', 'sr': '0%7cfalse', 'D_SID': '213.46.252.136:48rZgOaLq07BxrV0Qr6LtG+9ZX6f1oCFngMEVSQzTPk', 'D_UID': 'D644992F-2B1E-3E41-9DE9-F217B4362A49', '.ASPXANONYMOUS': '4V5eW9xpxKl3EkqZq8zIBv4lcHtHlKJzUXRaBTg5iCG5uOS5DfKvk0gLrD0PqmD1sknqjSbDyPTzkKIcVGWAfn4hfTnoLb8FBqUB1iW71ervGlotG8otKQ6aALP4MaNLDjjJUCLn1dz5YorxzHNosKif--g1', '_vwo_uuid_v2': 'D15EFEB276818AA2126C9C28B803EC581|b916da0e2f06e5601dfa81595da1a0aa', 'html-classes': 'js supports-placeholder', 'D_HID': 'A4A9E92C-2A48-3C2C-8A88-C4964FA76B91', 'D_IID': 'D383A0CE-44F4-3CD9-A86F-F857C5DAE6B6', 'D_ZUID': '03044E65-40B9-39DC-B123-50B3846BD9DA', '_vwo_uuid': 'D15EFEB276818AA2126C9C28B803EC581', '__RequestVerificationToken': 'tDON4sgpOIM-o_KwfC0sjX3NodUMYfxV7ZcnZLGxAMNlLJ8D9WrJLi0-aQAInnJeaDput6sP7jq1PanBIRBgzBJDo9c1'}
'''

# Serialize the fetched cookie jar into a single Cookie-header value.
cookies = get_cookies.get_cookies()
cookie_string = "; ".join(str(name) + "=" + str(value)
                          for name, value in cookies.items())

headers = {
    'Accept':
    'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    'Accept-Encoding':
    'gzip, deflate, br',
    'Accept-Language':
    'zh-CN,zh;q=0.9,en;q=0.8,ja;q=0.7,nl;q=0.6',
    'Cache-Control':
    'max-age=0',
    'Connection':
    'keep-alive',
    'Cookie':
    cookie_string,
    #'..ASPXANONYMOUS=d0oK0LM1NdCj5Jj81GX6xZCQ2RoA8LXNtZgyqdpEUbZA6HeGTGn_8Fg6rS8qtSGhEMPhHCzkAQ9WNwsQPJOz2ZcJru6sMaSMPmq5gjexZt8jXJfP17NXvS1uQaI92DoKwyAxOlbmfhmx8roCgeP2Yly7A3o1; sr=0%7cfalse; INLB=01-002; html-classes=js supports-placeholder; _vwo_uuid_v2=D78F6C8B5734A4EB42802737D85D9997B|bb168ce01705a1c8e305a7b53c0daf84; fonts-loaded=true; D_IID=791293A9-C17B-3312-B7D2-ACEC88EBCF6F; D_UID=6D9DEFD3-9936-33CF-BAD9-B8A336E733D5; D_ZID=FA92A681-5C62-3D00-A6E0-FFCFA2DFE000; D_ZUID=4AAFAD98-FC9A-397B-BFD0-CC4CAD64ABF8; D_HID=FE2E611F-6D24-3E8E-82DC-A8B080B475A7; D_SID=85.145.109.88:KlfxLH44zOxwMWXky4Fdeo0TMw982Lv2FwcHBwJhkZY; _vis_opt_s=1%7C; _vis_opt_test_cookie=1; _vwo_uuid=D78F6C8B5734A4EB42802737D85D9997B; _vwo_ds=3%3Aa_0%2Ct_0%3A0%241560642413%3A15.2871134%3A%3A%3A%3A0; _ga=GA1.2.2014485093.1560642416; _gid=GA1.2.1849743777.1560642416; cookiePolicy_16=allowPersonalisatie=True&allowAdvertenties=True; oil_data={%22opt_in%22:true%2C%22version%22:%221.3.0-RELEASE%22%2C%22localeVariantName%22:%22enEN_00%22%2C%22localeVariantVersion%22:0%2C%22customPurposes%22:[]%2C%22consentString%22:%22BOiNyJoOiNyJoBQABBENCX-AAAAoR6_-faqaRo25-P7J9kRFAL6lgBrPSFAQKQAIQAeCJWBiKgUkyDUoCUEIAoBAAARASCJARBgQEAESgAuAAJAgAgCCAAAIBAAAAAAAAAAAAAAAAA%22%2C%22configVersion%22:0}; rtb-platform=improve; satisfaction-survey-chance=0.0438349624368525; __RequestVerificationToken=iYFvdjp0cFTjaPyPxg2Qz0kjxnX4rdaVIPbkDn1YlLtIOXJ7bPEM3A7tM9DjF95SRidYCg9uIsQDtLrRV9iX-FRA01I1; _vis_opt_exp_80_exclude=1; _vis_opt_exp_75_combi=1; __utma=72423812.2014485093.1560642416.1560685213.1560685213.1; __utmc=72423812; __utmz=72423812.1560685213.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmb=72423812.1.10.1560685213; lzo=koop=%2fkoop%2fheel-nederland%2f&huur=%2fhuur%2fheel-nederland%2f&europe=%2feurope%2fheel-europa%2fhuur%2f&nieuwbouw=%2fnieuwbouw%2fheel-nederland%2f; _vwo_sn=40055%3A7; SNLB2=12-001; 
utag_main=v_id:016b5d8981a1001abec4ed9a3ddd03068001806000bd0$_sn:4$_ss:0$_st:1560687325603$vapi_domain:funda.nl$dc_visit:4$ses_id:1560682473031%3Bexp-session$_pn:7%3Bexp-session$dc_event:7%3Bexp-session$dc_region:eu-central-1%3Bexp-session',
    'Host':
    'www.funda.nl',
    'Referer':
    'https://www.funda.nl/en/koop/',
    'Upgrade-Insecure-Requests':