def __init__(self, video_no):
    self.video_no = video_no = video_no.lower().replace('-', '')
    # Search results page
    list_url = 'https://ideapocket.com/search/list?keyword=' + video_no
    list_response = http.get(list_url, self.headers)
    list_html = list_response.text
    list_soup = BeautifulSoup(list_html, features="html.parser")
    # poster
    self.poster_url = list_soup.find('div', class_="swiper-wrapper").find(
        'div', class_="item").find('img')['data-src']
    self.poster_name = os.path.basename(self.poster_url)
    self.poster_ext = os.path.splitext(self.poster_name)[1]
    # Detail page
    url = list_soup.find('div', class_="swiper-wrapper").find(
        'div', class_="item").find('a')['href']
    response = http.get(url, self.headers)
    html = response.text
    soup = BeautifulSoup(html, features="html.parser")
    # fanart
    self.fanart_url = soup.find(
        'div', class_="swiper-slide").find('img')['data-src']
    self.fanart_name = os.path.basename(self.fanart_url)
    self.fanart_ext = os.path.splitext(self.fanart_name)[1]
def __init__(self, video_no):
    self.video_no = video_no = video_no.lower().replace('-', '')
    # Search results page
    list_url = 'https://www.wanz-factory.com/search/list/?q=' + video_no
    list_response = http.get(list_url, self.headers)
    list_html = list_response.text
    list_soup = BeautifulSoup(list_html, features="html.parser")
    # poster
    self.poster_url = self.site_url.rstrip('/') + list_soup.find(
        'ul', class_="c-works-list").find('img')['src']
    self.poster_name = os.path.basename(self.poster_url)
    self.poster_ext = os.path.splitext(self.poster_name)[1]
    # Detail page
    url = self.site_url.rstrip('/') + list_soup.find(
        'ul', class_="c-works-list").find('a')['href']
    response = http.get(url, self.headers)
    html = response.text
    soup = BeautifulSoup(html, features="html.parser")
    # fanart
    self.fanart_url = self.site_url.rstrip('/') + soup.find(
        'div', class_="works-detail-package js-photo-swipe-target").find(
            'img')['src']
    self.fanart_name = os.path.basename(self.fanart_url)
    self.fanart_ext = os.path.splitext(self.fanart_name)[1]
def __init__(self, video_no):
    self.video_no = video_no = video_no.lower().replace('-', '')
    # Search results page
    list_url = 'https://www.tameikegoro.jp/search/list/?q=' + video_no
    list_response = http.get(list_url, self.headers)
    list_html = list_response.text
    list_soup = BeautifulSoup(list_html, features="html.parser")
    # poster
    self.poster_url = self.site_url.rstrip('/') + list_soup.find(
        'ul', class_="wrap-content-list").find('img')['src']
    self.poster_name = os.path.basename(self.poster_url)
    self.poster_ext = os.path.splitext(self.poster_name)[1]
    # Detail page
    url = self.site_url.rstrip('/') + list_soup.find(
        'ul', class_="wrap-content-list").find('a')['href']
    response = http.get(url, self.headers)
    html = response.text
    soup = BeautifulSoup(html, features="html.parser")
    # fanart
    self.fanart_url = self.site_url.rstrip('/') + soup.find(
        'ul', class_="bx-detail-slider").find('img')['src']
    self.fanart_name = os.path.basename(self.fanart_url)
    self.fanart_ext = os.path.splitext(self.fanart_name)[1]
def __init__(self, video_no):
    self.video_no = video_no
    # Search results page (fetched with a POST form)
    list_url = 'https://smt.hmp.jp/list.php'
    list_data = {'key': video_no}
    list_response = http.post(list_url, list_data, self.headers)
    list_html = list_response.text
    list_soup = BeautifulSoup(list_html, features="html.parser")
    # poster
    self.poster_url = self.site_url.rstrip('/') + list_soup.find(
        'p', class_="mainImg").find('img')['data-original']
    self.poster_name = os.path.basename(self.poster_url)
    self.poster_ext = os.path.splitext(self.poster_name)[1]
    # Detail page
    url = self.site_url.rstrip('/') + list_soup.find(
        'p', class_="mainImg").find('a')['href']
    response = http.get(url, self.headers)
    html = response.text
    soup = BeautifulSoup(html, features="html.parser")
    # fanart
    self.fanart_url = self.site_url.rstrip('/') + soup.find(
        'p', class_="mainImg").find('img')['src']
    self.fanart_name = os.path.basename(self.fanart_url)
    self.fanart_ext = os.path.splitext(self.fanart_name)[1]
def __init__(self, video_no):
    self.video_no = video_no
    # Detail page
    url = 'https://www.mgstage.com/product/product_detail/' + video_no + '/'
    response = http.get(url, self.headers)
    html = response.text
    soup = BeautifulSoup(html, features="html.parser")
    if video_no.startswith("SIRO"):
        number = video_no.split('-')[1]
        self.poster_url = self.siro_poster_url.format(number=number)
        self.fanart_url = self.siro_fanart_url.format(number=number)
    elif video_no.startswith("200GANA"):
        number = video_no.split('-')[1]
        self.poster_url = self.gana_poster_url.format(number=number)
        self.fanart_url = self.gana_fanart_url.format(number=number)
    else:
        detail_photo = soup.find('div', class_="detail_photo")
        if detail_photo is None:
            self.poster_url = ''
            self.fanart_url = ''
        else:
            self.poster_url = detail_photo.find(
                'img', class_='enlarge_image')['src']
            self.fanart_url = detail_photo.find(
                'a', class_='link_magnify')['href']
    # poster
    self.poster_name = os.path.basename(self.poster_url)
    self.poster_ext = os.path.splitext(self.poster_name)[1]
    # fanart
    self.fanart_name = os.path.basename(self.fanart_url)
    self.fanart_ext = os.path.splitext(self.fanart_name)[1]
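The SIRO and 200GANA branches above fill in class-level URL templates keyed by the numeric part of the video number. A minimal sketch of the assumed attribute shape, with placeholder URLs (the real templates are defined on the scraper class and are not shown here):

    # Placeholder values for illustration only; the real templates live on
    # the class alongside this __init__.
    siro_poster_url = 'https://img.example.com/siro/{number}/poster.jpg'
    siro_fanart_url = 'https://img.example.com/siro/{number}/fanart.jpg'
    gana_poster_url = 'https://img.example.com/200gana/{number}/poster.jpg'
    gana_fanart_url = 'https://img.example.com/200gana/{number}/fanart.jpg'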
def fetch_data():
    global _BASE_URL, _API_KEY
    since = datetime.now() + time_offset
    url = _BASE_URL + '/repos/pbeardshear/hub/commits'
    query = '?author={0}&since={1}&access_token={2}'.format(
        'pbeardshear', since.isoformat(), _API_KEY)
    # The query string was built but never sent; append it to the URL.
    commits = http.get(url + query)
    # task_list is assumed to be defined at module level.
    return [task_list['selenium']]
def __init__(self, video_no):
    self.video_no = video_no = video_no.lower()
    # Age-gate form submission
    data = {
        'age_gate': '1',
        'age_gate[age]': 'TVRnPQ==',
        'action': 'age_gate_submit',
        'age_gate[nonce]': '7d70b59573',
        '_wp_http_referer': '/all/shn-016/',
        'confirm_action': '0'
    }
    age_check_headers = {
        # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        # 'Accept-Encoding': 'gzip, deflate, br',
        # 'Accept-Language': 'en-US,en;q=0.9',
        # 'Cache-Control': 'max-age=0',
        # 'Connection': 'keep-alive',
        # 'Content-Length': '162',
        # 'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': '_ga=GA1.3.488997533.1609657427; _gid=GA1.3.1979187914.1609657427; _gat=1',
        'Host': 'www.naturalhigh.co.jp',
        'Origin': 'https://www.naturalhigh.co.jp',
        'Referer': 'https://www.naturalhigh.co.jp/all/shn-016/',
        # 'Sec-Fetch-Dest': 'document',
        # 'Sec-Fetch-Mode': 'navigate',
        # 'Sec-Fetch-Site': 'same-origin',
        # 'Sec-Fetch-User': '******',
        # 'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
    }
    http.post('https://www.naturalhigh.co.jp/wp-admin/admin-post.php',
              data=data,
              headers=age_check_headers)
    # Video detail page HTML
    url = 'https://www.naturalhigh.co.jp/all/' + video_no + '/'
    response = http.get(url, self.headers)
    html = response.text
    soup = BeautifulSoup(html, features="html.parser")
    # poster
    self.poster_url = soup.find('img', class_="sam160")['src']
    self.poster_name = os.path.basename(self.poster_url)
    self.poster_ext = os.path.splitext(self.poster_name)[1]
    # fanart
    self.fanart_url = soup.find('a', class_="popup-image")['href']
    self.fanart_name = os.path.basename(self.fanart_url)
    self.fanart_ext = os.path.splitext(self.fanart_name)[1]
def __init__(self, video_no):
    self.video_no = video_no
    # Age check
    http.get('https://ec.sod.co.jp/prime/_ontime.php', self.headers)
    # Detail page
    url = 'https://ec.sod.co.jp/prime/videos/?id=' + video_no
    response = http.get(url, self.headers)
    html = response.text
    soup = BeautifulSoup(html, features="html.parser")
    # poster
    self.poster_url = soup.find('img', class_="sam160")['src']
    self.poster_name = os.path.basename(self.poster_url)
    self.poster_ext = os.path.splitext(self.poster_name)[1]
    # fanart
    self.fanart_url = soup.find('a', class_="popup-image")['href']
    self.fanart_name = os.path.basename(self.fanart_url)
    self.fanart_ext = os.path.splitext(self.fanart_name)[1]
def download_poster(video_number):
    (series, item) = video_number.split('-', 1)
    if series not in posters:
        return None
    # Check the template before formatting it; calling .format() on a None
    # entry would raise AttributeError before the None check could run.
    poster_template = posters[series]
    if poster_template is None:
        print('Video {} poster_url is None'.format(video_number))
        return None
    poster_url = poster_template.format(item=item)
    response = get(poster_url)
    return response.content
def get_chain_info(url=def_api, timeout=def_timeout):
    success = False
    try:
        uri = "/v1/chain/get_info"
        url = url + uri
        # The path fragment was previously passed as the URL; request the
        # full URL instead.
        result = http.get(url, timeout=timeout)
        success = True
        msg = result.json()
    except Exception as e:
        msg = str(e)
        logger.error('Failed to call %s error:%s', url, msg)
    return success, msg
def download_fanart(video_number):
    (series, item) = video_number.split('-', 1)
    if series not in fanarts:
        return None
    # Same ordering fix as download_poster: check for None before .format().
    fanart_template = fanarts[series]
    if fanart_template is None:
        print('Video {} fanart_url is None'.format(video_number))
        return None
    fanart_url = fanart_template.format(item=item)
    response = get(fanart_url)
    return response.content
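Both helpers above index into module-level template tables keyed by the series prefix of the video number. A sketch of the assumed shape, with hypothetical series codes and URLs (the real tables are defined elsewhere in the module):

    # Hypothetical examples only. A None value marks a series that is known
    # but has no artwork source, which is why the helpers check for None.
    posters = {
        'ABC': 'https://img.example.com/abc/{item}/poster.jpg',
        'XYZ': None,
    }
    fanarts = {
        'ABC': 'https://img.example.com/abc/{item}/fanart.jpg',
    }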
def __init__(self, video_no):
    # Search results page
    list_url = 'http://planetplus.jp/wp01/?s=' + video_no
    list_response = http.get(list_url, self.headers)
    list_html = list_response.text
    list_soup = BeautifulSoup(list_html, features="html.parser")
    # poster
    self.poster_url = list_soup.find('article').find('img')['data-src']
    self.poster_name = os.path.basename(self.poster_url)
    self.poster_ext = os.path.splitext(self.poster_name)[1]
    # Detail page
    url = list_soup.find('article').find('a')['href']
    response = http.get(url, self.headers)
    html = response.text
    soup = BeautifulSoup(html, features="html.parser")
    # fanart
    self.fanart_url = soup.find('div', class_="execphpwidget").find('a')['href']
    self.fanart_name = os.path.basename(self.fanart_url)
    self.fanart_ext = os.path.splitext(self.fanart_name)[1]
def get_chain_info_from_node(url, time_out_sec=http_time_out_sec):
    success = False
    try:
        url = url + "/v1/chain/get_info"
        # The URL was previously passed twice; one positional argument plus
        # the timeout is all the call needs.
        result = http.get(url, timeout=time_out_sec)
        msg = result.text
        if result.status_code == 200:
            success = True
            msg = result.json()
    except BaseException as e:
        msg = str(e)
    if not success:
        logger.error('Failed to call %s error:%s', url, msg)
    return success, msg
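Both chain-info helpers return a (success, msg) tuple instead of raising, so callers branch on the flag rather than wrapping every call in try/except. A usage sketch with a hypothetical node URL:

    # Hypothetical endpoint for illustration.
    ok, info = get_chain_info_from_node('https://eos-node.example.com')
    if ok:
        print(info['head_block_num'])   # info is the parsed get_info JSON
    else:
        print('node unreachable:', info)  # info is the error string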
def __init__(self, connected, session_id, url):
    if connected:
        cookies = {"JSESSIONID": session_id}
    else:
        cookies = {}
    res = http.get(url, cookies=cookies)
    # Search the response body, not the response object itself.
    matches = re.search('<input type="hidden" name="(.*?)" value="(.*?)">',
                        res.text)
    self.key1 = matches.group(1)
    self.key2 = matches.group(2)
    if connected:
        self.JSESSIONID = session_id
    else:
        # Not yet connected: take the session cookie from the response
        # (the original referenced an undefined name r here).
        self.JSESSIONID = res.cookies['JSESSIONID']
def __init__(self, video_no):
    # Detail page
    url = 'https://shark2012-av.com/products/index.php?pn=' + video_no
    response = http.get(url, self.headers)
    html = response.text
    soup = BeautifulSoup(html, features="html.parser")
    # poster
    self.poster_url = self.site_url + soup.find(
        'div', class_="works-detail").find('img')['src']
    self.poster_name = os.path.basename(self.poster_url)
    self.poster_ext = os.path.splitext(self.poster_name)[1]
    # fanart
    self.fanart_url = self.site_url + soup.find(
        'div', class_="works-detail").find('a')['href']
    self.fanart_name = os.path.basename(self.fanart_url)
    self.fanart_ext = os.path.splitext(self.fanart_name)[1]
def __init__(self, video_no):
    self.video_no = video_no = video_no.lower().replace('-', '')
    # Detail page
    url = 'https://www.mousouzoku-av.com/works/detail/' + video_no + '/'
    response = http.get(url, self.headers)
    html = response.text
    soup = BeautifulSoup(html, features="html.parser")
    # poster
    self.poster_url = self.site_url.rstrip('/') + soup.find(
        'div', class_="bx-side-content bx-side-content-buy").find('img')['src']
    self.poster_name = os.path.basename(self.poster_url)
    self.poster_ext = os.path.splitext(self.poster_name)[1]
    # fanart
    self.fanart_url = self.site_url.rstrip('/') + soup.find(
        'div', class_="bx-pake js-photo_swipe").find('img')['src']
    self.fanart_name = os.path.basename(self.fanart_url)
    self.fanart_ext = os.path.splitext(self.fanart_name)[1]
def __init__(self, video_no):
    self.video_no = video_no = video_no.lower().replace('-', '')
    # Detail page
    url = 'http://www.tsumabana.com/' + video_no + '.php'
    response = http.get(url, self.headers)
    html = response.text
    soup = BeautifulSoup(html, features="html.parser")
    # poster
    self.poster_url = self.post_url.format(video_no=video_no)
    self.poster_name = os.path.basename(self.poster_url)
    self.poster_ext = os.path.splitext(self.poster_name)[1]
    # fanart
    self.fanart_url = self.site_url + soup.find(
        'article', class_="post").find('a')['href']
    self.fanart_name = os.path.basename(self.fanart_url)
    self.fanart_ext = os.path.splitext(self.fanart_name)[1]
def getCurrentWeather():
    """
    Fetch the current weather information.
    :return: the weather record, or None if the API returned no forecast
    """
    # GET request for the weather data
    weather_raw = requests.get(
        url='https://www.sojson.com/open/api/weather/json.shtml?city=深圳').json()
    if not weather_raw or not weather_raw.get('data') or not weather_raw.get(
            'data').get('forecast'):
        return None
    weather_json = weather_raw.get('data').get('forecast')[0]
    # Current conditions
    weather = weather_json.get('type')
    # Low and high temperatures. The API returns strings like '低温 8℃', so
    # remove the Chinese label and the degree sign explicitly; the original
    # str.strip('.0℃') stripped characters, eating trailing zeros
    # ('10' would become '1').
    temp_low = weather_json.get('low').replace('低温', '').replace('℃', '').strip()
    temp_high = weather_json.get('high').replace('高温', '').replace('℃', '').strip()
    # Wind
    wind = weather_json.get('fx')
    weather_other = temp_low + " ~ " + temp_high + " ℃," + wind
    weather_db = WeatherModel.query.filter_by(current_date=curDate()).first()
    if weather_db:
        # Update the existing row
        print('Updating an existing weather record')
        weather_db.weather = weather
        weather_db.weather_other = weather_other
        db.session.commit()
        return weather_db
    else:
        # Insert a new row
        print('Inserting a new weather record')
        weather_new = WeatherModel(weather=weather,
                                   weather_other=weather_other,
                                   current_date=curDate())
        db.session.add(weather_new)
        db.session.commit()
        return weather_new
def __init__(self, video_no):
    self.video_no = video_no = video_no.lower()
    # Detail page
    url = 'https://www.km-produce.com/works/' + video_no
    response = http.get(url, self.headers)
    html = response.text
    soup = BeautifulSoup(html, features="html.parser")
    # poster
    self.poster_url = self.post_url.format(video_no=video_no)
    self.poster_name = os.path.basename(self.poster_url)
    self.poster_ext = os.path.splitext(self.poster_name)[1]
    # fanart
    self.fanart_url = self.site_url.rstrip('/') + soup.find(
        'div', class_="details").find('a')['href']
    self.fanart_name = os.path.basename(self.fanart_url)
    self.fanart_ext = os.path.splitext(self.fanart_name)[1]
def __init__(self, video_no):
    self.video_no = video_no = video_no.lower()
    # Detail page
    url = 'https://www.prestige-av.com/goods/goods_detail.php?sku=' + video_no
    response = http.get(url, self.headers)
    html = response.text
    soup = BeautifulSoup(html, features="html.parser")
    # poster
    self.poster_url = self.site_url.rstrip('/') + soup.find(
        'p', class_="package_layout").find('img', class_='border_image')['src']
    self.poster_name = os.path.basename(self.poster_url)
    self.poster_ext = os.path.splitext(self.poster_name)[1]
    # fanart
    self.fanart_url = self.site_url.rstrip('/') + soup.find(
        'p', class_="package_layout").find('a', class_='sample_image')['href']
    self.fanart_name = os.path.basename(self.fanart_url)
    self.fanart_ext = os.path.splitext(self.fanart_name)[1]
def download_fanart(self):
    response = http.get(self.fanart_url, headers=self.headers)
    return response.content
def download_poster(self):
    response = http.get(self.poster_url, headers=self.headers)
    return response.content
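These two methods pair with the poster_name/fanart_name attributes set in the scraper __init__ implementations above, so persisting both images takes a few lines. A usage sketch, where Scraper stands in for any of the site-specific classes (the name is hypothetical):

    # Hypothetical class name; any scraper above fits this shape.
    scraper = Scraper('ABC-123')
    with open(scraper.poster_name, 'wb') as f:
        f.write(scraper.download_poster())
    with open(scraper.fanart_name, 'wb') as f:
        f.write(scraper.download_fanart())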
def get_block_num():
    get_info_api = api + '/v1/chain/get_info'
    # The URL was previously passed twice; one argument suffices.
    response = http.get(get_info_api)
    return response.json()['head_block_num']
#!/usr/bin/env python
'''
Author: iwenli
License: Copyright © 2019 iwenli.org Inc. All rights reserved.
Github: https://github.com/iwenli
Date: 2020-11-20 11:26:02
LastEditors: iwenli
LastEditTime: 2020-11-20 13:16:38
Description: ...
'''
__author__ = 'iwenli'

from ebook_qd import crawl_book
from utils import http

if __name__ == "__main__":
    # crawl_book()
    url = 'http://baidu.com'
    rep = http.get(url)
    print(rep)
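Every snippet above routes requests through a shared http helper rather than calling requests directly. Its implementation is not shown here; a minimal sketch consistent with the call sites, assuming it is a thin wrapper over requests with the signatures get(url, headers=None, **kwargs) and post(url, data=None, headers=None, **kwargs):

    # Assumed shape of utils/http.py, reconstructed from the call sites
    # above; not the actual implementation.
    import requests

    def get(url, headers=None, **kwargs):
        # One shared entry point so defaults (timeouts, retries, a session)
        # can be added in a single place.
        return requests.get(url, headers=headers, **kwargs)

    def post(url, data=None, headers=None, **kwargs):
        return requests.post(url, data=data, headers=headers, **kwargs)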