def findinfo(articleid, mode='uid'):
    """Fetch a DMM listing page and return (max_page, title).

    articleid: an actress/article id, or a full listing URL when mode='link'.
    mode: 'uid' builds the actress-listing URL from the id; 'link' treats
          articleid as the URL itself.
    Returns (page_count, title): page_count is 1 when the page has no
    pagination links, otherwise the highest page number found.
    Raises ValueError for an unknown mode (the original fell through and
    crashed with a NameError) and IndexError when no <title> matches.
    """
    if mode == 'link':
        url = articleid
        html = get_html_jp(url)
        page1 = re.findall(r'/digital/videoa/-/list/=/.*/id=\d+/page=(\d+)/',
                           html)
    elif mode == 'uid':
        url = "https://www.dmm.co.jp/digital/videoa/-/list/=/article=actress/id=%s/" % articleid
        html = get_html_jp(url)
        page1 = re.findall(
            r'/digital/videoa/-/list/=/article=actress/id=\d+/page=(\d+)/',
            html)
    else:
        raise ValueError("mode must be 'link' or 'uid', got %r" % mode)
    title = re.findall(
        r'<title>(.*) - エロ動画・アダルトビデオ - FANZA動画</title>', html)
    if not page1:
        page1 = 1
    else:
        # The original built a "unique" list but compared str against int,
        # so the dedup never fired; max() over the ints is equivalent.
        page1 = max(int(p) for p in page1)
    return (page1, title[0])
def precid(searchcid):
    """Scrape a DMM mono/dvd detail page and format it as markdown text.

    searchcid: the DMM product cid.
    Returns the rendered text block. Raises AttributeError when the expected
    page elements are missing (unchanged from the original behavior).
    """
    searchurl = 'https://www.dmm.co.jp/mono/dvd/-/detail/=/cid={}/'.format(
        searchcid)
    html = get_html_jp(searchurl)
    soup = BeautifulSoup(html, 'lxml')
    body = soup.find('table', attrs={'class': 'mg-b20'})
    # The original assigned soup.title.string twice; once is enough.
    red = soup.find('span', attrs={'class': 'red'}).string
    title = red + soup.title.string
    photo = soup.find('div', attrs={'class': 'tx10 pd-3 lh4'}).a.get('href')
    # Hoist the row lookup instead of re-running find_all('tr') per field.
    rows = body.find_all('tr')
    pushdate = rows[0].find('td').find_next_sibling().string
    runtime = rows[1].find('td').find_next_sibling().string  # was `time`, shadowing the stdlib module
    performer = rows[2].find('td').find_next_sibling().span.a.string
    num = rows[-1].find('td').find_next_sibling().string
    text = '''
`{}`
[DVD ]({})
*発売日:*{}
*収録時間:*{}
*出演者:*{}
*品番:* `{}`
[官方信息]({})
'''.format(title, photo, pushdate, runtime, performer, num, searchurl)
    return text
def new_work(self):
    """Scrape the FALENO 'new works' page and render it via faleno.md."""
    page = get_html_jp('https://faleno.jp/top/work/', verify=False)
    soup = BeautifulSoup(page, 'lxml')
    page_title = soup.title.string
    container = soup.find('div', attrs={'class': 'box_kanren01'})
    entries = []
    for item in container.find_all('div', attrs={'class': 'waku_kanren01'}):
        anchor = item.find('a')
        image = item.find('img')
        href = anchor.get('href')
        entries.append({
            'url': href,
            'work': re.findall(r'https://faleno.jp/top/works/(.*)/',
                               str(href))[0],
            'name': image.get('alt'),
            'img': image.get('src'),
        })
    # Load the markdown template bundled with this package and render it.
    env = Environment(loader=PackageLoader(__name__, "templates"))
    template = env.get_template('faleno.md')
    return template.render(data=entries, title=str(page_title))
def actress(self):
    """Scrape the FALENO actress index and render it via faleno_actress.md."""
    page = get_html_jp('https://faleno.jp/top/actress/', verify=False)
    soup = BeautifulSoup(page, 'lxml')
    page_title = soup.find('section', attrs={'class': 'bread'}).p.contents[1]
    entries = []
    for item in soup.find_all('li', attrs={'data-mh': 'group01'}):
        href = item.a.get('href')
        name_div = item.find('div', attrs={'class': 'text_name'})
        # The div holds "<jp name><span><romanized name></span>".
        jp_name, en_name = re.findall(
            r'<div class="text_name">(.*)<span>(.*)</span></div>',
            str(name_div))[0]
        entries.append({'links': href, 'name': jp_name, 'eng': en_name})
    # Load the markdown template bundled with this package and render it.
    env = Environment(loader=PackageLoader(__name__, "templates"))
    template = env.get_template('faleno_actress.md')
    return template.render(data=entries, title=str(page_title))
def photo(self, id):
    """Return the list of sample-photo URLs on a FALENO work page."""
    page = get_html_jp('https://faleno.jp/top/works/%s' % id, verify=False)
    soup = BeautifulSoup(page, 'lxml')
    anchors = soup.find_all('a', attrs={'class': 'pop_img'})
    return [anchor.get('href') for anchor in anchors]
def dmmonecid(searchcid):
    """Look up one DMM digital video by cid.

    Returns (rendered_text, notitle) from template_cid, or the not-found
    message passed straight through from ciddata.
    """
    cid = searchcid.replace('-', '00')  # DMM cids use '00' where titles use '-'
    url = 'https://www.dmm.co.jp/digital/videoa/-/detail/=/cid={}/'.format(cid)
    page = get_html_jp(url)
    data, notitle = ciddata(page)
    if data == '指定されたページが見つかりません':
        return data, notitle
    return template_cid(data), notitle
def dmmcid(in_q, out_q):
    """Worker: drain listing URLs from in_q and collect each page's cids.

    in_q: queue.Queue of DMM listing-page URLs.
    out_q: a list (despite the name) — one list of cid strings is appended
           per page processed.
    """
    # NOTE(review): empty()/get() is racy with multiple consumers; acceptable
    # for a single worker, otherwise prefer get_nowait() + queue.Empty.
    while not in_q.empty():  # was `in_q.empty() is not True`
        url = in_q.get()
        html = get_html_jp(url)
        # Was named `list`, shadowing the builtin.
        cids = re.findall(
            r'https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=([_0-9a-z]+)/',
            html)
        out_q.append(cids)
        in_q.task_done()
def prephotos(searchurl):
    """Collect thumbnail URLs from a DMM detail page and rewrite each to its
    full-size 'jp-' variant."""
    html = get_html_jp(searchurl)
    soup = BeautifulSoup(html, 'lxml')
    thumbs = soup.find_all('img', attrs={'class': 'mg-b6'})
    urls = re.findall(
        r'(https://pics.dmm.co.jp/digital/video/.*?/.*?.jpg)', str(thumbs))
    jpg = []
    for u in urls:
        # Splice 'jp' in front of the last six characters ('-N.jpg'), then
        # fix the dash placement: '...-1.jpg' becomes '...jp-1.jpg'.
        rewritten = (u[:-6] + 'jp' + u[-6:]).replace('-jp', 'jp-', 1)
        jpg.append(rewritten)
    return jpg
def work(self, id):
    """Scrape a single FALENO work page and render it via faleno_work.md."""
    url = 'https://faleno.jp/top/works/%s' % id
    soup = BeautifulSoup(get_html_jp(url, verify=False), 'lxml')
    sample = soup.find('a', attrs={'class': 'pop_sample'})
    details = {'img': sample.img.get('src')}
    page_title = sample.img.get('alt')
    details['text'] = soup.find(
        'div', attrs={'class': 'box_works01_text'}).p.string
    details['url'] = url
    # Each spec row is "<label><p>value</p>"; key on the stripped label text.
    for row in soup.find_all('li', attrs={'class': 'clearfix'}):
        details[row.find(text=True).strip()] = row.p.string
    # Load the markdown template bundled with this package and render it.
    env = Environment(loader=PackageLoader(__name__, "templates"))
    template = env.get_template('faleno_work.md')
    return template.render(data=details, title=str(page_title))
def actress_work(self, name):
    """Scrape a FALENO actress page and render her work list via faleno.md.

    name: actress name; spaces become underscores and it is lower-cased to
    build the URL slug.
    """
    slug = name.replace(' ', '_').lower()
    url = 'https://faleno.jp/top/actress/%s/' % slug
    soup = BeautifulSoup(get_html_jp(url, verify=False), 'lxml')
    title = soup.title.string
    # NOTE(review): the original also scraped the profile header ('bar02'),
    # portrait image and spec table into locals that were never used (and
    # that could raise on a missing element); that dead code is removed.
    body = soup.find('div', attrs={'class': 'box_kanren01'})
    boxlist = []
    for item in body.find_all('div', attrs={'class': 'waku_kanren01'}):
        href = item.find('a').get('href')
        image = item.find('img')
        boxlist.append({
            'work': re.findall(r'https://faleno.jp/top/works/(.*)/',
                               str(href))[0],
            'url': href,
            'name': image.get('alt'),
            'img': image.get('src'),
        })
    # Load the markdown template bundled with this package and render it.
    env = Environment(loader=PackageLoader(__name__, "templates"))
    template = env.get_template('faleno.md')
    return template.render(data=boxlist, title=str(title))
def search(self, word, searchmode='temp'):
    """Search faleno.jp for *word*.

    searchmode='search' returns the raw list of result dicts;
    searchmode='temp' (default) returns markdown rendered from faleno.md;
    any other value returns None (unchanged from the original).
    """
    response = get_html_jp('https://faleno.jp/top/?s=%s' % word, verify=False)
    soup = BeautifulSoup(response, 'lxml')
    title = soup.find('section', attrs={'class': 'bread'}).p.contents[1]
    try:
        # Detect the Japanese "no works matched" notice on the page.
        ifresult = re.findall(r'に一致する作品は見つかりませんでした', str(soup))
        noresult = "{}(没有找到相应内容的作品):{}".format(ifresult, title)
        if 'に一致する作品は見つかりませんでした' in ifresult:
            return noresult
    except Exception as e:
        print(e)
    boxlist = []
    for item in soup.find_all('div', attrs={'class': 'waku_kanren01'}):
        href = item.find('a').get('href')
        image = item.find('img')
        boxlist.append({
            'work': re.findall(r'https://faleno.jp/top/works/(.*)/',
                               str(href))[0],
            'url': href,
            'name': image.get('alt'),
            'img': image.get('src'),
        })
    if searchmode == 'search':
        return boxlist
    if searchmode == 'temp':
        # Load the markdown template bundled with this package and render it.
        env = Environment(loader=PackageLoader(__name__, "templates"))
        template = env.get_template('faleno.md')
        return template.render(data=boxlist, title=str(title))
def dmmsearchall_data(searchstr):
    """Run a DMM site-wide search and scrape the result grid.

    Returns (boxlist, stitle): boxlist is a list of dicts with keys cid,
    keyword, links, img, title, sublinks, subtexts ('-' when a field is
    absent; keyword is a comma-joined string, '' when no icons); stitle is
    the page <title>. When DMM reports no matching items, returns
    (no_result_message, 1) instead.
    """
    url = 'https://www.dmm.co.jp/search/=/searchstr={}/sort=rankprofile/'.format(
        searchstr)
    html = get_html_jp(url)
    # "No matching items" marker emitted by DMM's search page.
    # (The original wrapped this membership test in a pointless try/except.)
    noresult = 'に一致する商品は見つかりませんでした。'
    if noresult in re.findall(r'(に一致する商品は見つかりませんでした。)', html):
        return (noresult, 1)
    soup = BeautifulSoup(html, 'lxml')
    searchbody = soup.find('div', attrs={'class': 'd-area'})
    try:
        stitle = re.findall(r'<title>(.*?)</title>', html)[0]
    except IndexError:  # was a bare except; only the [0] can fail here
        stitle = '検索結果'
    boxall = searchbody.find('div', attrs={'class': 'd-sect'})
    boxlist = []
    for box in str(boxall).split('<div>'):
        if not box:
            continue
        # Fragments without a <span class="txt"> title are layout chrome;
        # skip them (the original's dead `== None` check could never fire).
        if not re.findall(r'<span class=\"txt\">(.*?)</span>', box):
            continue
        cid = re.findall(
            r'<a href=\"https://www\.dmm\.co\.jp/.*?/cid=(\w+)/\?.*?\">', box)
        links = re.findall(
            r'<a href=\"(https://www\.dmm\.co\.jp/.*?-/detail/=/cid=\w+/\?.*?)\">',
            box)
        img = re.findall(r'(pics\.dmm\.co\.jp/.*?/\w+/\w+.jpg)', box)
        title = re.findall(r'alt=\"(.*)\" src', box)
        sublinks = re.findall(r'<span><a href=\"(.*?)\">.*?</a></span>', box)
        subtexts = re.findall(r'<span><a href=\".*?\">(.*?)</a></span>', box)
        # The original's per-field try/excepts only ever caught IndexError
        # from [0]; conditional defaults are equivalent and explicit.
        boxlist.append({
            'cid': cid[0] if cid else '-',
            'keyword': ','.join(re.findall(
                r'<span class=\"ico-\w+-\w+\"><span>(.*?)</span></span>',
                box)),
            'links': links[0] if links else '-',
            'img': img[0] if img else '-',
            'title': title[0] if title else '-',
            'sublinks': sublinks[0] if sublinks else '-',
            'subtexts': subtexts[0] if subtexts else '-',
        })
    return (boxlist, stitle)
def dmmlinks_data(links):
    """Scrape a DMM listing page (given its full URL) into result dicts.

    links: absolute URL of the listing page.
    Returns (boxlist, stitle) shaped like dmmsearchall_data's result;
    sublinks is prefixed with the site origin because this listing uses
    relative hrefs.
    """
    html = get_html_jp(links)
    soup = BeautifulSoup(html, 'lxml')
    searchbody = soup.find('div', attrs={'class': 'd-area'})
    try:
        stitle = re.findall(r'<title>(.*?)</title>', html)[0]
    except IndexError:  # was a bare except; only the [0] can fail here
        stitle = '検索結果'
    boxall = searchbody.find_all('li', attrs={'style': 'width: 130px;'})
    boxlist = []
    for box in str(boxall).split('</div></li>'):
        if not box:
            continue
        # Fragments without a <span class="txt"> title are layout chrome;
        # skip them (the original's dead `== None` check could never fire).
        if not re.findall(r'<span class=\"txt\">(.*?)</span>', box):
            continue
        cid = re.findall(r'https://www\.dmm\.co\.jp/.*?/cid=(\w+)/', box)
        # Was also named `links`, shadowing the parameter.
        detail = re.findall(r'(https://www\.dmm\.co\.jp/.*?/cid=\w+/)', box)
        img = re.findall(r'(pics\.dmm\.co\.jp/.*?/\w+/\w+.jpg)', box)
        title = re.findall(r'alt=\"(.*)\" src', box)
        sub = re.findall(r'span><a href=\"(.*?)\">.*?</a></span>', box)
        subtexts = re.findall(r'<span><a href=\".*?\">(.*?)</a></span>', box)
        # The original's per-field try/excepts only ever caught IndexError
        # from [0]; conditional defaults are equivalent and explicit.
        boxlist.append({
            'cid': cid[0] if cid else '-',
            'keyword': ','.join(re.findall(
                r'<span class=\"ico-\w+-\w+\"><span>(.*?)</span></span>',
                box)),
            'links': detail[0] if detail else '-',
            'img': img[0] if img else '-',
            'title': title[0] if title else '-',
            'sublinks': 'https://www.dmm.co.jp' + sub[0] if sub else '-',
            'subtexts': subtexts[0] if subtexts else '-',
        })
    return (boxlist, stitle)
def video(self, id):
    """Return the sample-video URL linked from a FALENO work page."""
    page = get_html_jp('https://faleno.jp/top/works/%s' % id, verify=False)
    soup = BeautifulSoup(page, 'lxml')
    return soup.find('a', attrs={'class': 'pop_sample'}).get('href')