def findinfo(articleid, mode='uid'):
    """Fetch a DMM listing page and return (max_page, title).

    articleid: an actress/article id, or a full listing URL when mode='link'.
    mode: 'uid' builds the actress-listing URL from the id; 'link' treats
          articleid as the URL itself.
    Returns (page_count, title): page_count is 1 when the page has no
    pagination links, otherwise the highest page number found.
    Raises ValueError for an unknown mode (the original fell through and
    crashed with a NameError) and IndexError when no <title> matches.
    """
    if mode == 'link':
        url = articleid
        html = get_html_jp(url)
        page1 = re.findall(r'/digital/videoa/-/list/=/.*/id=\d+/page=(\d+)/',
                           html)
    elif mode == 'uid':
        url = "https://www.dmm.co.jp/digital/videoa/-/list/=/article=actress/id=%s/" % articleid
        html = get_html_jp(url)
        page1 = re.findall(
            r'/digital/videoa/-/list/=/article=actress/id=\d+/page=(\d+)/',
            html)
    else:
        raise ValueError("mode must be 'link' or 'uid', got %r" % mode)
    title = re.findall(
        r'<title>(.*) - エロ動画・アダルトビデオ - FANZA動画</title>', html)
    if not page1:
        page1 = 1
    else:
        # The original built a "unique" list but compared str against int,
        # so the dedup never fired; max() over the ints is equivalent.
        page1 = max(int(p) for p in page1)
    return (page1, title[0])
def precid(searchcid):
    """Scrape a DMM mono/dvd detail page and format it as markdown text.

    searchcid: the DMM product cid.
    Returns the rendered text block. Raises AttributeError when the expected
    page elements are missing (unchanged from the original behavior).
    """
    searchurl = 'https://www.dmm.co.jp/mono/dvd/-/detail/=/cid={}/'.format(
        searchcid)
    html = get_html_jp(searchurl)
    soup = BeautifulSoup(html, 'lxml')
    body = soup.find('table', attrs={'class': 'mg-b20'})
    # The original assigned soup.title.string twice; once is enough.
    red = soup.find('span', attrs={'class': 'red'}).string
    title = red + soup.title.string
    photo = soup.find('div', attrs={'class': 'tx10 pd-3 lh4'}).a.get('href')
    # Hoist the row lookup instead of re-running find_all('tr') per field.
    rows = body.find_all('tr')
    pushdate = rows[0].find('td').find_next_sibling().string
    runtime = rows[1].find('td').find_next_sibling().string  # was `time`, shadowing the stdlib module
    performer = rows[2].find('td').find_next_sibling().span.a.string
    num = rows[-1].find('td').find_next_sibling().string
    text = '''
`{}`
[DVD ]({})
*発売日:*{}
*収録時間:*{}
*出演者:*{}
*品番:* `{}`
[官方信息]({})
'''.format(title, photo, pushdate, runtime, performer, num, searchurl)
    return text
def new_work(self):
    """Scrape the FALENO 'new works' page and render it via faleno.md."""
    page = get_html_jp('https://faleno.jp/top/work/', verify=False)
    soup = BeautifulSoup(page, 'lxml')
    page_title = soup.title.string
    container = soup.find('div', attrs={'class': 'box_kanren01'})
    entries = []
    for item in container.find_all('div', attrs={'class': 'waku_kanren01'}):
        anchor = item.find('a')
        image = item.find('img')
        href = anchor.get('href')
        entries.append({
            'url': href,
            'work': re.findall(r'https://faleno.jp/top/works/(.*)/',
                               str(href))[0],
            'name': image.get('alt'),
            'img': image.get('src'),
        })
    # Load the markdown template bundled with this package and render it.
    env = Environment(loader=PackageLoader(__name__, "templates"))
    template = env.get_template('faleno.md')
    return template.render(data=entries, title=str(page_title))
def actress(self):
    """Scrape the FALENO actress index and render it via faleno_actress.md."""
    page = get_html_jp('https://faleno.jp/top/actress/', verify=False)
    soup = BeautifulSoup(page, 'lxml')
    page_title = soup.find('section', attrs={'class': 'bread'}).p.contents[1]
    entries = []
    for item in soup.find_all('li', attrs={'data-mh': 'group01'}):
        href = item.a.get('href')
        name_div = item.find('div', attrs={'class': 'text_name'})
        # The div holds "<jp name><span><romanized name></span>".
        jp_name, en_name = re.findall(
            r'<div class="text_name">(.*)<span>(.*)</span></div>',
            str(name_div))[0]
        entries.append({'links': href, 'name': jp_name, 'eng': en_name})
    # Load the markdown template bundled with this package and render it.
    env = Environment(loader=PackageLoader(__name__, "templates"))
    template = env.get_template('faleno_actress.md')
    return template.render(data=entries, title=str(page_title))
def photo(self, id):
    """Return the list of sample-photo URLs on a FALENO work page."""
    page = get_html_jp('https://faleno.jp/top/works/%s' % id, verify=False)
    soup = BeautifulSoup(page, 'lxml')
    anchors = soup.find_all('a', attrs={'class': 'pop_img'})
    return [anchor.get('href') for anchor in anchors]
def dmmonecid(searchcid):
    """Look up one DMM digital video by cid.

    Returns (rendered_text, notitle) from template_cid, or the not-found
    message passed straight through from ciddata.
    """
    cid = searchcid.replace('-', '00')  # DMM cids use '00' where titles use '-'
    url = 'https://www.dmm.co.jp/digital/videoa/-/detail/=/cid={}/'.format(cid)
    page = get_html_jp(url)
    data, notitle = ciddata(page)
    if data == '指定されたページが見つかりません':
        return data, notitle
    return template_cid(data), notitle
def dmmcid(in_q, out_q):
    """Worker: drain listing URLs from in_q and collect each page's cids.

    in_q: queue.Queue of DMM listing-page URLs.
    out_q: a list (despite the name) — one list of cid strings is appended
           per page processed.
    """
    # NOTE(review): empty()/get() is racy with multiple consumers; acceptable
    # for a single worker, otherwise prefer get_nowait() + queue.Empty.
    while not in_q.empty():  # was `in_q.empty() is not True`
        url = in_q.get()
        html = get_html_jp(url)
        # Was named `list`, shadowing the builtin.
        cids = re.findall(
            r'https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=([_0-9a-z]+)/',
            html)
        out_q.append(cids)
        in_q.task_done()
def prephotos(searchurl):
    """Collect thumbnail URLs from a DMM detail page and rewrite each to its
    full-size 'jp-' variant."""
    html = get_html_jp(searchurl)
    soup = BeautifulSoup(html, 'lxml')
    thumbs = soup.find_all('img', attrs={'class': 'mg-b6'})
    urls = re.findall(
        r'(https://pics.dmm.co.jp/digital/video/.*?/.*?.jpg)', str(thumbs))
    jpg = []
    for u in urls:
        # Splice 'jp' in front of the last six characters ('-N.jpg'), then
        # fix the dash placement: '...-1.jpg' becomes '...jp-1.jpg'.
        rewritten = (u[:-6] + 'jp' + u[-6:]).replace('-jp', 'jp-', 1)
        jpg.append(rewritten)
    return jpg
def work(self, id):
    """Scrape a single FALENO work page and render it via faleno_work.md."""
    url = 'https://faleno.jp/top/works/%s' % id
    soup = BeautifulSoup(get_html_jp(url, verify=False), 'lxml')
    sample = soup.find('a', attrs={'class': 'pop_sample'})
    details = {'img': sample.img.get('src')}
    page_title = sample.img.get('alt')
    details['text'] = soup.find(
        'div', attrs={'class': 'box_works01_text'}).p.string
    details['url'] = url
    # Each spec row is "<label><p>value</p>"; key on the stripped label text.
    for row in soup.find_all('li', attrs={'class': 'clearfix'}):
        details[row.find(text=True).strip()] = row.p.string
    # Load the markdown template bundled with this package and render it.
    env = Environment(loader=PackageLoader(__name__, "templates"))
    template = env.get_template('faleno_work.md')
    return template.render(data=details, title=str(page_title))
def actress_work(self, name):
    """Scrape a FALENO actress page and render her work list via faleno.md.

    name: actress name; spaces become underscores and it is lower-cased to
    build the URL slug.
    """
    slug = name.replace(' ', '_').lower()
    url = 'https://faleno.jp/top/actress/%s/' % slug
    soup = BeautifulSoup(get_html_jp(url, verify=False), 'lxml')
    title = soup.title.string
    # NOTE(review): the original also scraped the profile header ('bar02'),
    # portrait image and spec table into locals that were never used (and
    # that could raise on a missing element); that dead code is removed.
    body = soup.find('div', attrs={'class': 'box_kanren01'})
    boxlist = []
    for item in body.find_all('div', attrs={'class': 'waku_kanren01'}):
        href = item.find('a').get('href')
        image = item.find('img')
        boxlist.append({
            'work': re.findall(r'https://faleno.jp/top/works/(.*)/',
                               str(href))[0],
            'url': href,
            'name': image.get('alt'),
            'img': image.get('src'),
        })
    # Load the markdown template bundled with this package and render it.
    env = Environment(loader=PackageLoader(__name__, "templates"))
    template = env.get_template('faleno.md')
    return template.render(data=boxlist, title=str(title))
def search(self, word, searchmode='temp'):
    """Search faleno.jp for *word*.

    searchmode='search' returns the raw list of result dicts;
    searchmode='temp' (default) returns markdown rendered from faleno.md;
    any other value returns None (unchanged from the original).
    """
    response = get_html_jp('https://faleno.jp/top/?s=%s' % word, verify=False)
    soup = BeautifulSoup(response, 'lxml')
    title = soup.find('section', attrs={'class': 'bread'}).p.contents[1]
    try:
        # Detect the Japanese "no works matched" notice on the page.
        ifresult = re.findall(r'に一致する作品は見つかりませんでした', str(soup))
        noresult = "{}(没有找到相应内容的作品):{}".format(ifresult, title)
        if 'に一致する作品は見つかりませんでした' in ifresult:
            return noresult
    except Exception as e:
        print(e)
    boxlist = []
    for item in soup.find_all('div', attrs={'class': 'waku_kanren01'}):
        href = item.find('a').get('href')
        image = item.find('img')
        boxlist.append({
            'work': re.findall(r'https://faleno.jp/top/works/(.*)/',
                               str(href))[0],
            'url': href,
            'name': image.get('alt'),
            'img': image.get('src'),
        })
    if searchmode == 'search':
        return boxlist
    if searchmode == 'temp':
        # Load the markdown template bundled with this package and render it.
        env = Environment(loader=PackageLoader(__name__, "templates"))
        template = env.get_template('faleno.md')
        return template.render(data=boxlist, title=str(title))
def dmmsearchall_data(searchstr):
    """Run a DMM site-wide search and scrape the result grid.

    Returns (boxlist, stitle): boxlist is a list of dicts with keys cid,
    keyword, links, img, title, sublinks, subtexts ('-' when a field is
    absent; keyword is a comma-joined string, '' when no icons); stitle is
    the page <title>. When DMM reports no matching items, returns
    (no_result_message, 1) instead.
    """
    url = 'https://www.dmm.co.jp/search/=/searchstr={}/sort=rankprofile/'.format(
        searchstr)
    html = get_html_jp(url)
    # "No matching items" marker emitted by DMM's search page.
    # (The original wrapped this membership test in a pointless try/except.)
    noresult = 'に一致する商品は見つかりませんでした。'
    if noresult in re.findall(r'(に一致する商品は見つかりませんでした。)', html):
        return (noresult, 1)
    soup = BeautifulSoup(html, 'lxml')
    searchbody = soup.find('div', attrs={'class': 'd-area'})
    try:
        stitle = re.findall(r'<title>(.*?)</title>', html)[0]
    except IndexError:  # was a bare except; only the [0] can fail here
        stitle = '検索結果'
    boxall = searchbody.find('div', attrs={'class': 'd-sect'})
    boxlist = []
    for box in str(boxall).split('<div>'):
        if not box:
            continue
        # Fragments without a <span class="txt"> title are layout chrome;
        # skip them (the original's dead `== None` check could never fire).
        if not re.findall(r'<span class=\"txt\">(.*?)</span>', box):
            continue
        cid = re.findall(
            r'<a href=\"https://www\.dmm\.co\.jp/.*?/cid=(\w+)/\?.*?\">', box)
        links = re.findall(
            r'<a href=\"(https://www\.dmm\.co\.jp/.*?-/detail/=/cid=\w+/\?.*?)\">',
            box)
        img = re.findall(r'(pics\.dmm\.co\.jp/.*?/\w+/\w+.jpg)', box)
        title = re.findall(r'alt=\"(.*)\" src', box)
        sublinks = re.findall(r'<span><a href=\"(.*?)\">.*?</a></span>', box)
        subtexts = re.findall(r'<span><a href=\".*?\">(.*?)</a></span>', box)
        # The original's per-field try/excepts only ever caught IndexError
        # from [0]; conditional defaults are equivalent and explicit.
        boxlist.append({
            'cid': cid[0] if cid else '-',
            'keyword': ','.join(re.findall(
                r'<span class=\"ico-\w+-\w+\"><span>(.*?)</span></span>',
                box)),
            'links': links[0] if links else '-',
            'img': img[0] if img else '-',
            'title': title[0] if title else '-',
            'sublinks': sublinks[0] if sublinks else '-',
            'subtexts': subtexts[0] if subtexts else '-',
        })
    return (boxlist, stitle)
def dmmlinks_data(links):
    """Scrape a DMM listing page (given its full URL) into result dicts.

    links: absolute URL of the listing page.
    Returns (boxlist, stitle) shaped like dmmsearchall_data's result;
    sublinks is prefixed with the site origin because this listing uses
    relative hrefs.
    """
    html = get_html_jp(links)
    soup = BeautifulSoup(html, 'lxml')
    searchbody = soup.find('div', attrs={'class': 'd-area'})
    try:
        stitle = re.findall(r'<title>(.*?)</title>', html)[0]
    except IndexError:  # was a bare except; only the [0] can fail here
        stitle = '検索結果'
    boxall = searchbody.find_all('li', attrs={'style': 'width: 130px;'})
    boxlist = []
    for box in str(boxall).split('</div></li>'):
        if not box:
            continue
        # Fragments without a <span class="txt"> title are layout chrome;
        # skip them (the original's dead `== None` check could never fire).
        if not re.findall(r'<span class=\"txt\">(.*?)</span>', box):
            continue
        cid = re.findall(r'https://www\.dmm\.co\.jp/.*?/cid=(\w+)/', box)
        # Was also named `links`, shadowing the parameter.
        detail = re.findall(r'(https://www\.dmm\.co\.jp/.*?/cid=\w+/)', box)
        img = re.findall(r'(pics\.dmm\.co\.jp/.*?/\w+/\w+.jpg)', box)
        title = re.findall(r'alt=\"(.*)\" src', box)
        sub = re.findall(r'span><a href=\"(.*?)\">.*?</a></span>', box)
        subtexts = re.findall(r'<span><a href=\".*?\">(.*?)</a></span>', box)
        # The original's per-field try/excepts only ever caught IndexError
        # from [0]; conditional defaults are equivalent and explicit.
        boxlist.append({
            'cid': cid[0] if cid else '-',
            'keyword': ','.join(re.findall(
                r'<span class=\"ico-\w+-\w+\"><span>(.*?)</span></span>',
                box)),
            'links': detail[0] if detail else '-',
            'img': img[0] if img else '-',
            'title': title[0] if title else '-',
            'sublinks': 'https://www.dmm.co.jp' + sub[0] if sub else '-',
            'subtexts': subtexts[0] if subtexts else '-',
        })
    return (boxlist, stitle)
def video(self, id):
    """Return the sample-video URL linked from a FALENO work page."""
    page = get_html_jp('https://faleno.jp/top/works/%s' % id, verify=False)
    soup = BeautifulSoup(page, 'lxml')
    return soup.find('a', attrs={'class': 'pop_sample'}).get('href')