Python get_html_jpの例

プログラミング言語: Python

名前空間/パッケージ名: app.utils.requests

メソッド/関数: get_html_jp

hotexamples.comのコード掲載数: 6

Python get_html_jp - 6件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのapp.utils.requests.get_html_jpの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

0

ファイルを表示

ファイル: dmm.py プロジェクト: copyit/multi-bot

def dmmonecid(searchcid):
    """Fetch the detail page for a single content id and format the result.

    Every ``-`` in *searchcid* is replaced by ``00`` before the id is placed
    in the detail-page URL. The downloaded HTML is handed to ``ciddata``.

    Returns:
        A 2-tuple ``(result, notitle)``. ``notitle`` is the second value
        returned by ``ciddata``. ``result`` is the not-found message itself
        when ``ciddata`` reports it, otherwise the output of
        ``template_cid`` applied to the data ``ciddata`` returned.
    """
    not_found = '指定されたページが見つかりません'
    normalized_cid = searchcid.replace('-', '00')
    detail_url = 'https://www.dmm.co.jp/digital/videoa/-/detail/=/cid={}/'.format(
        normalized_cid)
    payload, notitle = ciddata(get_html_jp(detail_url))
    # The not-found message is passed through untouched; anything else is
    # rendered through the template.
    result = payload if payload == not_found else template_cid(payload)
    return result, notitle

コード例 #2

0

ファイルを表示

ファイル: dmm.py プロジェクト: copyit/multi-bot

def dmmcid(in_q, out_q):
    """Drain a queue of listing-page URLs and collect the content ids found.

    Intended to be safe to run from several workers sharing the same queue.

    Args:
        in_q: Queue of URLs; must support ``get_nowait()`` and
            ``task_done()`` (e.g. ``queue.Queue``).
        out_q: Object with ``append``; receives one list of content ids per
            URL processed.

    Returns:
        None. Returns as soon as the queue is found empty.
    """
    import queue

    cid_pattern = re.compile(
        r'https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=([_0-9a-z]+)/')
    while True:
        # Checking empty() and then calling a blocking get() is racy: another
        # worker can take the last item in between, leaving this one blocked
        # forever. A non-blocking get makes "queue is empty" the exit signal.
        try:
            url = in_q.get_nowait()
        except queue.Empty:
            return
        try:
            html = get_html_jp(url)
            out_q.append(cid_pattern.findall(html))
        finally:
            # Always balance the get(), even when the fetch fails, so a
            # caller waiting on in_q.join() is not left hanging.
            in_q.task_done()

コード例 #3

0

ファイルを表示

ファイル: dmm.py プロジェクト: copyit/multi-bot

def findinfo(articleid):
    """Return the page count and title text for an actress listing.

    Args:
        articleid: Id substituted into the actress listing URL.

    Returns:
        A tuple ``(page_count, name)``. ``page_count`` is the largest page
        number found in the pagination links of the fetched page, or ``1``
        when no such link exists. ``name`` is the text captured from the
        page ``<title>`` in front of the fixed site suffix.

    Raises:
        IndexError: If the page ``<title>`` does not match the expected
            pattern.
    """
    url = "https://www.dmm.co.jp/digital/videoa/-/list/=/article=actress/id=%s/" % articleid
    html = get_html_jp(url)
    page_numbers = re.findall(
        r'/digital/videoa/-/list/=/article=actress/id=\d+/page=(\d+)/', html)
    titles = re.findall(r'<title>(.*) - エロ動画・アダルトビデオ - FANZA動画</title>', html)
    # Only the maximum matters, so duplicates need no filtering. (The former
    # filter compared str items against a list of ints and never matched.)
    page_count = max(map(int, page_numbers)) if page_numbers else 1
    return (page_count, titles[0])

コード例 #4

0

ファイルを表示

ファイル: dmm.py プロジェクト: copyit/multi-bot

def prephotos(searchcid):
    """Collect sample-image URLs from a detail page and rewrite each one.

    The detail page for *searchcid* is fetched, its ``<img class="mg-b6">``
    tags are collected, and every ``pics.dmm.co.jp`` ``.jpg`` URL found in
    them is rewritten by inserting ``jp`` into the file name.
    Presumably the rewritten URL addresses a larger variant of the image;
    verify against the site.

    Returns:
        A list of the rewritten URL strings, in the order they were found.
    """
    detail_url = 'https://www.dmm.co.jp/digital/videoa/-/detail/=/cid={}/'.format(
        searchcid)
    page = get_html_jp(detail_url)
    image_tags = BeautifulSoup(page, 'lxml').find_all(
        'img', attrs={'class': 'mg-b6'})
    thumb_urls = re.findall(
        r'(https://pics.dmm.co.jp/digital/video/.*?/.*?.jpg)', str(image_tags))
    # 'jp' goes in six characters before the end. If that places it right
    # after a hyphen (e.g. a two-digit index), it is moved in front of the
    # hyphen instead.
    return [
        (thumb[:-6] + 'jp' + thumb[-6:]).replace('-jp', 'jp-', 1)
        for thumb in thumb_urls
    ]

コード例 #5

0

ファイルを表示

ファイル: dmm.py プロジェクト: copyit/multi-bot

def dmmsearchall_data(searchstr):
    """Run a site search and scrape the result boxes from the response.

    Args:
        searchstr: Search text, placed into the search URL as-is (no
            URL-escaping is applied here).

    Returns:
        ``(message, 1)`` when the page contains the "no matching products"
        message, where ``message`` is that message string. Otherwise
        ``(boxlist, stitle)``: ``stitle`` is the page ``<title>`` text (or
        ``'検索結果'`` when the page has none) and ``boxlist`` is a list of
        dicts with the keys ``cid``, ``keyword``, ``links``, ``img``,
        ``title``, ``sublinks`` and ``subtexts``. A field that cannot be
        found is ``'-'``; ``keyword`` is a comma-joined string that may be
        empty. ``boxlist`` is empty when the result container is missing
        from the page.
    """
    url = 'https://www.dmm.co.jp/search/=/searchstr={}/sort=rankprofile/'.format(
        searchstr)
    html = get_html_jp(url)

    # Check whether the search produced any result at all.
    noresult = 'に一致する商品は見つかりませんでした。'
    if noresult in html:
        return (noresult, 1)

    soup = BeautifulSoup(html, 'lxml')
    searchbody = soup.find('div', attrs={'class': 'd-area'})
    title_match = re.search(r'<title>(.*?)</title>', html)
    stitle = title_match.group(1) if title_match else '検索結果'
    if searchbody is None:
        # No result container on the page: report "nothing found" instead
        # of failing with AttributeError on None.
        return ([], stitle)

    def first_group(pattern, text):
        """Return the first capture of *pattern* in *text*, or '-'."""
        match = re.search(pattern, text)
        return match.group(1) if match else '-'

    # If the 'd-sect' div is absent, str(None) yields a fragment without a
    # title span, so the loop below simply produces no entries.
    boxall = searchbody.find('div', attrs={'class': 'd-sect'})

    boxlist = []
    for box in str(boxall).split('<div>'):
        # Fragments without a title span are not result boxes; skip them.
        if re.search(r'<span class=\"txt\">(.*?)</span>', box) is None:
            continue
        keywords = re.findall(
            r'<span class=\"ico-\w+-\w+\"><span>(.*?)</span></span>', box)
        boxlist.append({
            'cid': first_group(
                r'<a href=\"https://www\.dmm\.co\.jp/.*?/cid=(\w+)/\?.*?\">',
                box),
            'keyword': ','.join(keywords),
            'links': first_group(
                r'<a href=\"(https://www\.dmm\.co\.jp/.*?-/detail/=/cid=\w+/\?.*?)\">',
                box),
            'img': first_group(r'(pics\.dmm\.co\.jp/.*?/\w+/\w+.jpg)', box),
            'title': first_group(r'alt=\"(.*)\" src', box),
            'sublinks': first_group(
                r'<span><a href=\"(.*?)\">.*?</a></span>', box),
            'subtexts': first_group(
                r'<span><a href=\".*?\">(.*?)</a></span>', box),
        })
    return (boxlist, stitle)

コード例 #6

0

ファイルを表示

ファイル: dmm.py プロジェクト: copyit/multi-bot

def dmmlinks_data(links):
    """Fetch a listing page by URL and scrape its item boxes.

    Args:
        links: URL of the listing page to fetch.

    Returns:
        ``(boxlist, stitle)``: ``stitle`` is the page ``<title>`` text (or
        ``'検索結果'`` when the page has none) and ``boxlist`` is a list of
        dicts with the keys ``cid``, ``keyword``, ``links``, ``img``,
        ``title``, ``sublinks`` and ``subtexts``. A field that cannot be
        found is ``'-'``; ``keyword`` is a comma-joined string that may be
        empty; a found ``sublinks`` value is prefixed with
        ``https://www.dmm.co.jp``. ``boxlist`` is empty when the result
        container is missing from the page.
    """
    html = get_html_jp(links)
    soup = BeautifulSoup(html, 'lxml')
    searchbody = soup.find('div', attrs={'class': 'd-area'})
    title_match = re.search(r'<title>(.*?)</title>', html)
    stitle = title_match.group(1) if title_match else '検索結果'
    if searchbody is None:
        # No result container on the page: report "nothing found" instead
        # of failing with AttributeError on None.
        return ([], stitle)

    def first_group(pattern, text):
        """Return the first capture of *pattern* in *text*, or '-'."""
        match = re.search(pattern, text)
        return match.group(1) if match else '-'

    boxall = searchbody.find_all('li', attrs={'style': 'width: 130px;'})

    boxlist = []
    for box in str(boxall).split('</div></li>'):
        # Fragments without a title span are not item boxes; skip them.
        if re.search(r'<span class=\"txt\">(.*?)</span>', box) is None:
            continue
        keywords = re.findall(
            r'<span class=\"ico-\w+-\w+\"><span>(.*?)</span></span>', box)
        sublink_match = re.search(
            r'span><a href=\"(.*?)\">.*?</a></span>', box)
        if sublink_match:
            # The captured href gets the site origin prepended.
            sublink = 'https://www.dmm.co.jp' + sublink_match.group(1)
        else:
            sublink = '-'
        boxlist.append({
            'cid': first_group(r'https://www\.dmm\.co\.jp/.*?/cid=(\w+)/', box),
            'keyword': ','.join(keywords),
            'links': first_group(
                r'(https://www\.dmm\.co\.jp/.*?/cid=\w+/)', box),
            'img': first_group(r'(pics\.dmm\.co\.jp/.*?/\w+/\w+.jpg)', box),
            'title': first_group(r'alt=\"(.*)\" src', box),
            'sublinks': sublink,
            'subtexts': first_group(
                r'<span><a href=\".*?\">(.*?)</a></span>', box),
        })
    return (boxlist, stitle)