Exemple #1
0
def getCover_small(number):  # fetch the small cover image from avsox
    """Search avsox for *number* and return the thumbnail cover URL.

    Retries with '-' replaced by '_' and then with '_' removed when the
    first search yields no results.  Returns '' when nothing matches.
    """
    htmlcode = get_html('https://avsox.host/cn/search/' + number)
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
    if result in ('', 'null', 'None'):
        # first fallback: some listings use '_' instead of '-'
        htmlcode = get_html('https://avsox.host/cn/search/' +
                            number.replace('-', '_'))
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        result = str(
            html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
        if result in ('', 'null', 'None'):
            # second fallback: drop the separator entirely
            htmlcode = get_html('https://avsox.host/cn/search/' +
                                number.replace('_', ''))
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    counts = len(html.xpath("//div[@id='waterfall']/div/a/div"))
    if counts == 0:
        return ''
    for count in range(1, counts + 1):  # scan search results for the wanted number
        number_get = html.xpath(
            "//div[@id='waterfall']/div[" + str(count) +
            "]/a/div[@class='photo-info']/span/date[1]/text()")
        # compare case-insensitively, consistent with the other
        # getCover_small implementation in this project
        if len(number_get) > 0 and number_get[0].upper() == number.upper():
            cover_small = html.xpath(
                "//div[@id='waterfall']/div[" + str(count) +
                "]/a/div[@class='photo-frame']/img/@src")[0]
            return cover_small
    return ''
Exemple #2
0
def find_number(number):
    """Locate the javbus detail-page URL for *number*.

    The censored catalogue is searched first (skipped for purely numeric,
    'n####'-style, or HEYZO codes, which are uncensored), then the
    uncensored one.  Returns the detail URL, or 'not found'.
    """
    # ===== censored search =====
    # raw strings: '\d' in a plain literal is an invalid escape sequence
    if not (re.match(r'^\d{4,}', number) or re.match(r'n\d{4}', number) or 'HEYZO' in number.upper()):
        htmlcode = get_html('https://www.javbus.com/search/' + number + '&type=1')
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        counts = len(html.xpath("//div[@id='waterfall']/div[@id='waterfall']/div"))
        if counts != 0:
            for count in range(1, counts + 1):  # scan search results for the wanted number
                number_get = html.xpath("//div[@id='waterfall']/div[@id='waterfall']/div[" + str(count) + "]/a[@class='movie-box']/div[@class='photo-info']/span/date[1]/text()")[0]
                number_get = number_get.upper()
                number = number.upper()
                if number_get == number or number_get == number.replace('-', '') or number_get == number.replace('_', ''):
                    result_url = html.xpath(
                        "//div[@id='waterfall']/div[@id='waterfall']/div[" + str(count) + "]/a[@class='movie-box']/@href")[0]
                    return result_url
    # ===== uncensored search =====
    htmlcode = get_html('https://www.javbus.com/uncensored/search/' + number + '&type=1')
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    counts = len(html.xpath("//div[@id='waterfall']/div[@id='waterfall']/div"))
    if counts == 0:
        return 'not found'
    for count in range(1, counts + 1):  # scan search results for the wanted number
        number_get = html.xpath("//div[@id='waterfall']/div[@id='waterfall']/div[" + str(count) + "]/a[@class='movie-box']/div[@class='photo-info']/span/date[1]/text()")[0]
        number_get = number_get.upper()
        number = number.upper()
        # uncensored listings also tolerate '-' <-> '_' separator swaps
        if (number_get == number
                or number_get == number.replace('-', '')
                or number_get == number.replace('_', '')
                or number_get == number.replace('-', '_')
                or number_get == number.replace('_', '-')):
            result_url = html.xpath(
                "//div[@id='waterfall']/div[@id='waterfall']/div[" + str(count) + "]/a[@class='movie-box']/@href")[0]
            return result_url
    return 'not found'
Exemple #3
0
def getOutlineScore(number):  # fetch synopsis and score
    """Return (outline, score) for *number*.

    Synopsis and rating are scraped from jav321.com first; when jav321
    has no synopsis, a dmm.co.jp search is used as a fallback.  Errors
    are logged and whatever was collected so far is returned.
    """
    outline = ''
    score = ''
    try:
        response = post_html("https://www.jav321.com/search", query={"sn": number})
        detail_page = etree.fromstring(response, etree.HTMLParser())
        outline = str(detail_page.xpath('/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()')).strip(" ['']")
        # the rating is rendered either as a star image (/img/NN.gif) or as plain text
        if re.search(r'<b>评分</b>: <img data-original="/img/(\d+).gif" />', response):
            score = re.findall(r'<b>评分</b>: <img data-original="/img/(\d+).gif" />', response)[0]
            score = str(float(score) / 10.0)  # image number NN encodes a score of NN/10
        else:
            score = str(re.findall(r'<b>评分</b>: ([^<]+)<br>', response)).strip(" [',']").replace('\'', '')
        if outline == '':
            # fallback: first hit of a dmm search, ranked by popularity
            dmm_htmlcode = get_html(
                "https://www.dmm.co.jp/search/=/searchstr=" + number.replace('-', '') + "/sort=ranking/")
            if 'に一致する商品は見つかりませんでした' not in dmm_htmlcode:
                dmm_page = etree.fromstring(dmm_htmlcode, etree.HTMLParser())
                url_detail = str(dmm_page.xpath('//*[@id="list"]/li[1]/div/p[2]/a/@href')).split(',', 1)[0].strip(
                    " ['']")
                if url_detail != '':
                    dmm_detail = get_html(url_detail)
                    html = etree.fromstring(dmm_detail, etree.HTMLParser())
                    outline = str(html.xpath('//*[@class="mg-t0 mg-b20"]/text()')).strip(" ['']").replace('\\n', '').replace('\n', '')
    except Exception as error_info:
        print('Error in javbus.getOutlineScore : ' + str(error_info))
    return outline, score
Exemple #4
0
def main(number):
    """Scrape dmm.co.jp metadata for *number* and return a JSON string.

    Tries the digital catalogue first and falls back to the DVD one on
    404.  On proxy timeout the JSON carries website='timeout'; on any
    other failure an empty record is returned.
    """
    htmlcode = get_html(
        'https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=' + number)
    url = 'https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=' + number
    if '404 Not Found' in htmlcode:
        # digital release not found -- fall back to the DVD catalogue
        htmlcode = get_html('https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=' +
                            number)
        url = 'https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=' + number
    if '404 Not Found' in htmlcode:
        dic = {
            'title': '',
            'website': '',
        }
        js = json.dumps(dic,
                        ensure_ascii=False,
                        sort_keys=True,
                        indent=4,
                        separators=(',', ':'))  # .encode('UTF-8')
        return js
    try:
        actor = getActor(htmlcode)
        dic = {
            'title': getTitle(htmlcode).strip(getActor(htmlcode)),
            'studio': getStudio(htmlcode),
            'publisher': getPublisher(htmlcode),
            'outline': getOutline(htmlcode),
            'runtime': getRuntime(htmlcode),
            'director': getDirector(htmlcode),
            'actor': actor,
            'release': getRelease(htmlcode),
            'number': getNum(htmlcode),
            'cover': getCover(htmlcode, number),
            'imagecut': 1,
            'tag': getTag(htmlcode),
            'series': getSeries(htmlcode),
            'year': getYear(getRelease(htmlcode)),
            'actor_photo': getActorPhoto(actor),
            'website': url,
            'source': 'dmm.py',
        }
    except Exception as error_info:
        # narrowed from a bare except: log the failure instead of hiding it
        print('Error in dmm.main : ' + str(error_info))
        if htmlcode == 'ProxyError':
            dic = {
                'title': '',
                'website': 'timeout',
            }
        else:
            dic = {
                'title': '',
                'website': '',
            }
    js = json.dumps(dic,
                    ensure_ascii=False,
                    sort_keys=True,
                    indent=4,
                    separators=(',', ':'))  # .encode('UTF-8')
    return js
Exemple #5
0
def getOutline(number):  # fetch the synopsis text
    """Return the plot synopsis for *number* from dmm.co.jp.

    dmm product ids use '00' where the input uses '-'; the DVD catalogue
    is tried when the digital page returns 404.  Network failures yield
    an empty synopsis rather than an exception.
    """
    try:
        dww_htmlcode = get_html('https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=' + number.replace("-", '00'))
        if '404 Not Found' in dww_htmlcode:
            dww_htmlcode = get_html('https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=' + number.replace("-", '00'))
    except Exception:  # narrowed from a bare except so Ctrl-C still propagates
        dww_htmlcode = ''
    html = etree.fromstring(dww_htmlcode, etree.HTMLParser())
    result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")).strip(" ['']")
    return result.replace('\n', '').replace('\\n', '').replace('\'', '').replace(',', '').replace(' ', '')
Exemple #6
0
def getOutlineScore(number):  # fetch synopsis and score
    """Return (outline, score) for *number*, scraped from dmm.co.jp.

    Falls back to jav321.com for the synopsis when dmm has none; the
    score always comes from dmm's review-average element.
    NOTE(review): unlike sibling implementations there is no try/except
    here, so any network failure propagates to the caller -- confirm
    callers expect that.
    """
    dmm_htmlcode = get_html("https://www.dmm.co.jp/search/=/searchstr=" + number + "/sort=ranking/")
    dmm_page = etree.fromstring(dmm_htmlcode, etree.HTMLParser())
    # follow the first search hit's detail-page link
    dmm_detail = get_html(str(dmm_page.xpath('//*[@id="list"]/li[1]/div/p[2]/a/@href')).split(',',1)[0].strip(" ['']"))
    html = etree.fromstring(dmm_detail, etree.HTMLParser())
    outline = str(html.xpath('//*[@class="mg-t0 mg-b20"]/text()')).strip(" ['']")
    if outline.strip() == "":
        response = post_html("https://www.jav321.com/search", query={"sn": number})
        detail_page = etree.fromstring(response, etree.HTMLParser())
        outline = str(detail_page.xpath('/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()')).strip(" ['']")
    score = str(html.xpath('//*[@class="d-review__average"]/strong/text()')).strip(" ['']点")
    return outline, score
Exemple #7
0
def main(number):
    """Scrape dmm.co.jp metadata for *number* and return a JSON string.

    Tries the digital catalogue first, then the DVD catalogue on 404.
    Proxy timeouts produce website='timeout'; any other failure is logged
    and an empty record is returned.
    """
    try:
        htmlcode = get_html(
            'https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=' + number)
        url = 'https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=' + number
        if '404 Not Found' in htmlcode:
            # digital release not found -- fall back to the DVD catalogue
            htmlcode = get_html(
                'https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=' + number)
            url = 'https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=' + number
        if '404 Not Found' in htmlcode:
            raise Exception('Movie Data not found in dmm!')
        if str(htmlcode) == 'ProxyError':
            raise TimeoutError
        actor = getActor(htmlcode)
        dic = {
            'title': getTitle(htmlcode).strip(getActor(htmlcode)),
            'studio': getStudio(htmlcode),
            'publisher': getPublisher(htmlcode),
            'outline': getOutline(htmlcode),
            'score': getScore(htmlcode),
            'runtime': getRuntime(htmlcode),
            'director': getDirector(htmlcode),
            'actor': actor,
            'release': getRelease(htmlcode),
            'number': getNum(htmlcode),
            'tag': getTag(htmlcode),
            'series': getSeries(htmlcode).replace('-', ''),
            'year': getYear(getRelease(htmlcode)),
            'actor_photo': getActorPhoto(actor),
            'cover': getCover(htmlcode, number),
            'extrafanart': getExtraFanart(htmlcode),
            'imagecut': 1,
            'website': url,
            'source': 'dmm.py',
        }
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in dmm.main : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(dic,
                    ensure_ascii=False,
                    sort_keys=True,
                    indent=4,
                    separators=(',', ':'))  # .encode('UTF-8')
    return js
Exemple #8
0
def main(number):
    """Scrape avsox metadata for *number* and return a JSON string.

    Retries the search with '-' replaced by '_' and then with '_'
    removed when no result is found.  On proxy timeout the JSON carries
    website='timeout'; other failures yield an empty record.
    """
    a = get_html('https://avsox.host/cn/search/' + number)
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
    if result1 in ('', 'null', 'None'):
        # first fallback: '_' instead of '-'
        a = get_html('https://avsox.host/cn/search/' + number.replace('-', '_'))
        html = etree.fromstring(a, etree.HTMLParser())
        result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
        if result1 in ('', 'null', 'None'):
            # second fallback: drop the separator entirely
            a = get_html('https://avsox.host/cn/search/' + number.replace('_', ''))
            html = etree.fromstring(a, etree.HTMLParser())
            result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
    web = get_html(result1)
    soup = BeautifulSoup(web, 'lxml')
    info = str(soup.find(attrs={'class': 'row movie'}))
    try:
        dic = {
            'actor': getActor(web),
            'title': getTitle(web).strip(getNum(web)).strip().replace(' ', '-'),
            'studio': getStudio(info),
            'publisher': '',
            'outline': '',  #
            'runtime': getRuntime(info),
            'director': '',  #
            'release': getRelease(info),
            'number': getNum(info),
            'cover': getCover(web),
            'cover_small': getCover_small(a, number),
            'imagecut': 3,
            'tag': getTag(web),
            'series': getSeries(info),
            'year': getYear(getRelease(info)),
            'actor_photo': getActorPhoto(web),
            'website': result1,
            'source': 'avsox.py',
        }
    except Exception as error_info:
        # narrowed from a bare except: log the failure instead of hiding it
        print('Error in avsox.main : ' + str(error_info))
        if a == 'ProxyError':
            dic = {
                'title': '',
                'website': 'timeout',
            }
        else:
            dic = {
                'title': '',
                'website': '',
            }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
Exemple #9
0
def main(number, appoint_url):
    """Scrape avsox metadata for *number* and return a JSON string.

    appoint_url, when non-empty, overrides the search result and is used
    as the detail-page URL directly.  Proxy timeouts produce
    website='timeout'; other failures are logged and yield an empty record.
    """
    try:
        count, response, url = getUrl(number)
        if str(response) == 'ProxyError':
            raise TimeoutError
        if appoint_url != '':
            url = appoint_url
        elif url == '':
            raise Exception('Movie Data not found in avsox!')
        web = get_html(url)
        soup = BeautifulSoup(web, 'lxml')
        info = str(soup.find(attrs={'class': 'row movie'}))
        number = getNum(web)
        # (a stray debug "print(1)" was removed here)
        dic = {
            'actor': getActor(web),
            'title': getTitle(web).strip(number).strip().replace(' ', '-'),
            'studio': getStudio(info),
            'runtime': getRuntime(info),
            'release': getRelease(info),
            'number': getNum(info),
            'tag': getTag(web),
            'series': getSeries(info),
            'year': getYear(getRelease(info)),
            'actor_photo': getActorPhoto(web),
            'cover': getCover(web),
            'cover_small': getCover_small(response, count),
            'extrafanart': '',
            'imagecut': 3,
            'director': '',
            'publisher': '',
            'outline': '',
            'score': '',
            'website': url,
            'source': 'avsox.py',
        }
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in avsox.main : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(
        dic,
        ensure_ascii=False,
        sort_keys=True,
        indent=4,
        separators=(',', ':'),
    )  # .encode('UTF-8')
    return js


# print(main('051119-917'))
# print(main('032620_001'))
# print(main('032620_001', 'https://avsox.host/cn/movie/cb8d28437cff4e90'))
Exemple #10
0
def main(number):
    """Scrape fc2club/fc2 metadata for FC2-*number* and return a JSON string.

    The synopsis comes from adult.contents.fc2.com, everything else from
    fc2club.  On proxy timeout the JSON carries website='timeout';
    other failures yield an empty record.
    """
    htmlcode2 = get_html(
        'http://adult.contents.fc2.com/article_search.php?id=' + number +
        '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
    htmlcode = get_html('https://fc2club.com//html/FC2-' + number + '.html')
    actor = getActor(htmlcode)
    if len(actor) == 0:
        actor = 'FC2系列'  # default label when no actor is listed
    try:
        dic = {
            'title': getTitle(htmlcode).replace(' ', '-'),
            'studio': getStudio(htmlcode),
            'publisher': '',
            'year': '',
            'outline': getOutline(htmlcode2).replace('\n', ''),
            'runtime': getYear(getRelease(htmlcode)),
            'director': '',
            'actor': actor.replace('/', ','),
            'release': getRelease(number),
            'number': 'FC2-' + number,
            'cover': getCover(htmlcode, number, htmlcode2),
            'imagecut': 0,
            'series': '',
            'tag': getTag(htmlcode),
            'actor_photo': getActorPhoto(actor),
            'website': 'https://fc2club.com//html/FC2-' + number + '.html',
            'source': 'fc2fans_club.py',
        }
    except Exception as error_info:
        # narrowed from a bare except: log the failure instead of hiding it
        print('Error in fc2fans_club.main : ' + str(error_info))
        if htmlcode2 == 'ProxyError':
            dic = {
                'title': '',
                'website': 'timeout',
            }
        else:
            dic = {
                'title': '',
                'website': '',
            }
    js = json.dumps(
        dic,
        ensure_ascii=False,
        sort_keys=True,
        indent=4,
        separators=(',', ':'),
    )  # .encode('UTF-8')
    return js
Exemple #11
0
def getActorPhoto(htmlcode):
    """Map each actor name on the page to her profile-photo URL.

    Follows every 'star-name' link, scrapes the photo from the profile
    page, and returns a dict of {name: photo_url}.
    """
    soup = BeautifulSoup(htmlcode, 'lxml')
    photos = {}
    for star in soup.find_all(attrs={'class': 'star-name'}):
        profile_url = star.a['href']
        name = star.get_text()
        profile = etree.fromstring(get_html(profile_url), etree.HTMLParser())
        photo = str(profile.xpath(
            '//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
        photos[name] = photo
    return photos
Exemple #12
0
def find_number(number):
    """Search xcity.jp for *number*.

    Returns (detail_url, detail_html) for the first result whose product
    code matches, or ('not found', '') when nothing does.
    """
    query = number.replace('-', '')
    page = get_html('https://xcity.jp/result_published/?q=' + query)
    if '該当する作品はみつかりませんでした' in page:
        return 'not found', ''
    tree = etree.fromstring(page, etree.HTMLParser())
    rows = len(
        tree.xpath("//div[@id='searchResult']/table[@class='resultList']/tr"))
    # row 1 is the table header; data rows start at index 2
    for row in range(2, rows + 1):
        candidate_url = 'https://xcity.jp' + tree.xpath(
            "//div[@id='searchResult']/table[@class='resultList']/tr[" +
            str(row) + "]/td[1]/a/@href")[0]
        detail_page = get_html(candidate_url)
        detail_tree = etree.fromstring(detail_page, etree.HTMLParser())
        hinban = str(detail_tree.xpath("//span[@id='hinban']/text()")[0])
        if hinban.upper() == query.upper():
            return candidate_url, detail_page
    return 'not found', ''
Exemple #13
0
def main(number, appoint_url):
    """Scrape fc2club metadata for FC2-*number* and return a JSON string.

    appoint_url, when truthy, replaces the default fc2club detail URL.
    Proxy timeouts produce website='timeout'; other failures are logged
    and yield an empty record.
    """
    try:
        url = 'https://fc2club.com//html/FC2-' + number + '.html'
        if appoint_url:
            url = appoint_url
        htmlcode = get_html(url)
        if str(htmlcode) == 'ProxyError':
            raise TimeoutError
        actor = getActor(htmlcode)
        if len(actor) == 0:
            actor = 'FC2系列'  # default label when no actor is listed
        dic = {
            'title': getTitle(htmlcode).strip(' '),
            'studio': getStudio(htmlcode),
            'score': getScore(htmlcode),
            # NOTE(review): 'runtime' is populated from the release year --
            # looks odd, confirm against the fc2club page layout
            'runtime': getYear(getRelease(htmlcode)),
            'actor': actor.replace('/', ','),
            'release': getRelease(number),
            'number': 'FC2-' + number,
            'tag': getTag(htmlcode),
            'actor_photo': getActorPhoto(actor),
            'cover': getCover(htmlcode),
            'extrafanart': getExtraFanart(htmlcode),
            'imagecut': 0,
            'director': '',
            'series': '',
            'publisher': '',
            'year': '',
            'outline': '',
            'website': 'https://fc2club.com//html/FC2-' + number + '.html',
            'source': 'fc2fans_club.py',
        }
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in fc2fans_club.main : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(
        dic,
        ensure_ascii=False,
        sort_keys=True,
        indent=4,
        separators=(',', ':'),
    )
    return js
Exemple #14
0
def main_uncensored(number):
    """Scrape javbus metadata for an uncensored *number*; return JSON.

    Resolves the detail page via find_number(), optionally pulls a
    synopsis/score for HEYZO titles, and falls back to imagecut=0 when
    no small cover could be found on avsox.
    """
    try:
        result_url = find_number(number)
        if result_url == 'not found':
            raise Exception('Movie Data not found in javbus.main_uncensored!')
        htmlcode = get_html(result_url)
        if str(htmlcode) == 'ProxyError':
            raise TimeoutError
        number = getNum(htmlcode)
        outline = ''
        score = ''
        if 'HEYZO' in number.upper():
            # only HEYZO titles have synopsis/score available via jav321/dmm
            outline, score = getOutlineScore(number)
        dic = {
            'title': getTitle(htmlcode).replace(number, '').strip().replace(' ', '-'),
            'studio': getStudio(htmlcode),
            'publisher': '',
            'year': getYear(getRelease(htmlcode)),
            'outline': outline,
            'score': score,
            'runtime': getRuntime(htmlcode).replace('分鐘', '').strip(),
            'director': getDirector(htmlcode),
            'actor': getActor(htmlcode),
            'release': getRelease(htmlcode),
            'number': getNum(htmlcode),
            'cover': getCover(htmlcode),
            'extrafanart': getExtraFanart(htmlcode),
            'tag': getTag(htmlcode),
            'series': getSeries(htmlcode),
            'imagecut': 3,
            'cover_small': getCover_small(number),
            'actor_photo': getActorPhoto(htmlcode),
            'website': result_url,
            'source': 'javbus.py',
        }
        if dic['cover_small'] == '':
            dic['imagecut'] = 0  # no thumbnail -> don't crop the cover
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in javbus.main_uncensored : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
Exemple #15
0
def getCover_small(number):  # fetch small cover image from avsox
    """Return the avsox thumbnail URL for *number*, or '' when absent.

    Compares product codes case-insensitively; any scraping error is
    logged and swallowed.
    """
    try:
        page = get_html('https://avsox.host/cn/search/' + number)
        tree = etree.fromstring(page, etree.HTMLParser())
        total = len(tree.xpath("//div[@id='waterfall']/div/a/div"))
        for idx in range(1, total + 1):  # scan search results for the wanted number
            labels = tree.xpath("//div[@id='waterfall']/div[" + str(idx) +
                                "]/a/div[@class='photo-info']/span/date[1]/text()")
            if labels and labels[0].upper() == number.upper():
                return tree.xpath("//div[@id='waterfall']/div[" + str(idx) +
                                  "]/a/div[@class='photo-frame']/img/@src")[0]
    except Exception as error_info:
        print('Error in javbus.getCover_small : ' + str(error_info))
    return ''
def getUrl(number):
    """Search avsox for *number*.

    Returns (index, search_html, page_url) where index is the 1-based
    position of the matching result, or (0, search_html, '') when no
    result's product code matches.
    """
    response = get_html('https://avsox.website/cn/search/' + number)
    tree = etree.fromstring(response, etree.HTMLParser())
    url_list = tree.xpath('//*[@id="waterfall"]/div/a/@href')
    wanted = number.upper()
    for idx, href in enumerate(url_list, start=1):
        label = str(
            tree.xpath('//*[@id="waterfall"]/div[' + str(idx) +
                       ']/a/div[@class="photo-info"]/span/date[1]/text()')
        ).strip(" ['']")
        if wanted == label.upper():
            return idx, response, 'https:' + href
    return 0, response, ''
Exemple #17
0
def main(number):
    """Scrape mgstage.com metadata for *number* and return a JSON string.

    Requires the adult-check cookie {'adc': '1'}.  Proxy timeouts
    produce website='timeout'; other failures are logged and yield an
    empty record.
    """
    try:
        number = number.upper()
        htmlcode = str(
            get_html('https://www.mgstage.com/product/product_detail/' + str(number) + '/', cookies={'adc': '1'}))
        # some pages ship a malformed 'ahref' tag; repair before parsing
        htmlcode = htmlcode.replace('ahref', 'a href')
        if str(htmlcode) == 'ProxyError':
            raise TimeoutError
        actor = getActor(htmlcode).replace(' ', '')
        dic = {
            'title': getTitle(htmlcode).replace("\\n", '').replace('        ', '').strip(','),
            'studio': getStudio(htmlcode).strip(','),
            'publisher': getPublisher(htmlcode).strip(','),
            'outline': getOutline(htmlcode).replace('\n', '').strip(','),
            'score': getScore(htmlcode).strip(','),
            'runtime': getRuntime(htmlcode).strip(','),
            'actor': actor.strip(','),
            'release': getRelease(htmlcode).strip(','),
            'number': getNum(htmlcode).strip(','),
            'cover': getCover(htmlcode).strip(','),
            'extrafanart': getExtraFanart(htmlcode).strip(','),
            'imagecut': 0,
            'tag': getTag(htmlcode).strip(','),
            'series': getSeries(htmlcode).strip(','),
            'year': getYear(getRelease(htmlcode)).strip(','),
            'actor_photo': getActorPhoto(actor.split(',')),
            'director': '',
            'website': 'https://www.mgstage.com/product/product_detail/' + str(number) + '/',
            'source': 'mgstage.py',
        }
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in mgstage.main : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
Exemple #18
0
def main(number):
    """Scrape mgstage.com metadata for *number* and return a JSON string.

    Requires the adult-check cookie {'adc': '1'}.  On proxy timeout the
    JSON carries website='timeout'; other failures yield an empty record.
    """
    number = number.upper()
    htmlcode = str(
        get_html('https://www.mgstage.com/product/product_detail/' +
                 str(number) + '/',
                 cookies={'adc': '1'}))
    soup = BeautifulSoup(htmlcode, 'lxml')
    # strip the page's hard-coded indentation runs from the detail table
    # before handing it to the field extractors
    a = str(soup.find(attrs={'class': 'detail_data'})).replace(
        '\n                                        ',
        '').replace('                                ',
                    '').replace('\n                            ',
                                '').replace('\n                        ', '')
    try:
        actor = getActor(a).replace(' ', '')
        dic = {
            'title': getTitle(htmlcode).replace("\\n", '').replace('        ', ''),
            'studio': getStudio(a),
            'publisher': getPublisher(a),
            'outline': getOutline(htmlcode).replace('\n', ''),
            'runtime': getRuntime(a),
            'director': '',
            'actor': actor,
            'release': getRelease(a),
            'number': getNum(a),
            'cover': getCover(htmlcode),
            'imagecut': 0,
            'tag': getTag(a).strip(','),
            'series': getSeries(a).strip(','),
            'year': getYear(getRelease(a)),
            'actor_photo': getActorPhoto(actor.split(',')),
            'website': 'https://www.mgstage.com/product/product_detail/' + str(number) + '/',
            'source': 'mgstage.py',
        }
    except Exception as error_info:
        # narrowed from a bare except: log the failure instead of hiding it
        print('Error in mgstage.main : ' + str(error_info))
        if htmlcode == 'ProxyError':
            dic = {
                'title': '',
                'website': 'timeout',
            }
        else:
            dic = {
                'title': '',
                'website': '',
            }
    js = json.dumps(
        dic,
        ensure_ascii=False,
        sort_keys=True,
        indent=4,
        separators=(',', ':'),
    )  # .encode('UTF-8')
    return js
Exemple #19
0
def main_us(number):
    """Scrape javbus.zone (US mirror) metadata for *number*; return JSON.

    Scans the search results for a matching product code (exact or with
    '-' removed, case-insensitive), then scrapes the detail page.  Proxy
    timeouts produce website='timeout'; other failures are logged and
    yield an empty record.
    """
    try:
        htmlcode = get_html('https://www.javbus.zone/search/' + number)
        if str(htmlcode) == 'ProxyError':
            raise TimeoutError
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        counts = len(html.xpath("//div[@class='row']/div[@id='waterfall']/div"))
        if counts == 0:
            raise Exception('Movie Data not found in javbus.main_us!')
        result_url = ''
        cover_small = ''
        for count in range(1, counts + 1):  # scan search results for the wanted number
            number_get = html.xpath("//div[@id='waterfall']/div[" + str(
                count) + "]/a[@class='movie-box']/div[@class='photo-info']/span/date[1]/text()")[0]
            if number_get.upper() == number.upper() or number_get.replace('-', '').upper() == number.upper():
                result_url = html.xpath(
                    "//div[@id='waterfall']/div[" + str(count) + "]/a[@class='movie-box']/@href")[0]
                # grab the thumbnail from the same result card
                cover_small = html.xpath(
                    "//div[@id='waterfall']/div[" + str(
                        count) + "]/a[@class='movie-box']/div[@class='photo-frame']/img[@class='img']/@src")[0]
                break
        if result_url == '':
            raise Exception('Movie Data not found in javbus.main_us!')
        htmlcode = get_html(result_url)
        if str(htmlcode) == 'ProxyError':
            raise TimeoutError
        number = getNum(htmlcode)
        dic = {
            'title': getTitle(htmlcode).replace(number, '').strip(),
            'studio': getStudio(htmlcode),
            'year': getYear(getRelease(htmlcode)),
            'runtime': getRuntime(htmlcode).replace('分鐘', '').strip(),
            'director': getDirector(htmlcode),
            'actor': getActor(htmlcode),
            'release': getRelease(htmlcode),
            'number': getNum(htmlcode),
            'tag': getTag(htmlcode),
            'series': getSeries(htmlcode),
            'cover': getCover(htmlcode),
            'cover_small': cover_small,
            'imagecut': 3,
            'actor_photo': getActorPhoto(htmlcode),
            'publisher': '',
            'outline': '',
            'score': '',
            'website': result_url,
            'source': 'javbus.py',
        }
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in javbus.main_us : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
def main(number, appoint_url=''):
    """Scrape movie metadata for *number* from FANZA (dmm.co.jp).

    Each known dmm.co.jp detail-URL prefix is tried in turn, unless the
    caller pins the exact page with ``appoint_url``.  Returns the collected
    metadata serialized as a JSON string; on failure a JSON object with an
    empty ``title`` is returned (``website`` is ``'timeout'`` on proxy
    errors, ``''`` otherwise).
    """
    # fanza allow letter + number + underscore, normalize the input here
    # @note: I only find the usage of underscore as h_test123456789
    fanza_search_number = number
    # AV_Data_Capture.py.getNumber() over format the input, restore the h_ prefix
    if fanza_search_number.startswith("h-"):
        fanza_search_number = fanza_search_number.replace("h-", "h_")

    # dmm 'cid' values only contain alphanumerics and underscore, lower-case
    fanza_search_number = re.sub(r"[^0-9a-zA-Z_]", "", fanza_search_number).lower()

    fanza_urls = [
        "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=",
        "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=",
        "https://www.dmm.co.jp/digital/anime/-/detail/=/cid=",
        "https://www.dmm.co.jp/mono/anime/-/detail/=/cid=",
        "https://www.dmm.co.jp/digital/videoc/-/detail/=/cid=",
        "https://www.dmm.co.jp/digital/nikkatsu/-/detail/=/cid=",
        "https://www.dmm.co.jp/rental/-/detail/=/cid=",
    ]
    chosen_url = ""
    htmlcode = ''
    if appoint_url:
        chosen_url = appoint_url
        # go through the age-check gate so the real page is served
        htmlcode = get_html(
            "https://www.dmm.co.jp/age_check/=/declared=yes/?{}".format(
                urlencode({"rurl": appoint_url})
            ))
    else:
        # probe each catalogue section until one does not 404
        for url in fanza_urls:
            chosen_url = url + fanza_search_number
            final_url = "https://www.dmm.co.jp/age_check/=/declared=yes/?{}".format(
                    urlencode({"rurl": chosen_url})
                )
            htmlcode = get_html(final_url)
            if "404 Not Found" not in htmlcode:
                break
    if "404 Not Found" in htmlcode:
        return json.dumps({"title": "", 'website': ''})
    try:
        # consistency fix: the sibling scrapers map get_html()'s 'ProxyError'
        # sentinel to the 'timeout' result; previously this handler was dead
        if str(htmlcode) == 'ProxyError':
            raise TimeoutError
        # for some old page, the input number does not match the page
        # for example, the url will be cid=test012
        # but the hinban on the page is test00012
        # so get the hinban first, and then pass it to following functions
        fanza_hinban = getNum(htmlcode)
        release = getRelease(htmlcode)
        dic = {
            "title": getTitle(htmlcode).strip(),
            'publisher': getPublisher(htmlcode),
            'score': getScore(htmlcode),
            "studio": getStudio(htmlcode),
            "outline": getOutline(htmlcode),
            "runtime": getRuntime(htmlcode),
            # anime pages carry no meaningful director / actor entries
            "director": getDirector(htmlcode) if "anime" not in chosen_url else "",
            "actor": getActor(htmlcode) if "anime" not in chosen_url else "",
            "release": release,
            "number": fanza_hinban,
            "cover": getCover(htmlcode, fanza_hinban),
            "imagecut": 1,
            "tag": getTag(htmlcode),
            "extrafanart": getExtrafanart(htmlcode),
            "label": getLabel(htmlcode),
            "year": getYear(release),  # str(re.search('\d{4}',getRelease(a)).group()),
            "actor_photo": "",
            "website": chosen_url,
            "source": "fanza.py",
        }
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in dmm.main : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))  # .encode('UTF-8')
    return js
Exemple #21
0
def main_us(number):
    """Scrape metadata for a western release id from javdb.com.

    *number* is expected in the form ``<series>.<yy>.<mm>.<dd>`` (e.g.
    ``SexArt.20.01.31``); search hits are matched on both series name and
    release date.  Returns a JSON string; on failure a JSON object with an
    empty ``title`` (``website`` 'timeout' on proxy errors).
    """
    try:
        # ==================================================== search the id
        htmlcode = get_html('https://javdb.com/search?q=' + number +
                            '&f=all').replace(u'\xa0', u' ')
        if str(htmlcode) == 'ProxyError':
            raise TimeoutError
        html = etree.fromstring(
            htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
        counts = len(
            html.xpath(
                '//div[@id=\'videos\']/div[@class=\'grid columns\']/div[@class=\'grid-item column\']'
            ))
        if counts == 0:
            raise Exception('Movie Data not found in javdb.main_us!')
        # ============== walk the results for a series + release-date match
        number_series = number.split('.')[0]
        # the id carries a two-digit year; rebuild a full 'YYYY-MM-DD' date
        number_date = '20' + number.replace(number_series, '').strip('.')
        number_date = number_date.replace('.', '-')
        count = 1
        movie_found = 0
        for count in range(1, counts + 1):
            series_get = html.xpath(
                '//div[@id=\'videos\']/div[@class=\'grid columns\']/div[@class=\'grid-item column\']['
                + str(count) +
                ']/a[@class=\'box\']/div[@class=\'uid2\']/text()')[0]
            date_get = html.xpath(
                '//div[@id=\'videos\']/div[@class=\'grid columns\']/div[@class=\'grid-item column\']['
                + str(count) +
                ']/a[@class=\'box\']/div[@class=\'meta\']/text()')[0]
            # fix: raw strings avoid the invalid '\d' escape warning
            if re.search(r'\d{4}-\d{1,2}-\d{1,2}', date_get):
                date_get = re.findall(r'\d{4}-\d{1,2}-\d{1,2}', date_get)[0]
            series_get = series_get.replace(' ', '')
            if (series_get.upper() == number_series.upper()
                    or series_get.replace('-', '').upper()
                    == number_series.upper()) and number_date == date_get:
                movie_found = 1
                break
        if movie_found == 0:
            raise Exception('Movie Data not found in javdb.main_us!')
        result_url = 'https://javdb.com' + html.xpath(
            '//*[@id="videos"]/div/div/a/@href')[count - 1]
        # ============================================ fetch the detail page
        html_info = get_html(result_url).replace(u'\xa0', u' ')
        if str(html_info) == 'ProxyError':
            raise TimeoutError
        # ================================================ collect the fields
        actor = getActor(html_info)
        number = getNumber(html_info)
        dic = {
            'actor': str(actor).strip(" [',']").replace('\'', ''),
            'title': getTitle(html_info).replace('中文字幕', '').replace(
                "\\n", '').replace('_', '-').replace(number, '').strip(),
            'studio': getStudio(html_info),
            'publisher': getPublisher(html_info),
            'outline': '',
            'score': getScore(html_info),
            'runtime': getRuntime(html_info).replace(' 分鍾', ''),
            'director': getDirector(html_info),
            'release': getRelease(html_info),
            'number': number,
            'cover': getCover_us(html_info),
            # small cover comes from the search result page, not the detail page
            'cover_small': getCover_small(htmlcode, count - 1),
            'extrafanart': getExtraFanart(html_info),
            'imagecut': 3,
            'tag': getTag(html_info),
            'series': getSeries(html_info),
            'year': getYear(getRelease(html_info)),  # str(re.search('\d{4}',getRelease(htmlcode)).group()),
            'actor_photo': getActorPhoto(actor),
            'website': result_url,
            'source': 'javdb.py',
        }
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in javdb.main_us : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(
        dic,
        ensure_ascii=False,
        sort_keys=True,
        indent=4,
        separators=(',', ':'),
    )  # .encode('UTF-8')
    return js
Exemple #22
0
def main(number, isuncensored=False):
    """Scrape metadata for *number* from javdb.com.

    ``isuncensored`` forces uncensored handling (keep the small cover from
    the search page and use imagecut=3).  Returns a JSON string; on failure
    a JSON object with an empty ``title`` (``website`` 'timeout' on proxy
    errors).
    """
    try:
        # ==================================================== search the id
        htmlcode = get_html('https://javdb.com/search?q=' + number +
                            '&f=all').replace(u'\xa0', u' ')
        if str(htmlcode) == 'ProxyError':
            raise TimeoutError
        html = etree.fromstring(
            htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
        counts = len(
            html.xpath(
                '//div[@id=\'videos\']/div[@class=\'grid columns\']/div[@class=\'grid-item column\']'
            ))
        if counts == 0:
            raise Exception('Movie Data not found in javdb.main!')
        # ==================== walk the results looking for the exact id
        count = 1
        number_get = ''
        movie_found = 0
        for count in range(1, counts + 1):
            number_get = html.xpath(
                '//div[@id=\'videos\']/div[@class=\'grid columns\']/div[@class=\'grid-item column\']['
                + str(count) +
                ']/a[@class=\'box\']/div[@class=\'uid\']/text()')[0]
            if number_get.upper() == number.upper():
                movie_found = 1
                break
        if movie_found == 0:
            raise Exception('Movie Data not found in javdb.main!')
        result_url = 'https://javdb.com' + html.xpath(
            '//*[@id="videos"]/div/div/a/@href')[count - 1]
        # ============================================ fetch the detail page
        html_info = get_html(result_url).replace(u'\xa0', u' ')
        if str(html_info) == 'ProxyError':
            raise TimeoutError
        # ============= score / outline, depending on censorship category
        imagecut = 1
        cover_small = ''
        outline = ''
        # fix: raw strings avoid the invalid '\d' escape warning
        if isuncensored or re.match(r'^\d{4,}', number) or re.match(
                r'n\d{4}', number):  # uncensored: keep small cover + score
            imagecut = 3
            cover_small = getCover_small(htmlcode, count - 1)
            score = getScore(html_info)
        elif 'HEYZO' in number.upper():  # HEYZO: small cover, score, outline
            imagecut = 3
            cover_small = getCover_small(htmlcode, count - 1)
            outline, score = getOutlineScore(number)
        else:  # everything else: score + outline
            outline, score = getOutlineScore(number)
        # ================================================ collect the fields
        actor = getActor(html_info)
        # FC2 listings routinely have no actor; use a placeholder
        if len(actor) == 0 and 'FC2-' in number_get:
            actor.append('FC2-NoActor')
        dic = {
            'actor': str(actor).strip(" [',']").replace('\'', ''),
            'title': getTitle(html_info).replace('中文字幕', '').replace('無碼', '').replace(
                "\\n",
                '').replace('_', '-').replace(number_get, '').strip().replace(
                    ' ', '-').replace('--', '-'),
            'studio': getStudio(html_info),
            'publisher': getPublisher(html_info),
            'outline': outline,
            'score': score,
            'runtime': getRuntime(html_info).replace(' 分鍾', ''),
            'director': getDirector(html_info),
            'release': getRelease(html_info),
            'number': number_get,
            'cover': getCover(html_info),
            'cover_small': cover_small,
            'extrafanart': getExtraFanart(html_info),
            'imagecut': imagecut,
            'tag': getTag(html_info),
            'series': getSeries(html_info),
            'year': getYear(getRelease(html_info)),  # str(re.search('\d{4}',getRelease(htmlcode)).group()),
            'actor_photo': getActorPhoto(actor),
            'website': result_url,
            'source': 'javdb.py',
        }
    except TimeoutError:
        dic = {
            'title': '',
            'website': 'timeout',
        }
    except Exception as error_info:
        print('Error in javdb.main : ' + str(error_info))
        dic = {
            'title': '',
            'website': '',
        }
    js = json.dumps(
        dic,
        ensure_ascii=False,
        sort_keys=True,
        indent=4,
        separators=(',', ':'),
    )  # .encode('UTF-8')
    return js
Exemple #23
0
def main(number):
    """Scrape metadata for *number* from javdb.com and return a JSON string.

    Unlike the sibling scrapers, the actor list is reported even when the
    id is not found in the search results — callers use it to tell "no
    data" apart from a banned / failed proxy.
    """
    # fix: bind before the try so the except handler can inspect it even
    # when get_html() itself raises (previously that path hit a NameError)
    htmlcode = ''
    try:
        htmlcode = get_html('https://javdb.com/search?q=' + number + '&f=all').replace(u'\xa0', u' ')
        html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
        counts = len(html.xpath(
            '//div[@id=\'videos\']/div[@class=\'grid columns\']/div[@class=\'grid-item column\']'))
        if counts == 0:
            dic = {
                'title': '',
                'actor': '',
                'website': '',
            }
            js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,
                            separators=(',', ':'), )  # .encode('UTF-8')
            return js
        count = 1
        number_get = ''
        movie_found = 0
        for count in range(1, counts + 1):  # walk the results for the id
            number_get = html.xpath(
                '//div[@id=\'videos\']/div[@class=\'grid columns\']/div[@class=\'grid-item column\'][' + str(
                    count) + ']/a[@class=\'box\']/div[@class=\'uid\']/text()')[0]
            # number_get = number_get.replace('_', '-')
            if number_get == number.upper() or number_get == number.lower():
                movie_found = 1
                break
        result_url = 'https://javdb.com' + html.xpath('//*[@id="videos"]/div/div/a/@href')[count - 1]
        b = get_html(result_url).replace(u'\xa0', u' ')
        actor = getActor(b)
        # FC2 listings routinely have no actor; use a placeholder
        if len(actor) == 0 and 'FC2-' in number_get:
            actor.append('FC2-NoActor')
        if movie_found == 1:
            dic = {
                'actor': str(actor).strip(" [',']").replace('\'', ''),
                'title': getTitle(b).replace('中文字幕', '').replace("\\n", '').replace('_', '-').replace(number_get,
                                                                                                      '').strip().replace(
                    ' ', '-').replace('--', '-'),
                'studio': getStudio(b),
                'publisher': getPublisher(b),
                'outline': getOutline(b).replace('\n', ''),
                'runtime': getRuntime(b).replace(' 分鍾', ''),
                'director': getDirector(b),
                'release': getRelease(b),
                'number': number_get,
                'cover': getCover(b),
                # small cover comes from the search result page
                'cover_small': getCover_small(htmlcode, count - 1),
                'imagecut': 3,
                'tag': getTag(b),
                'series': getSeries(b),
                'year': getYear(getRelease(b)),  # str(re.search('\d{4}',getRelease(htmlcode)).group()),
                'actor_photo': getActorPhoto(actor),
                'website': result_url,
                'source': 'javdb.py',
            }
        else:  # id not found in the search results
            dic = {
                'title': '',
                'actor': str(actor).strip(" [',']").replace('\'', ''),
                'website': '',
            }
    # fix: a bare `except:` also swallowed SystemExit/KeyboardInterrupt;
    # the 'ProxyError' sentinel is used to detect a banned / failed proxy
    except Exception:
        if htmlcode == 'ProxyError':
            dic = {
                'title': '',
                'actor': '',
                'website': 'timeout',
            }
        else:
            dic = {
                'title': '',
                'actor': '',
                'website': '',
            }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
Exemple #24
0
def main_uncensored(number):
    """Scrape metadata for an uncensored *number* from javbus.

    The detail URL is located via :func:`find_number`; the small cover is
    fetched from avsox.  Returns a JSON string; on failure a JSON object
    with an empty ``title`` ('timeout' website on proxy errors).
    """
    result_url = find_number(number)
    if result_url == 'not found':
        dic = {
            'title': '',
            'actor': '',
            'website': '',
        }
        js = json.dumps(
            dic,
            ensure_ascii=False,
            sort_keys=True,
            indent=4,
            separators=(',', ':'),
        )  # .encode('UTF-8')
        return js
    htmlcode = get_html(result_url)
    try:
        number = getNum(htmlcode)
        # hoisted: getRelease() re-parses the page; call it once
        release = getRelease(htmlcode)
        dic = {
            'title': getTitle(htmlcode).replace(number, '').strip().replace(' ', '-'),
            'studio': getStudio(htmlcode),
            'publisher': '',
            'year': getYear(release),
            'outline': '',
            'runtime': getRuntime(htmlcode).replace('分鐘', '').strip(),
            'director': getDirector(htmlcode),
            'actor': getActor(htmlcode),
            'release': release,
            # fix: reuse the hinban parsed above instead of re-parsing the page
            'number': number,
            'cover': getCover(htmlcode),
            'tag': getTag(htmlcode),
            'series': getSeries(htmlcode),
            'imagecut': 3,
            'cover_small': getCover_small(number),
            'actor_photo': getActorPhoto(htmlcode),
            'website': result_url,
            'source': 'javbus.py',
        }
        # no small cover found on avsox: fall back to full-cover mode
        if dic['cover_small'] == '':
            dic['imagecut'] = 0
    except Exception as error_info:
        print('Error in javbus.main_uncensored :' + str(error_info))
        # 'ProxyError' marks a banned / failed proxy
        if htmlcode == 'ProxyError':
            dic = {
                'title': '',
                'website': 'timeout',
            }
        else:
            dic = {
                'title': '',
                'website': '',
            }
    js = json.dumps(
        dic,
        ensure_ascii=False,
        sort_keys=True,
        indent=4,
        separators=(',', ':'),
    )  # .encode('UTF-8')
    return js
Exemple #25
0
def main(number, javlibrary_url):
    """Scrape metadata for *number* from a javlibrary mirror.

    ``javlibrary_url`` is the mirror host to query; the plot outline is
    fetched (best effort) from dmm.  Returns a JSON string; on failure a
    JSON object with an empty ``title`` ('timeout' on proxy errors).
    """
    # fix: bind before the try so the except handler can inspect it even
    # when the first get_html() raises (previously that path hit a NameError)
    htmlcode = ''
    try:
        htmlcode = get_html('http://' + javlibrary_url +
                            '/ja/vl_searchbyid.php?keyword=' + number).replace(
                                u'\xa0', u' ')
        title = getTitle(htmlcode)
        movie_found = 1
        if title == '':  # landed on a result list, not a detail page: scan it
            movie_found = 0
            html = etree.fromstring(
                htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
            count_all = len(
                html.xpath(
                    "//div[@class='videothumblist']/div[@class='videos']/div[@class='video']"
                ))
            for count in range(1, count_all + 1):
                number_get = str(
                    html.xpath(
                        "//div[@class='videothumblist']/div[@class='videos']/div["
                        + str(count) + "]/a/div[1]/text()")).strip(" ['']")
                if number_get == number.upper():
                    url_get = str(
                        html.xpath(
                            "//div[@class='videothumblist']/div[@class='videos']/div["
                            + str(count) + "]/a/@href")).strip(" ['.']")
                    # follow the matching result to its detail page
                    htmlcode = get_html('http://' + javlibrary_url + '/ja' +
                                        url_get).replace(u'\xa0', u' ')
                    movie_found = 1
                    break
        if movie_found == 1:
            try:  # fetch the outline from dmm (best effort)
                dww_htmlcode = get_html(
                    "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=" +
                    number.replace("-", '00'))
            except Exception:
                dww_htmlcode = ''
            actor = getActor(htmlcode)
            number = getNum(htmlcode)
            release = getRelease(htmlcode)
            dic = {
                'actor': str(actor).strip(" [',']").replace('\'', ''),
                'title': getTitle(htmlcode).replace(
                    '中文字幕', '').replace("\\n", '').replace('_', '-').replace(
                        number, '').strip().replace(' ', '-').replace('--', '-'),
                'studio': getStudio(htmlcode),
                'publisher': getPublisher(htmlcode),
                'outline': getOutline(dww_htmlcode).replace('\n', '').replace(
                    '\\n', '').replace('\'', '').replace(',', '').replace(' ', ''),
                'runtime': getRuntime(htmlcode),
                'director': str(getDirector(htmlcode)).replace('----', ''),
                'release': release,
                'number': number,
                'cover': getCover(htmlcode),
                'imagecut': 1,
                'tag': getTag(htmlcode),
                'series': '',
                'year': getYear(release),
                'actor_photo': getActorPhoto(actor),
                'website': getWebsite(htmlcode),
                'source': 'javlibrary.py',
            }
        else:
            dic = {
                'title': '',
                'website': '',
            }
    # fix: a bare `except:` also swallowed SystemExit/KeyboardInterrupt;
    # 'ProxyError' marks a banned / failed proxy
    except Exception:
        if htmlcode == 'ProxyError':
            dic = {
                'title': '',
                'website': 'timeout',
            }
        else:
            dic = {
                'title': '',
                'website': '',
            }
    js = json.dumps(
        dic,
        ensure_ascii=False,
        sort_keys=True,
        indent=4,
        separators=(',', ':'),
    )  # .encode('UTF-8')
    return js
Exemple #26
0
def main(number):
    """Scrape metadata for *number* from javbus.

    The detail URL is located via :func:`find_number`; the plot outline is
    fetched (best effort) from dmm.  Returns a JSON string; on failure a
    JSON object with an empty ``title`` ('timeout' on proxy errors).
    """
    result_url = find_number(number)
    if result_url == 'not found':
        dic = {
            'title': '',
            'actor': '',
            'website': '',
        }
        js = json.dumps(
            dic,
            ensure_ascii=False,
            sort_keys=True,
            indent=4,
            separators=(',', ':'),
        )  # .encode('UTF-8')
        return js
    htmlcode = get_html(result_url)
    try:  # fetch the outline from dmm (best effort)
        dww_htmlcode = get_html(
            "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=" +
            number.replace("-", '00'))
    except Exception:  # fix: bare `except:` also swallowed KeyboardInterrupt
        dww_htmlcode = ''
    try:
        number = getNum(htmlcode)
        # hoisted: getRelease() re-parses the page; call it once
        release = getRelease(htmlcode)
        dic = {
            'title': str(getTitle(htmlcode)).replace(number, '').strip().replace(' ', '-'),
            'studio': getStudio(htmlcode),
            'publisher': getPublisher(htmlcode),
            'year': getYear(release),
            'outline': getOutline(dww_htmlcode).replace('\n', '').replace(
                '\\n', '').replace('\'', '').replace(',', '').replace(' ', ''),
            'runtime': getRuntime(htmlcode).replace('分鐘', '').strip(),
            'director': getDirector(htmlcode),
            'actor': getActor(htmlcode),
            'release': release,
            'number': number,
            'cover': getCover(htmlcode),
            'imagecut': 1,
            'tag': getTag(htmlcode),
            'series': getSeries(htmlcode),
            'actor_photo': getActorPhoto(htmlcode),
            'website': result_url,
            'source': 'javbus.py',
        }
    except Exception as error_info:
        print('Error in javbus.main :' + str(error_info))
        # 'ProxyError' marks a banned / failed proxy
        if htmlcode == 'ProxyError':
            dic = {
                'title': '',
                'website': 'timeout',
            }
        else:
            dic = {
                'title': '',
                'website': '',
            }
    js = json.dumps(
        dic,
        ensure_ascii=False,
        sort_keys=True,
        indent=4,
        separators=(',', ':'),
    )  # .encode('UTF-8')
    return js