コード例 #1
0
ファイル: metajavlib.py プロジェクト: yyymess/avdc
def main(number: str) -> Movie:
    """Scrape javlib for ``number`` and enrich the result from jav321 and javdb.

    javlib is the primary source. jav321 and javdb are then queried with the
    movie id javlib reported, and each secondary result is only used when
    ``match_movie`` accepts it.

    Args:
        number: The movie number to look up.

    Returns:
        The javlib movie with the merged fields; the unfilled javlib result
        as-is when javlib found nothing; or an empty ``Movie`` when javlib
        did not hand back a ``Movie`` object at all.
    """
    javlib_scrap = javlib.main(number)
    # Defensive: a non-Movie return value (e.g. a JSON string) has no
    # ``is_filled`` and would raise AttributeError below.
    if not isinstance(javlib_scrap, Movie):
        return Movie()
    if not javlib_scrap.is_filled():
        return javlib_scrap

    jav321_scrap = jav321.main(javlib_scrap.movie_id)
    javdb_scrap = javdb.main(javlib_scrap.movie_id)
    # Replace a secondary result describing a different movie with an empty
    # Movie so that none of its fields leak into the merge.
    if not javlib_scrap.match_movie(jav321_scrap):
        jav321_scrap = Movie()
    if not javlib_scrap.match_movie(javdb_scrap):
        javdb_scrap = Movie()

    # These fields are always taken from the secondary sources, jav321 first.
    javlib_scrap.series = jav321_scrap.series or javdb_scrap.series
    javlib_scrap.outline = jav321_scrap.outline or javdb_scrap.outline
    javlib_scrap.extra_fanart = (jav321_scrap.extra_fanart
                                 or javdb_scrap.extra_fanart)
    javlib_scrap.ratings.extend(javdb_scrap.ratings)

    # These fields are only filled in when javlib left them empty, javdb first.
    if not javlib_scrap.actors:
        javlib_scrap.actors = javdb_scrap.actors or jav321_scrap.actors

    if not javlib_scrap.director:
        javlib_scrap.director = javdb_scrap.director or jav321_scrap.director

    if not javlib_scrap.studio:
        javlib_scrap.studio = javdb_scrap.studio or jav321_scrap.studio

    # NOTE(review): series was already assigned from the same two sources
    # above, so this can only swap one falsy value for another. Kept to
    # preserve the exact resulting value; candidate for removal.
    if not javlib_scrap.series:
        javlib_scrap.series = javdb_scrap.series or jav321_scrap.series

    javlib_scrap.merge_tags(jav321_scrap)
    javlib_scrap.merge_tags(javdb_scrap)

    return javlib_scrap
コード例 #2
0
ファイル: javlib.py プロジェクト: yyymess/avdc
def _add_rating(movie: Movie, lx: html.HtmlElement) -> None:
    score = lx.xpath('//*[@id="video_review"]//span[@class="score"]/text()')
    if not score:
        return
    try:
        score = float(score[0].strip('( )'))
        movie.add_rating(rating=score, source='javlib', max_rating=10.0)
    except:
        logger.debug('评分刮削失败。')
        pass
コード例 #3
0
ファイル: mgstage.py プロジェクト: yyymess/avdc
def _set_rating(movie: Movie, htmlcode) -> None:
    """Record the mgstage rating and vote count found in ``htmlcode``.

    The text of the table cell next to the "評価" header is concatenated and
    sliced positionally: the first three characters are parsed as the score
    and the run starting at index 4, up to the next space, as the vote
    count. Best-effort: any failure (including unparsable markup) is logged
    at debug level and ignored so it cannot abort the rest of the scrape.

    Args:
        movie: Object receiving the rating through ``add_rating``.
        htmlcode: HTML fragment containing the product detail table.
    """
    try:
        tree = etree.fromstring(htmlcode, etree.HTMLParser())
        cells = tree.xpath('//th[contains(text(),"評価")]/../td/text()')
        text = ''.join(cell.strip() for cell in cells)
        movie.add_rating(rating=float(text[:3]),
                         votes=int(text[4:].split(' ')[0]),
                         source='mgstage',
                         max_rating=5.0)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        logger.debug('评分刮削失败。')
コード例 #4
0
def set_rating(movie: Movie, htmlcode) -> None:
    """Record the fc2 star rating and vote count found in ``htmlcode``.

    Inside the first ``a.items_article_Stars`` element, the last character of
    the first ``span``'s class attribute is parsed as the score (out of 5)
    and the text of the second ``span`` as the vote count. Best-effort: any
    failure (including unparsable markup) is logged at debug level and
    ignored so that a missing rating cannot discard the rest of the movie.

    Args:
        movie: Object receiving the rating through ``add_rating``.
        htmlcode: HTML of the article page.
    """
    try:
        tree = etree.fromstring(htmlcode, etree.HTMLParser())
        rating_link = tree.xpath('//a[@class="items_article_Stars"]')[0]
        stars = rating_link.xpath('.//span')
        rating = float(stars[0].attrib['class'][-1])
        votes = int(stars[1].text)
        movie.add_rating(rating=rating,
                         max_rating=5.0,
                         source='fc2',
                         votes=votes)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        logger.debug('评分刮削失败')
コード例 #5
0
def set_rating(movie: Movie, a) -> None:
    """Record the javdb rating and vote count found in the page ``a``.

    Takes the first non-blank text node under the span next to the "評分"
    label and matches it against a "<score>...由<votes>人評價" pattern.
    Nothing is recorded when the label, the text or the pattern is missing.

    Args:
        movie: Object receiving the rating through ``add_rating``.
        a: HTML of the detail page.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    pieces = tree.xpath('//strong[contains(text(),"評分")]/../span//text()')

    # First text node that is non-empty once stripped, or None.
    summary = next((piece.strip() for piece in pieces if piece.strip()), None)
    if summary is None:
        return

    match = re.search(r'^(\d\.\d+).*由(\d+)人評價', summary)
    if not match:
        return

    score = float(match.group(1))
    vote_count = int(match.group(2))
    movie.add_rating(rating=score,
                     max_rating=5.0,
                     votes=vote_count,
                     source='javdb')
コード例 #6
0
ファイル: mgstage.py プロジェクト: yyymess/avdc
def _set_tags(movie: Movie, a) -> None:
    """Set ``movie.tags`` from the "ジャンル:" row of the detail table.

    Both linked and plain-text genre entries are collected, stripped and
    de-duplicated, and the fixed tags '日本' and '有码' are always included.
    Tags keep the order in which they were first seen (a dict is used
    instead of a set) so the resulting list is the same on every run.

    Args:
        movie: Object whose ``tags`` attribute is assigned.
        a: HTML fragment containing the product detail table.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    raw = tree.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()|'
                     '//th[contains(text(),"ジャンル:")]/../td/text()')
    stripped = (text.strip() for text in raw)
    # dict keys preserve insertion order and drop duplicates.
    tags = dict.fromkeys(text for text in stripped if text)
    for fixed in ('日本', '有码'):
        tags.setdefault(fixed, None)
    movie.tags = list(tags)
コード例 #7
0
ファイル: jav321.py プロジェクト: yyymess/avdc
def main(number: str) -> Movie:
    """Search jav321 for ``number`` and build a ``Movie`` from the result.

    The movie is only populated when the search ends on a URL containing
    "/video/"; otherwise (or when the response body is blank) an empty
    ``Movie`` is returned.

    Args:
        number: The movie number submitted as the ``sn`` search field.

    Returns:
        The populated movie, or an empty one when nothing was found.
    """
    response = post_html(url="https://www.jav321.com/search",
                         query={"sn": number})
    movie = Movie()

    if not response.text.strip():
        return movie

    soup = BeautifulSoup(response.text, "html.parser")
    tree = html.fromstring(str(soup))

    # Anything other than a "/video/" URL means no detail page was reached.
    if "/video/" not in response.url:
        return movie

    parse_info(soup, movie)
    movie.title = get_title(tree)
    movie.outline = get_outline(tree)
    movie.cover = get_cover(tree)
    movie.extra_fanart = get_extrafanart(response.text)
    movie.trailer = get_trailer(response.text)
    movie.imagecut = 1
    movie.scraper_source = 'jav321'
    movie.website = response.url
    return movie
コード例 #8
0
ファイル: jav321.py プロジェクト: yyymess/avdc
def parse_info(soup: BeautifulSoup, movie: Movie) -> None:
    """Fill the tabular fields of ``movie`` from the jav321 info column.

    The ``div.row > div.col-md-9`` element is split on ``<br/>`` and each
    piece is indexed by whatever ``get_bold_text`` returns for it; the
    individual ``get_*`` helpers then read from that mapping. ``movie`` is
    left untouched when the element is absent.

    Args:
        soup: Parsed detail page.
        movie: Object whose attributes are assigned in place.
    """
    info_column = soup.select_one("div.row > div.col-md-9")
    if not info_column:
        return

    # Later pieces overwrite earlier ones that share the same bold label.
    fields = {
        get_bold_text(h=piece): piece
        for piece in str(info_column).split("<br/>")
    }

    movie.actors = get_actor(fields)
    movie.studio = get_studio(fields)
    movie.tags = get_tag(fields)
    movie.release = get_release(fields)
    movie.runtime = get_runtime(fields)
    movie.series = get_series(fields)
    movie.movie_id = get_number(fields)
コード例 #9
0
ファイル: javlib.py プロジェクト: yyymess/avdc
def main(number: str) -> Movie:
    """Look ``number`` up on javlibrary and return the scraped ``Movie``.

    The id search either lands directly on a movie page (URL containing
    "/?v=jav") or on a result list, in which case the best match chosen by
    ``_find_best_movie_match`` is fetched.

    Args:
        number: The movie number to search for.

    Returns:
        The scraped movie, or an empty ``Movie`` when no cookies are
        available or no matching page is found.
    """
    raw_cookies, user_agent = get_javlib_cookie()

    # Blank cookies mean the javlib site returned an error. Return an empty
    # Movie (not a JSON string) so every exit honours the declared type.
    if not raw_cookies:
        return Movie()

    # Turn the raw cookie header into a plain name -> value dictionary.
    jar = SimpleCookie()
    jar.load(raw_cookies)
    cookies = {key: morsel.value for key, morsel in jar.items()}

    def fetch(url):
        """Download ``url`` and return (response, soup, lxml tree)."""
        response = get_html(url,
                            cookies=cookies,
                            ua=user_agent,
                            return_type="object")
        parsed = BeautifulSoup(response.text, "html.parser")
        return response, parsed, html.fromstring(str(parsed))

    result, soup, lx = fetch(
        "http://www.javlibrary.com/cn/vl_searchbyid.php?keyword={}".format(
            number))

    # The search landed directly on a movie page.
    if "/?v=jav" in result.url:
        return extract_movie(lx, soup, result.url)

    # Otherwise pick the best entry from the result list, if there is one.
    url = _find_best_movie_match(lx, number)
    if not url:
        return Movie()

    result, soup, lx = fetch(url)
    return extract_movie(lx, soup, result.url)
コード例 #10
0
def main(number):
    """Search javdb for ``number`` and return the scraped ``Movie``.

    Any exception raised while scraping is logged as a warning and an empty
    ``Movie`` is returned instead.

    Args:
        number: The movie number to search for (upper-cased before use).

    Returns:
        The populated movie, or an empty ``Movie`` when nothing matched or
        scraping failed.
    """
    movie = Movie()
    try:
        number = number.upper()
        search_path = '/search?q=' + number + '&f=all'
        try:
            query_result = get_html('https://javdb.com' + search_path)
        except Exception:
            # Retry the search on the alternate host.
            query_result = get_html('https://javdb4.com' + search_path)
        tree = etree.fromstring(query_result, etree.HTMLParser())

        # javdb sometimes returns multiple results and the first element may
        # not be the one we are looking for, so find the matching candidate.
        urls = tree.xpath('//*[@id="videos"]/div/div/a/@href')

        # Numbers shaped like "Name.NN.NN.NN" are not matched by uid; the
        # first search hit is taken for them.
        is_dotted_id = bool(
            re.search(r'[a-zA-Z]+\.\d{2}\.\d{2}\.\d{2}', number))

        correct_url = ''
        match_index = None
        if is_dotted_id:
            # Guard the empty result list instead of relying on IndexError.
            if urls:
                correct_url = urls[0]
        else:
            ids = tree.xpath('//*[@id="videos"]/div/div/a'
                             '/div[contains(@class, "uid")]/text()')
            ids = [uid.upper() for uid in ids]
            if number in ids:
                match_index = ids.index(number)
                correct_url = urls[match_index]

        if not correct_url:
            return Movie()
        detail_page = get_html('https://javdb.com' + correct_url)

        # No image cut by default.
        imagecut = 3
        if match_index is None:
            cover_small = getCover_small(query_result)
        else:
            cover_small = getCover_small(query_result, index=match_index)
        if 'placeholder' in cover_small:
            # The thumbnail is a placeholder: use the normal cover and cut it.
            imagecut = 1
            cover_small = getCover(detail_page)

        movie_id = getNum(detail_page)
        title = getTitle(detail_page)
        if title and movie_id:
            # Remove the id duplicated inside the title.
            title = title.replace(movie_id, '').strip()

        movie.actors = getActor(detail_page)
        movie.title = title
        movie.studio = getStudio(detail_page)
        movie.outline = getOutline(detail_page)
        movie.runtime = getRuntime(detail_page)
        movie.director = getDirector(detail_page)
        movie.release = getRelease(detail_page)
        movie.movie_id = movie_id
        movie.cover = getCover(detail_page)
        movie.cover_small = cover_small
        movie.trailer = getTrailer(detail_page)
        movie.extra_fanart = getExtrafanart(detail_page)
        movie.imagecut = imagecut
        movie.tags = getTag(detail_page)
        movie.label = getLabel(detail_page)
        movie.website = 'https://javdb.com' + correct_url
        movie.scraper_source = 'javdb'
        movie.series = getSeries(detail_page)
        set_rating(movie, detail_page)

    except Exception as e:
        logger.warning(e, exc_info=True)
        movie = Movie()
    return movie
コード例 #11
0
ファイル: core.py プロジェクト: yyymess/avdc
def _promote_source(sources: list, name: str) -> None:
    """Move ``name`` to the front of ``sources`` in place."""
    sources.insert(0, sources.pop(sources.index(name)))


def _movie_from_json_data(json_data: dict) -> Movie:
    """Build a ``Movie`` from the dict produced by a JSON-returning scraper."""
    movie = Movie()
    movie.title = json_data.get('title')
    movie.actors = json_data.get('actor')
    movie.release = json_data.get('release')
    movie.cover_small = json_data.get('cover_small')
    movie.cover = json_data.get('cover')
    movie.tags = json_data.get('tag')
    movie.year = json_data.get('year')
    movie.series = json_data.get('series')
    movie.runtime = json_data.get('runtime')
    movie.outline = json_data.get('outline')
    movie.scraper_source = json_data.get('source')
    movie.label = json_data.get('label')
    movie.studio = json_data.get('studio')
    movie.director = json_data.get('director')
    movie.movie_id = json_data.get('number')
    movie.trailer = json_data.get('trailer')
    movie.website = json_data.get('website')
    movie.imagecut = json_data.get('imagecut')
    movie.extra_fanart = json_data.get('extrafanart')
    return movie


def _report_not_found(filepath: str, conf) -> Movie:
    """Report a failed lookup, move the file aside and return an empty Movie."""
    print('[-]Movie Data not found!')
    moveFailedFolder(filepath, conf.failed_folder())
    return Movie()


def get_data_from_json(file_number: str, filepath: str) -> Movie:
    """Fetch metadata for ``file_number`` from the configured scrapers.

    Sources are tried in the configured order, except that one source may be
    moved to the front when the file number matches its naming pattern. The
    loop stops at the first source that returns a filled ``Movie`` or a JSON
    payload accepted by ``get_data_state``.

    Args:
        file_number: The movie number extracted from the file name.
        filepath: Path of the file; recorded on the result and moved to the
            failed folder when no data is found.

    Returns:
        The scraped movie, or an empty ``Movie`` when nothing usable was
        found.
    """
    conf = Config.get_instance()

    func_mapping = {
        "airav": airav.main,
        "avsox": avsox.main,
        "fc2": fc2.main,
        "fanza": fanza.main,
        "javdb": javdb.main,
        "javbus": javbus.main,
        "mgstage": mgstage.main,
        "jav321": jav321.main,
        "xcity": xcity.main,
        "javlib": javlib.main,
        "dlsite": dlsite.main,
        "metajavlib": metajavlib.main,
    }

    # Default fetch order, from the beginning to the end.
    sources = conf.sources().split(',')

    # If the file number matches certain rules, move the matching service to
    # the beginning of the list. Only the first matching rule applies.
    if "avsox" in sources and (
            re.match(r"^\d{5,}", file_number)
            or any(tag in file_number for tag in ("HEYZO", "heyzo", "Heyzo"))):
        _promote_source(sources, "avsox")
    elif "mgstage" in sources and (re.match(r"\d+\D+", file_number)
                                   or "SIRO" in file_number.upper()):
        _promote_source(sources, "mgstage")
    elif "fc2" in sources and "FC2" in file_number.upper():
        _promote_source(sources, "fc2")
    elif "dlsite" in sources and any(
            tag in file_number for tag in ("RJ", "rj", "VJ", "vj")):
        _promote_source(sources, "dlsite")

    json_data = {}
    movie = None
    for source in sources:
        try:
            if conf.debug():
                logger.attn(f'select {source}')
            returnval = func_mapping[source](file_number)
            if isinstance(returnval, Movie):
                if returnval.is_filled():
                    movie = returnval
                    break
            else:
                json_data = json.loads(returnval)
                # Stop at the first service that returns valid data.
                if get_data_state(json_data):
                    break
        except Exception:
            # NOTE(review): a failing scraper (or an unknown source name)
            # aborts the whole chain instead of moving on to the next
            # source; confirm whether ``continue`` is intended here.
            traceback.print_exc()
            break

    # Give up if no source produced anything.
    if not json_data and not movie:
        return _report_not_found(filepath, conf)

    # Legacy scrapers return JSON; convert their payload to a Movie.
    if not movie:
        movie = _movie_from_json_data(json_data)

    movie.original_path = filepath

    if not movie.is_filled():
        return _report_not_found(filepath, conf)

    # TODO: translating selected fields (driven by conf.is_transalte() and
    # conf.transalte_values()) has not been ported to Movie yet.

    logger.debug(movie)
    return movie
コード例 #12
0
ファイル: mgstage.py プロジェクト: yyymess/avdc
def _set_actors(movie: Movie, a):
    """Set ``movie.actors`` from the "出演:" row of the detail table.

    Both linked and plain-text entries are collected; each is stripped and
    blank entries are dropped.

    Args:
        movie: Object whose ``actors`` attribute is assigned.
        a: HTML fragment containing the product detail table.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    names = tree.xpath('//th[contains(text(),"出演:")]/../td/a/text()|'
                       '//th[contains(text(),"出演:")]/../td/text()')
    trimmed = (name.strip() for name in names)
    movie.actors = [name for name in trimmed if name]
コード例 #13
0
ファイル: mgstage.py プロジェクト: yyymess/avdc
def main(number2) -> Movie:
    """Scrape the mgstage product page for ``number2`` into a ``Movie``.

    The page is fetched with the ``adc=1`` cookie. The ``detail_data`` and
    ``introduction`` elements are rendered to strings with fixed indentation
    runs removed before being handed to the field parsers.

    Args:
        number2: The product number (upper-cased before use).

    Returns:
        The populated movie.
    """
    number = number2.upper()
    product_url = ('https://www.mgstage.com/product/product_detail/'
                   + str(number) + '/')
    htmlcode = str(get_html(product_url, cookies={'adc': '1'}))
    soup = BeautifulSoup(htmlcode, 'lxml')

    # Indentation runs removed, in this order, from the rendered elements.
    indent_runs = (
        '\n                                        ',
        '                                ',
        '\n                            ',
        '\n                        ',
    )

    def render(node) -> str:
        """Return ``str(node)`` with the indentation runs removed."""
        text = str(node)
        for run in indent_runs:
            text = text.replace(run, '')
        return text

    a = render(soup.find(attrs={'class': 'detail_data'}))
    b = render(soup.find(attrs={'id': 'introduction'}))

    movie = Movie()
    raw_title = getTitle(htmlcode)
    movie.title = raw_title.replace("\\n", '').replace('        ', '')
    movie.studio = getStudio(a)
    movie.outline = getOutline(b)
    movie.runtime = getRuntime(a)
    _set_actors(movie, a)
    movie.release = getRelease(a)
    movie.movie_id = getNum(a)
    movie.cover = getCover(htmlcode)
    movie.imagecut = 0
    _set_tags(movie, a)
    movie.extra_fanart = getExtrafanart(htmlcode)
    movie.website = f'https://www.mgstage.com/product/product_detail/{number}/'
    movie.scraper_source = 'mgstage'
    movie.series = getSeries(a)
    _set_rating(movie, a)

    return movie
コード例 #14
0
def main(number):
    """Scrape the fc2 article page for ``number`` into a ``Movie``.

    A leading "FC2-" / "fc2-" marker is removed from ``number`` before the
    article URL is built. Any exception raised while scraping is logged and
    an empty ``Movie`` is returned instead.

    Args:
        number: The article number, with or without the "FC2-" prefix.

    Returns:
        The populated movie, or an empty ``Movie`` when scraping failed.
    """
    movie = Movie()
    try:
        number = number.replace('FC2-', '').replace('fc2-', '')
        article_url = f'https://adult.contents.fc2.com/article/{number}/'
        htmlcode = get_html(article_url)

        # fc2 lists no cast, so the uploader stands in for actor, studio and
        # director. Parse it once instead of three times.
        uploader = getStudio_fc2com(htmlcode)

        movie.title = getTitle_fc2com(htmlcode)
        movie.release = getRelease_fc2com(htmlcode)
        movie.movie_id = f'FC2-{number}'
        movie.actors = [uploader]
        movie.studio = uploader
        movie.director = uploader
        movie.cover = getCover_fc2com(htmlcode)
        movie.imagecut = 0
        movie.extra_fanart = getExtrafanart(htmlcode)
        movie.trailer = getTrailer(htmlcode)
        movie.tags = getTag_fc2com(number)
        movie.website = article_url
        movie.scraper_source = 'fc2'
        set_rating(movie, htmlcode)
    except Exception:
        logger.error('fc2刮削失败。')
        logger.debug('', exc_info=True)
        movie = Movie()
    return movie
コード例 #15
0
ファイル: javlib.py プロジェクト: yyymess/avdc
def extract_movie(lx: html.HtmlElement, soup: BeautifulSoup,
                  url: str) -> Movie:
    """Build a ``Movie`` from a parsed javlib movie page.

    Args:
        lx: lxml tree of the page, used for the xpath-based fields.
        soup: BeautifulSoup tree of the same page, used for table fields.
        url: Address of the page, stored as the movie's website.

    Returns:
        The populated movie, always tagged with '日本' and '有码'.
    """

    def split_list(joined):
        """Split a comma-joined value, dropping blank entries.

        ``''.split(',')`` yields ``['']``, a truthy list holding one empty
        name, which would make an absent cast or genre list look present.
        """
        return [item for item in (joined or '').split(',') if item.strip()]

    movie = Movie()
    movie.title = get_title(lx, soup)
    movie.studio = get_table_el_single_anchor(soup, "video_maker")
    movie.director = get_table_el_single_anchor(soup, "video_director")
    movie.cover = get_cover(lx)
    movie.imagecut = 1
    movie.website = url
    movie.scraper_source = 'javlib'
    movie.actors = split_list(get_table_el_multi_anchor(soup, "video_cast"))
    movie.label = get_table_el_td(soup, "video_label")
    movie.tags = split_list(get_table_el_multi_anchor(soup, "video_genres"))
    movie.movie_id = get_table_el_td(soup, "video_id")
    movie.release = get_table_el_td(soup, "video_date")
    movie.runtime = get_from_xpath(
        lx, '//*[@id="video_length"]/table/tr/td[2]/span/text()')

    _add_rating(movie, lx)
    movie.add_tag('日本')
    movie.add_tag('有码')

    return movie