Exemple #1
0
def main(number: str):
    """Search the javlib mirror for *number* and return its metadata as JSON.

    The function obtains cookies and a user agent from ``get_javlib_cookie()``,
    requests the search-by-id page, and, when the final URL of the response
    contains ``"/?v=jav"``, scrapes the page into a metadata dictionary.

    Args:
        number: The ID to search for; it is substituted verbatim into the
            ``keyword`` query parameter.

    Returns:
        A JSON string (sorted keys, 4-space indent, non-ASCII preserved).
        It encodes an empty object when no cookies are available or when
        the response URL does not contain ``"/?v=jav"``.
    """
    # Every exit path serialises with the same options.
    dump_options = dict(
        ensure_ascii=False,
        sort_keys=True,
        indent=4,
        separators=(',', ':'),
    )

    cookie_header, agent = get_javlib_cookie()

    # No cookies: nothing can be fetched, so report an empty result.
    if not cookie_header:
        return json.dumps({}, **dump_options)

    # Turn the raw cookie header into a plain name -> value mapping.
    jar = SimpleCookie()
    jar.load(cookie_header)
    cookies = {name: morsel.value for name, morsel in jar.items()}

    # Fetch and parse the search response.
    search_url = "http://www.m45e.com/cn/vl_searchbyid.php?keyword={}".format(number)
    response = get_html(
        search_url,
        cookies=cookies,
        ua=agent,
        return_type="object",
    )
    soup = BeautifulSoup(response.text, "html.parser")
    tree = html.fromstring(str(soup))

    # Only a URL containing "/?v=jav" is scraped (presumably a detail page
    # the search redirected to -- confirm against the site's behaviour).
    if "/?v=jav" not in response.url:
        return json.dumps({}, **dump_options)

    metadata = {
        "title": get_title(tree, soup),
        "studio": get_table_el_single_anchor(soup, "video_maker"),
        "year": get_table_el_td(soup, "video_date")[:4],
        "outline": "",
        "director": get_table_el_single_anchor(soup, "video_director"),
        "cover": get_cover(tree),
        "imagecut": 1,
        "actor_photo": "",
        "website": response.url,
        "source": "javlib.py",
        "actor": get_table_el_multi_anchor(soup, "video_cast"),
        "label": get_table_el_td(soup, "video_label"),
        "tag": get_table_el_multi_anchor(soup, "video_genres"),
        "number": get_table_el_td(soup, "video_id"),
        "release": get_table_el_td(soup, "video_date"),
        "runtime": get_from_xpath(
            tree, '//*[@id="video_length"]/table/tr/td[2]/span/text()'
        ),
        "series": '',
    }
    return json.dumps(metadata, **dump_options)
Exemple #2
0
def main(number: str):
    """Scrape javlibrary metadata for *number* and return it as a JSON string.

    Flow:
      1. Get cookies and a user agent from ``get_javlib_cookie()``.
      2. Request the search-by-id page for *number*.
      3. If the final response URL contains ``"/?v=jav"``, scrape that
         response directly.
      4. Otherwise look through the response HTML for result entries of the
         form ``<a href="..."><div class="id">ID</div>``; if one of the IDs
         equals ``number.upper()``, fetch that entry's link and scrape it.

    Args:
        number: The ID to search for; it is substituted verbatim into the
            ``keyword`` query parameter and compared upper-cased against
            the IDs in the result list.

    Returns:
        A JSON string (sorted keys, 4-space indent, non-ASCII preserved):
        ``{}`` when no cookies are available, ``{"title": ""}`` when nothing
        matches, otherwise the scraped metadata.
    """
    # Function-scope import so this block does not depend on the file header.
    from urllib.parse import urljoin

    raw_cookies, user_agent = get_javlib_cookie()

    # Blank cookies mean the javlib site returned an error.
    if not raw_cookies:
        return _javlib_to_json({})

    # Convert the raw cookie header into a plain name -> value mapping.
    s_cookie = SimpleCookie()
    s_cookie.load(raw_cookies)
    cookies = {key: morsel.value for key, morsel in s_cookie.items()}

    result = get_html(
        "http://www.javlibrary.com/cn/vl_searchbyid.php?keyword={}".format(number),
        cookies=cookies,
        ua=user_agent,
        return_type="object"
    )

    if "/?v=jav" in result.url:
        return _javlib_to_json(_javlib_scrape_detail(result, number))

    # Not a detail page: collect ID -> href from the result list in a single
    # pass. When an ID appears more than once, the last entry wins.
    entry_pattern = re.compile(r'<a href="(.*?)".*?><div class="id">(.*?)</div>')
    href_by_id = {
        video_id: href for href, video_id in entry_pattern.findall(result.text)
    }

    href = href_by_id.get(number.upper())
    if href is None:
        return _javlib_to_json({"title": ""})

    # urljoin resolves "./?v=..." (and root-relative hrefs) against the site
    # root instead of naive concatenation, which produced ".../cn//?v=...".
    detail = get_html(
        urljoin('http://www.javlibrary.com/cn/', href),
        cookies=cookies,
        ua=user_agent,
        return_type="object"
    )
    return _javlib_to_json(_javlib_scrape_detail(detail, number))


def _javlib_to_json(data):
    """Serialise *data* with the JSON options used by every exit path of ``main``."""
    return json.dumps(data, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))


def _javlib_scrape_detail(result, number):
    """Build the metadata dictionary from a fetched detail-page response.

    *result* must expose ``.text`` (the HTML) and ``.url``; *number* is
    passed through to ``get_outline``.
    """
    soup = BeautifulSoup(result.text, "html.parser")
    lx = html.fromstring(str(soup))
    return {
        "title": get_title(lx, soup),
        "studio": get_table_el_single_anchor(soup, "video_maker"),
        "year": get_table_el_td(soup, "video_date")[:4],
        "outline": get_outline(number),
        "director": get_table_el_single_anchor(soup, "video_director"),
        "cover": get_cover(lx),
        "imagecut": 1,
        "actor_photo": "",
        "website": result.url,
        "source": "javlib.py",
        "actor": get_table_el_multi_anchor(soup, "video_cast"),
        "label": get_table_el_td(soup, "video_label"),
        "tag": get_table_el_multi_anchor(soup, "video_genres"),
        "number": get_table_el_td(soup, "video_id"),
        "release": get_table_el_td(soup, "video_date"),
        "runtime": get_from_xpath(lx, '//*[@id="video_length"]/table/tr/td[2]/span/text()'),
        "series": '',
    }