예제 #1
0
    def get_brief(mcs, code):
        """Search avsox for *code* and build a ``Brief`` from the first hit.

        The search page is fetched first; the first URL captured by
        ``mcs.__url_pattern`` is then fetched and parsed as the detail page.

        Args:
            mcs: Object providing ``__url_pattern`` and
                ``__release_date_pattern``.
            code: Code to look up; appended verbatim to the search URL.

        Returns:
            A ``Brief`` with code, title, release date, actress list and
            (when the image request answers 200 or a 3xx with a ``location``
            header) a preview image URL. ``None`` when the search page has
            no match for ``mcs.__url_pattern``.
        """
        search_url = "https://avsox.host/cn/search/" + code
        search_html = requests.get(search_url, proxies=proxy).text

        match = re.search(mcs.__url_pattern, search_html)
        if not match:
            return None

        detail_html = requests.get(match.group(1), proxies=proxy).text
        bs = bs4.BeautifulSoup(detail_html, "lxml")
        movie = bs.select(".movie")[0]

        brief = Brief()
        brief.code = code
        img = movie.select(".screencap", limit=1)[0].a.img
        brief.title = img.attrs["title"]

        # "" is passed as noexcept's second argument; presumably the value
        # used when the pattern does not match -- confirm against noexcept.
        brief.release_date = noexcept(
            lambda: re.search(mcs.__release_date_pattern, str(movie)).group(1),
            "")

        brief.actress = ", ".join(x.text for x in bs.select(
            "#avatar-waterfall", limit=1)[0].find_all("span"))

        img_src = img.attrs["src"]
        rsp = requests.get(img_src, proxies=proxy)
        # Redirect responses are 300-399 only; the previous inclusive upper
        # bound of 400 also classified "400 Bad Request" as a redirect.
        # NOTE(review): requests follows redirects by default, so this branch
        # is likely only reached for a 3xx that could not be followed.
        if 300 <= rsp.status_code < 400:
            if "location" in rsp.headers:
                brief.preview_img_url = rsp.headers["location"]
        elif rsp.status_code == 200:
            brief.preview_img_url = img_src

        return brief
예제 #2
0
 def release_date(self, date: Union[str, datetime.datetime]):
     """Store the release date.

     A ``datetime.datetime`` is stored unchanged. Anything else is parsed
     with the ``%Y-%m-%d`` format inside ``noexcept``, which is given
     ``None`` as its second argument.
     """
     if isinstance(date, datetime.datetime):
         parsed = date
     else:
         parsed = noexcept(
             lambda: datetime.datetime.strptime(date, "%Y-%m-%d"), None
         )
     self.__release_date = parsed
예제 #3
0
 def __get_brief_from_tr(mcs, tr):
     """Build a ``Brief`` from one table row element.

     The preview image comes from the row's ``data-img`` attribute (looked
     up through ``noexcept``); a value starting with "/" is prefixed with
     the site host. Title, code and release date are read from the 2nd,
     3rd and 6th ``td`` cells respectively.
     """
     brief = Brief()
     brief.preview_img_url = noexcept(lambda: tr.attrs["data-img"])

     preview = brief.preview_img_url
     if preview and preview.startswith("/"):
         # Site-relative path: make it absolute.
         brief.preview_img_url = "http://warashi-asian-pornstars.fr" + preview

     cells = tr.select("td")
     brief.title = cells[1].text.strip()
     brief.code = cells[2].text.upper()
     brief.release_date = cells[5].text.strip()
     return brief
예제 #4
0
파일: etigoya.py 프로젝트: wc-itdog/JavPy
 def get_history_names_by_li(mcs, li):
     """Collect the names listed on the page linked from *li*.

     The first ``Etigoya.url_pattern`` match in ``str(li)`` is fetched;
     every ``Etigoya.name_pattern`` hit in that page is cleaned with
     ``Etigoya.purify_pattern`` and stripped.

     Returns:
         The list of cleaned names, or ``[]`` when no URL is found.
     """
     link = noexcept(
         lambda: re.search(Etigoya.url_pattern, str(li)).group(0))
     if not link:
         return []

     page = requests.get(link, proxies=proxy).text

     cleaned = []
     for raw_name in re.findall(Etigoya.name_pattern, page):
         cleaned.append(re.sub(Etigoya.purify_pattern, "", raw_name).strip())
     return cleaned
예제 #5
0
    def get_brief_from_a_card(card_tag):
        """Build a ``Brief`` from one card element.

        Args:
            card_tag: Parsed card element exposing ``text``, ``find`` and
                ``find_all``.

        Returns:
            A ``Brief`` whose release date is the first ``YYYY-MM-DD`` found
            in the card text (parsed to ``datetime``), whose actress field
            joins the text of every ``a.btn-danger`` with ", ", and whose
            code is the stripped ``h4`` text.
        """
        release_date = noexcept(lambda: datetime.datetime.strptime(
            re.search(r"\d\d\d\d-\d\d-\d\d", card_tag.text).group(0),
            "%Y-%m-%d"))

        actress = ", ".join(
            link.text
            for link in card_tag.find_all(name="a", attrs={"class": "btn-danger"}))

        img = noexcept(lambda: card_tag.find(name="img").attrs["data-src"])
        # The lookup goes through noexcept, so a card without an <img> or
        # without data-src presumably yields a falsy result here; calling
        # .startswith on it unconditionally would raise AttributeError.
        if img and not img.startswith("http"):
            img = "http:" + img

        brief = Brief()
        brief.preview_img_url = img
        brief.title = noexcept(lambda: card_tag.find(name="h5").text.strip(),
                               "")
        brief.actress = actress
        brief.release_date = release_date
        brief.code = card_tag.find(name="h4").text.strip()

        return brief
예제 #6
0
    def __check_name_in_box(mcs, name, box):
        """Return the name found after the first "-" in the box title.

        As a side effect the detail-page URL of the box is cached in
        ``mcs.__actress_detail_url`` under both *name* and the returned name.

        Args:
            mcs: Object holding the ``__actress_detail_url`` mapping.
            name: Name to look for in the lower-cased box text.
            box: Parsed element exposing ``text``, ``find`` and ``a``.

        Returns:
            The stripped text between the first and second "-" of the
            lower-cased ``p`` text, or ``None`` when *name* is not in the
            box, the title has no "-", or that segment is empty.
        """
        if name not in box.text.lower():
            return None
        title = box.find(name="p").text.lower()
        segments = title.split("-")
        if len(segments) < 2:
            # No "-" separator: previously an IndexError.
            return None
        jp_name = segments[1].strip()
        if not jp_name:
            return None

        # cache for later parsing actress info, None for no url
        url = noexcept(lambda: box.a.attrs["href"])
        # Only build the URL when an href was found; formatting a missing
        # href used to cache the bogus string ".../None" instead of None.
        detail_url = ("http://warashi-asian-pornstars.fr/%s" % url) if url else None
        mcs.__actress_detail_url[name] = detail_url
        mcs.__actress_detail_url[jp_name] = detail_url
        return jp_name
예제 #7
0
    def __get_brief_from_card(card):
        """Build a ``Brief`` from one result card.

        Code comes from the first ``.tag.is-link.is-light`` element, the
        actress field joins every ``.tag.is-primary.is-light`` text with
        ", ", title comes from the first ``.title`` element, the preview
        image from the first ``rel`` value of that element's link (via
        ``noexcept``), and the release date from the footer paragraph.
        """
        code_text = card.select(".tag.is-link.is-light")[0].text.strip()
        actress_names = [
            tag.text.strip() for tag in card.select(".tag.is-primary.is-light")
        ]
        title_tag = card.select(".title")[0]
        title_text = title_tag.text.strip()
        preview = noexcept(lambda: title_tag.a.attrs["rel"][0])
        footer = card.select("footer")[0]
        released = footer.p.text.strip()

        brief = Brief()
        brief.title = title_text
        brief.preview_img_url = preview
        brief.code = code_text
        brief.actress = ", ".join(actress_names)
        brief.release_date = released
        return brief
예제 #8
0
    def __get_brief_by_card(card):
        """Build a ``Brief`` from a card laid out in ``.column`` cells.

        Returns ``None`` when the card has no ``.column`` children.
        Otherwise the release date is read from column 1, the actress
        spans from column 2, the title and preview link from column 3 and
        the code from the node following column 4.
        """
        cells = card.select(".column")
        if not cells:  # like 飯岡かなこ
            return None

        code_text = cells[4].next.strip()
        actress_names = [
            span.text.strip() for span in cells[2].find_all(name="span")
        ]
        title_cell = cells[3]
        title_text = title_cell.text.strip()
        preview = noexcept(lambda: title_cell.a.attrs["rel"][0])
        released = cells[1].text.strip()

        brief = Brief()
        brief.title = title_text
        brief.preview_img_url = preview
        brief.code = code_text
        brief.actress = ", ".join(actress_names)
        brief.release_date = released
        return brief
예제 #9
0
    def search_by_code(mcs, code):
        """Look up *code* on javmost and return an ``AV`` with a video URL.

        Args:
            mcs: Object providing ``__client`` (HTTP client) and
                ``__try_one_button``.
            code: Code to look up; inserted verbatim into the page URL.

        Returns:
            An ``AV`` with ``preview_img_url``, ``video_url`` and ``code``
            set, or ``None`` when the page does not answer 200, lacks the
            ``og:image`` meta tag, the ``.tab-overflow`` element or the
            embedded value variable, or when no button yields a URL.
        """
        url = "http://www5.javmost.com/" + code + "/"
        main_rsp = mcs.__client.get(url, proxies=proxy)
        if main_rsp.status_code != 200:
            return None

        page = main_rsp.text

        img = noexcept(
            lambda: re.search(r"<meta property=\"og:image\" content=\"(.+?)\"",
                              page).group(1))

        if not img:
            return None

        # Nov. 13 adding: https://www5.javmost.com/IENE-623/
        # Only scheme-less URLs need a prefix. Testing for "http" rather than
        # "http:" keeps https:// URLs from becoming "http:https://...".
        if not img.startswith("http"):
            img = "http:" + img

        bs = bs4.BeautifulSoup(page, "lxml")

        tab_bars = bs.select(".tab-overflow")
        if not tab_bars:
            return None
        # First and last <li> are dropped, as in the original slice.
        buttons = tab_bars[0].find_all(name="li")[1:-1]

        # The page references a JS variable by name; resolve the name first,
        # then its quoted value. A miss on either means the page is unusable.
        var_match = re.search("'value':(.+?),", page)
        if not var_match:
            return None
        value_match = re.search("var %s = '(.+?)'" % var_match.group(1), page)
        if not value_match:
            return None
        value = value_match.group(1)

        url = wait_until([
            submit(mcs.__try_one_button, button, value, main_rsp)
            for button in buttons
        ])

        if not url:
            return None

        av = AV()
        av.preview_img_url = img
        av.video_url = url
        av.code = code

        return av
예제 #10
0
 def processor(*x, **kwx):
     """Call ``func`` with placeholder positions filled from *x*.

     For every index in ``args_place_holders`` the positional argument at
     that index is replaced by ``x[i]``; the call is made through
     ``noexcept`` with ``fixed_kwargs`` and *kwx* as keyword arguments.
     """
     # Work on a per-call copy: assigning into the enclosing ``args`` itself
     # would mutate state shared by every invocation of this closure.
     _args = list(args)
     for i in args_place_holders:
         _args[i] = x[i]
     return noexcept(lambda: func(*_args, **fixed_kwargs, **kwx))