Beispiel #1
0
 def get_brief(cls, code):
     """Fetch the javlibrary page for ``code`` and build a ``Brief`` from it.

     The search endpoint is queried first. If its response carries an
     ``og:url`` meta tag, that URL is used directly. Otherwise the first
     ``.video`` entry of the response is followed and its ``og:url`` is
     used instead.

     Args:
         code: Identifier appended verbatim to the search URL.

     Returns:
         A populated ``Brief``, or ``None`` when no ``og:url`` can be
         located (including the case where the search response contains
         no ``.video`` entry at all).
     """
     og_url_pattern = r"\"og:url\" content=\"//(.+?)\">"

     html = cls.__client.get(
         "http://www.javlibrary.com/ja/vl_searchbyid.php?keyword=" +
         code).text
     match = re.search(og_url_pattern, html)
     if not match:  # like JUFE-114
         # No og:url on the search response: follow the first listed entry.
         bs = bs4.BeautifulSoup(html, "lxml")
         videos = bs.select(".video")
         if not videos:
             # Nothing listed either; treat as "not found" instead of
             # failing with IndexError.
             return None
         # href[1:] drops the first character of the href before it is
         # appended to the site prefix.
         url = "http://www.javlibrary.com/ja" + videos[0].a.attrs['href'][1:]
         rsp = cls.__client.get(url)
         match = re.search(og_url_pattern, rsp.text)
         if not match:
             return None

     url = match.group(1)
     html = cls.__client.get("http://" + url).text
     bs = bs4.BeautifulSoup(html, "lxml")

     brief = Brief()
     brief.title = bs.select(".post-title")[0].text
     brief.preview_img_url = bs.select("#video_jacket_img")[0].attrs["src"]
     if not brief.preview_img_url.startswith("http"):
         # The src has no http(s) scheme; prepend one.
         brief.preview_img_url = "http:" + brief.preview_img_url
     brief.code = code
     # The release date is read from the last <td> inside #video_date.
     date = bs.select("#video_date")[0].select("td")[-1].text
     brief.set_release_date(date)
     # Joining zero or more ".star" spans, so an empty cast yields "".
     brief.actress = ", ".join(
         span.text
         for span in bs.select("#video_cast")[0].select(".star"))  # like AQSH-035
     return brief
Beispiel #2
0
    def get_brief(cls, code):
        """Search avsox.net for ``code`` and build a ``Brief`` from the hit.

        The search page is scanned with ``cls.__url_pattern``; the first
        captured group is fetched as the detail page, from which title,
        release date, cast and preview image are extracted.

        Args:
            code: Identifier appended verbatim to the search URL.

        Returns:
            A populated ``Brief``, or ``None`` when the search page contains
            no match for ``cls.__url_pattern``. ``preview_img_url`` is only
            assigned when the image request answers 200, or answers with a
            status in 300..400 and a ``location`` header.
        """
        url = "https://avsox.net/cn/search/" + code
        rsp = requests.get(url)
        html = rsp.text

        match = re.search(cls.__url_pattern, html)
        if not match:
            # No detail link on the search page: report "not found" rather
            # than raising AttributeError on ``None.group``.
            return None
        url = match.group(1)
        rsp = requests.get(url)
        html = rsp.text

        bs = bs4.BeautifulSoup(html, "lxml")
        movie = bs.select(".movie")[0]

        brief = Brief()
        brief.code = code
        # The cover <img> carries both the title and the image source.
        img = movie.select(".screencap", limit=1)[0].a.img
        brief.title = img.attrs['title']

        # try_evaluate is given "" as its second argument; only the first
        # element of its result is used here.
        brief.set_release_date(
            try_evaluate(
                lambda: re.search(cls.__release_date_pattern, str(movie)).
                group(1), "")[0])
        brief.actress = ", ".join(x.text for x in bs.select(
            "#avatar-waterfall", limit=1)[0].find_all('span'))

        rsp = requests.get(img.attrs['src'])
        if 300 <= rsp.status_code <= 400:
            if "location" in rsp.headers:
                brief.preview_img_url = rsp.headers['location']
        elif rsp.status_code == 200:
            brief.preview_img_url = img.attrs['src']

        return brief
Beispiel #3
0
    def get_brief(mcs, code):
        """Search avsox.host for ``code`` and build a ``Brief`` from the hit.

        All three HTTP requests (search page, detail page, cover image) go
        through the module-level ``proxy`` setting.

        Returns:
            A populated ``Brief``, or ``None`` when the search page has no
            match for ``mcs.__url_pattern``. ``preview_img_url`` is only
            assigned when the image request answers 200, or answers with a
            status in 300..400 and a ``location`` header.
        """
        search_page = requests.get(
            "https://avsox.host/cn/search/" + code, proxies=proxy).text

        found = re.search(mcs.__url_pattern, search_page)
        if found is None:
            return None

        detail_page = requests.get(found.group(1), proxies=proxy).text
        soup = bs4.BeautifulSoup(detail_page, "lxml")
        movie_tag = soup.select(".movie")[0]

        result = Brief()
        result.code = code
        # The cover <img> carries both the title and the image source.
        cover = movie_tag.select(".screencap", limit=1)[0].a.img
        result.title = cover.attrs["title"]

        def extract_release_date():
            return re.search(mcs.__release_date_pattern,
                             str(movie_tag)).group(1)

        # noexcept receives "" as its second argument.
        result.release_date = noexcept(extract_release_date, "")

        cast_box = soup.select("#avatar-waterfall", limit=1)[0]
        result.actress = ", ".join(
            span.text for span in cast_box.find_all("span"))

        cover_src = cover.attrs["src"]
        cover_rsp = requests.get(cover_src, proxies=proxy)
        status = cover_rsp.status_code
        if status == 200:
            result.preview_img_url = cover_src
        elif 300 <= status <= 400 and "location" in cover_rsp.headers:
            result.preview_img_url = cover_rsp.headers["location"]

        return result
Beispiel #4
0
    def get_brief_from_a_card(card_tag):
        """Build a ``Brief`` from a single card element.

        Args:
            card_tag: Parsed tag that contains an ``<h4>`` (code), an
                ``<h5>`` (title), an ``<img data-src=...>`` (preview),
                ``<a class="btn-danger">`` links (cast) and a
                ``YYYY-MM-DD`` date somewhere in its text.

        Returns:
            The populated ``Brief``.
        """
        # First YYYY-MM-DD occurrence in the card text, parsed to a datetime.
        # NOTE(review): the fallback value of try_evaluate when the lambda
        # fails is defined elsewhere - confirm set_release_date accepts it.
        release_date, _ = try_evaluate(
            lambda: datetime.datetime.strptime(
                re.search(r"\d\d\d\d-\d\d-\d\d", card_tag.text).group(0), "%Y-%m-%d"
            )
        )

        actress = [
            link.text
            for link in card_tag.find_all(name="a", attrs={"class": "btn-danger"})
        ]

        img, _ = try_evaluate(lambda: card_tag.find(name="img").attrs["data-src"])
        # Only touch img when a value was actually obtained; otherwise
        # calling startswith on a non-string fallback would raise.
        if img and not img.startswith("http"):
            img = "http:" + img

        brief = Brief()
        brief.preview_img_url = img
        brief.title, _ = try_evaluate(lambda: card_tag.find(name="h5").text.strip(), "")
        brief.actress = ", ".join(actress)
        brief.set_release_date(release_date)
        brief.code = card_tag.find(name="h4").text.strip()

        return brief
Beispiel #5
0
 def __get_brief_from_tr(mcs, tr):
     """Build a ``Brief`` from one table row.

     The preview image comes from the row's ``data-img`` attribute (read
     through ``noexcept``); a value starting with ``/`` is prefixed with the
     site origin. Cells 1, 2 and 5 supply title, code (upper-cased) and
     release date.
     """
     result = Brief()

     result.preview_img_url = noexcept(lambda: tr.attrs["data-img"])
     preview = result.preview_img_url
     if preview and preview.startswith("/"):
         result.preview_img_url = "http://warashi-asian-pornstars.fr" + preview

     cells = tr.select("td")
     title_cell, code_cell, date_cell = cells[1], cells[2], cells[5]
     result.title = title_cell.text.strip()
     result.code = code_cell.text.upper()
     result.release_date = date_cell.text.strip()
     return result
Beispiel #6
0
    def __get_brief_by_card(card):
        """Build a ``Brief`` from the ``.column`` elements of one card.

        Column indexes used: 1 for the release date, 2 for the cast
        ``<span>`` tags, 3 for the title (its link's first ``rel`` value is
        the preview image) and 4 for the code (taken from ``.next``).
        """
        cols = card.select(".column")

        code = cols[4].next.strip()
        cast_names = [
            span.text.strip() for span in cols[2].find_all(name="span")
        ]
        title_col = cols[3]
        title = title_col.text.strip()

        def first_rel():
            return title_col.a.attrs["rel"][0]

        # Only the first element of try_evaluate's result is kept.
        img, _ = try_evaluate(first_rel)
        release_date = cols[1].text.strip()

        result = Brief()
        result.title = title
        result.preview_img_url = img
        result.code = code
        result.actress = ", ".join(cast_names)
        result.set_release_date(release_date)
        return result
Beispiel #7
0
    def __get_brief_from_card(card):
        """Build a ``Brief`` from a card identified by CSS classes.

        ``.tag.is-link.is-light`` holds the code, ``.tag.is-primary.is-light``
        elements hold the cast, ``.title`` holds the title (its link's first
        ``rel`` value is the preview image) and the first ``<p>`` in
        ``<footer>`` holds the release date.
        """
        code_tag = card.select(".tag.is-link.is-light")[0]
        code = code_tag.text.strip()

        cast_names = [
            tag.text.strip() for tag in card.select(".tag.is-primary.is-light")
        ]

        heading = card.select(".title")[0]
        title = heading.text.strip()

        def first_rel():
            return heading.a.attrs["rel"][0]

        img = noexcept(first_rel)
        footer = card.select("footer")[0]
        release_date = footer.p.text.strip()

        result = Brief()
        result.title = title
        result.preview_img_url = img
        result.code = code
        result.actress = ", ".join(cast_names)
        result.release_date = release_date
        return result
Beispiel #8
0
    def __get_brief_by_card(card):
        """Build a ``Brief`` from the ``.column`` elements of one card.

        Returns ``None`` when the card has no ``.column`` element at all.
        Otherwise column 1 gives the release date, column 2 the cast
        ``<span>`` tags, column 3 the title (its link's first ``rel`` value
        is the preview image) and column 4 the code (taken from ``.next``).
        """
        cols = card.select(".column")
        if not cols:  # like 飯岡かなこ
            return None

        code = cols[4].next.strip()
        cast_names = [
            span.text.strip() for span in cols[2].find_all(name="span")
        ]
        title_col = cols[3]
        title = title_col.text.strip()

        def first_rel():
            return title_col.a.attrs["rel"][0]

        img = noexcept(first_rel)
        release_date = cols[1].text.strip()

        result = Brief()
        result.title = title
        result.preview_img_url = img
        result.code = code
        result.actress = ", ".join(cast_names)
        result.release_date = release_date
        return result
Beispiel #9
0
    def get_brief_from_a_card(card_tag):
        """Build a ``Brief`` from a single card element.

        Args:
            card_tag: Parsed tag that contains an ``<h4>`` (code), an
                ``<h5>`` (title), an ``<img src=...>`` (preview),
                ``<a class="btn-danger">`` links (cast) and a
                ``YYYY-MM-DD`` date somewhere in its text.

        Returns:
            The populated ``Brief``.
        """
        # First YYYY-MM-DD occurrence in the card text, parsed to a datetime.
        # NOTE(review): the fallback value of try_evaluate when the lambda
        # fails is defined elsewhere - confirm set_release_date accepts it.
        release_date, _ = try_evaluate(lambda: datetime.datetime.strptime(
            re.search(r"\d\d\d\d-\d\d-\d\d", card_tag.text).group(0),
            "%Y-%m-%d"))

        actress = [
            link.text
            for link in card_tag.find_all(name='a', attrs={'class': 'btn-danger'})
        ]

        img, _ = try_evaluate(lambda: card_tag.find(name='img').attrs['src'])
        # Guard against a missing image, and test for "http" rather than
        # "http:" so that https:// URLs are not turned into
        # "http:https://...".
        if img and not img.startswith("http"):
            img = "http:" + img

        brief = Brief()
        brief.preview_img_url = img
        brief.title, _ = try_evaluate(
            lambda: card_tag.find(name='h5').text.strip(), "")
        brief.actress = ", ".join(actress)
        brief.set_release_date(release_date)
        brief.code = card_tag.find(name='h4').text.strip()

        return brief
Beispiel #10
0
    def __get_brief_by_box(box):
        """Build a ``Brief`` from one result box.

        The code comes from ``span.video_id``. Inside ``div.col-sm-7`` the
        ``div.col-xs-6`` children give the cast, ``span.video_title`` the
        title and the ``rel`` attribute of ``span.preview_btn`` the preview
        image. The release date is the ``<span>`` inside ``div.col-sm-2``.
        """
        raw_code = box.find(name="span", attrs={"class": "video_id"}).text
        info = box.find(name="div", attrs={"class": "col-sm-7"})

        cast_cells = info.find_all(name="div", attrs={"class": "col-xs-6"})
        cast = ", ".join(cell.text for cell in cast_cells)

        raw_title = info.find(name="span", attrs={"class": "video_title"}).text

        def preview_rel():
            button = info.find(name="span", attrs={"class": "preview_btn"})
            return button.attrs["rel"]

        # Only the first element of try_evaluate's result is kept.
        img, _ = try_evaluate(preview_rel)

        date_cell = box.find(name="div", attrs={"class": "col-sm-2"})
        release_date = date_cell.span.text

        result = Brief()
        result.title = raw_title.strip()
        result.preview_img_url = img
        result.code = raw_code.strip()
        result.actress = cast.strip()
        result.set_release_date(release_date)
        return result
    def get_brief(mcs, code):
        """Search warashi-asian-pornstars.fr for ``code`` and build a ``Brief``.

        A form POST is sent to the search endpoint, the first
        ``.resultat-film`` hit is followed, and the detail page is parsed
        for the poster image, the "original title" / "release date"
        paragraphs and the ``#casting-f`` cast list.

        Args:
            code: Value submitted as the ``recherche_valeur`` form field.

        Returns:
            A populated ``Brief``, or ``None`` when the search yields no
            ``.resultat-film`` element.

        Raises:
            ValueError: If the "release date" paragraph does not match the
                ``"%B %d, %Y"`` format.
        """
        site = "http://warashi-asian-pornstars.fr"

        # Passing a dict lets requests form-encode the values, so a code
        # containing "&", "=" or spaces cannot corrupt the request body.
        payload = {"recherche_critere": "v", "recherche_valeur": code}
        headers = {'content-type': "application/x-www-form-urlencoded"}

        response = requests.post(site + "/en/s-12/search",
                                 data=payload,
                                 headers=headers,
                                 proxies=proxy)
        bs = bs4.BeautifulSoup(response.text, "lxml")
        results = bs.select(".resultat-film")
        if not results:
            # Nothing found: report it instead of failing with IndexError.
            return None
        url = site + results[0].a.attrs['href']

        # Use the same proxy settings as the search request.
        response = requests.get(url, proxies=proxy)
        bs = bs4.BeautifulSoup(response.text, "lxml")
        info = bs.select("#fiche-film-infos")[0]

        brief = Brief()
        brief.preview_img_url = site + bs.select('video')[0].attrs["poster"]
        brief.code = code

        for p in info.find_all(name='p'):
            # Split on the first colon only, so a value that itself
            # contains ":" (e.g. a title) is kept rather than skipped.
            key, sep, value = p.text.partition(":")
            if not sep:
                continue
            key = key.strip()
            if key == "original title":
                brief.title = value.strip()
            elif key == "release date":
                brief.release_date = datetime.datetime.strptime(
                    value.strip(), "%B %d, %Y")

        cast = bs.select("#casting-f")[0]
        brief.actress = ",".join(p.text for p in cast.select(".ja"))

        return brief