Beispiel #1
0
    def get_brief_from_a_card(card_tag):
        """Build a ``Brief`` from a single card element.

        The release date is the first ``YYYY-MM-DD`` string found in the card
        text, the actress names are the texts of the ``a.btn-danger`` links,
        the preview image is the ``data-src`` attribute of the first ``img``,
        the title is the ``h5`` text and the code is the ``h4`` text.

        :param card_tag: parsed card element; it is accessed through
            ``find``/``find_all``/``text`` (presumably a BeautifulSoup tag).
        :return: the populated ``Brief``.
        :raises AttributeError: if the ``h4`` lookup returns ``None``; the
            code lookup is deliberately not guarded.
        """
        release_date, _ = try_evaluate(lambda: datetime.datetime.strptime(
            re.search(r"\d\d\d\d-\d\d-\d\d", card_tag.text).group(0),
            "%Y-%m-%d"))

        actress = [
            link.text
            for link in card_tag.find_all(name="a",
                                          attrs={"class": "btn-danger"})
        ]

        img, _ = try_evaluate(
            lambda: card_tag.find(name="img").attrs["data-src"])
        # The guarded lookup may yield nothing (no <img> or no data-src);
        # only add a scheme when there actually is a URL to fix up.
        if img and not img.startswith("http"):
            img = "http:" + img

        brief = Brief()
        brief.preview_img_url = img
        brief.title, _ = try_evaluate(
            lambda: card_tag.find(name="h5").text.strip(), "")
        brief.actress = ", ".join(actress)
        brief.release_date = release_date
        brief.code = card_tag.find(name="h4").text.strip()

        return brief
Beispiel #2
0
 def set_release_date(self, release_date):
     """Store the release date on this object.

     A ``datetime.datetime`` is stored unchanged.  Any other value is
     parsed as ``%Y-%m-%d`` through ``try_evaluate`` with ``None`` passed
     as its second argument, and the first element of the returned pair
     is stored.

     :param release_date: a ``datetime.datetime`` or a value to parse.
     """
     if not isinstance(release_date, datetime.datetime):
         parsed, _ = try_evaluate(
             lambda: datetime.datetime.strptime(release_date, "%Y-%m-%d"),
             None)
         self.release_date = parsed
         return
     self.release_date = release_date
Beispiel #3
0
    def get_brief(cls, code):
        """Look up ``code`` on avsox.host and build a ``Brief`` from the hit.

        The search page is fetched first; the detail URL is the first group
        of ``cls.__url_pattern`` in that page.  Title, release date and
        actress names are then read from the detail page.

        :param code: the code to search for; appended to the search URL.
        :return: a populated ``Brief``, or ``None`` when the search page
            contains no detail URL.
        :raises IndexError: if the detail page lacks the ``.movie``,
            ``.screencap`` or ``#avatar-waterfall`` elements.
        """
        url = "https://avsox.host/cn/search/" + code
        rsp = requests.get(url, proxies=proxy)
        html = rsp.text

        match = re.search(cls.__url_pattern, html)
        if not match:
            return None
        url = match.group(1)
        rsp = requests.get(url, proxies=proxy)
        html = rsp.text

        bs = bs4.BeautifulSoup(html, "lxml")
        movie = bs.select(".movie")[0]

        brief = Brief()
        brief.code = code
        img = movie.select(".screencap", limit=1)[0].a.img
        brief.title = img.attrs["title"]

        # Falls back to "" when the pattern does not match.
        brief.release_date = try_evaluate(
            lambda: re.search(cls.__release_date_pattern, str(movie)).group(1),
            "")[0]

        brief.actress = ", ".join(x.text for x in bs.select(
            "#avatar-waterfall", limit=1)[0].find_all("span"))

        rsp = requests.get(img.attrs["src"], proxies=proxy)
        # Redirects are 3xx only; 400 is a client error and must not be
        # treated as a redirect.
        # NOTE(review): requests follows redirects by default, so this branch
        # is probably rarely reached -- confirm whether allow_redirects=False
        # was intended.
        if 300 <= rsp.status_code < 400:
            if "location" in rsp.headers:
                brief.preview_img_url = rsp.headers["location"]
        elif rsp.status_code == 200:
            brief.preview_img_url = img.attrs["src"]

        return brief
Beispiel #4
0
    def get_brief(cls, code):
        """Look up ``code`` on avsox.net and build a ``Brief`` from the hit.

        The search page is fetched first; the detail URL is the first group
        of ``cls.__url_pattern`` in that page.  Title, release date and
        actress names are then read from the detail page.

        :param code: the code to search for; appended to the search URL.
        :return: a populated ``Brief``, or ``None`` when the search page
            contains no detail URL.
        :raises IndexError: if the detail page lacks the ``.movie``,
            ``.screencap`` or ``#avatar-waterfall`` elements.
        """
        url = "https://avsox.net/cn/search/" + code
        rsp = requests.get(url)
        html = rsp.text

        # A search without hits has no detail link; report "not found"
        # instead of failing with AttributeError on a None match.
        match = re.search(cls.__url_pattern, html)
        if not match:
            return None
        url = match.group(1)
        rsp = requests.get(url)
        html = rsp.text

        bs = bs4.BeautifulSoup(html, "lxml")
        movie = bs.select(".movie")[0]

        brief = Brief()
        brief.code = code
        img = movie.select(".screencap", limit=1)[0].a.img
        brief.title = img.attrs['title']

        # Falls back to "" when the pattern does not match.
        brief.set_release_date(
            try_evaluate(
                lambda: re.search(cls.__release_date_pattern, str(movie)).
                group(1), "")[0])
        brief.actress = ", ".join(x.text for x in bs.select(
            "#avatar-waterfall", limit=1)[0].find_all('span'))

        rsp = requests.get(img.attrs['src'])
        # Redirects are 3xx only; 400 is a client error and must not be
        # treated as a redirect.
        if 300 <= rsp.status_code < 400:
            if "location" in rsp.headers:
                brief.preview_img_url = rsp.headers['location']
        elif rsp.status_code == 200:
            brief.preview_img_url = img.attrs['src']

        return brief
Beispiel #5
0
 def __get_newly_released_from_sources(cls, page):
     """Fetch newly released items for ``page`` from the current source.

     When ``cls.which_source`` is ``-1``, or the current source yields a
     falsy result or an exception, the work is delegated to
     ``cls.__find_usable_source``.

     :param page: forwarded unchanged to ``get_newly_released``.
     :return: the result of the current source, or whatever
         ``cls.__find_usable_source(page)`` returns.
     """
     if cls.which_source == -1:
         return cls.__find_usable_source(page)

     outcome, error = try_evaluate(lambda: Sources.NewlyReleased[cls.which_source].get_newly_released(page))
     if outcome and not error:
         return outcome
     return cls.__find_usable_source(page)
Beispiel #6
0
 def get_newly_released_from_sources(cls, page):
     """Return newly released items for ``page`` from the first working source.

     Starts at ``cls.sources[cls.which_source]`` and advances
     ``cls.which_source`` past every source that yields a falsy result or
     an exception.

     :param page: forwarded unchanged to ``get_newly_released``.
     :return: the first truthy result obtained without an exception.
     :raises Exception: ``"all sources are down"`` once no source is left.
     """
     # Bounding the index up front means a call made after every source has
     # already failed raises the intended error straight away.  Previously
     # the index sat at len(cls.sources), the ``==`` check never matched
     # again and the method recursed without bound.
     while cls.which_source < len(cls.sources):
         res, ex = try_evaluate(lambda: cls.sources[cls.which_source].get_newly_released(page))
         if res and not ex:
             return res
         cls.which_source += 1  # fallback choice
     raise Exception("all sources are down")
Beispiel #7
0
 def __find_usable_source(cls, page):
     """Probe every entry of ``Sources.NewlyReleased`` until one works.

     The index of the first source that yields a truthy result without an
     exception is remembered in ``cls.which_source``.

     :param page: forwarded unchanged to ``get_newly_released``.
     :return: the result of the first working source.
     :raises Exception: ``"all sources are down"`` when no source works.
     """
     for i, source in enumerate(Sources.NewlyReleased):
         # Probe the candidate of this iteration.  Indexing with
         # cls.which_source would test the same (possibly broken or -1)
         # entry on every pass and then record an unrelated index.
         res, ex = try_evaluate(lambda: source.get_newly_released(page))
         if (not res) or ex:
             continue
         cls.which_source = i
         return res
     raise Exception("all sources are down")
Beispiel #8
0
 def get_history_names_by_li(cls, li):
     """Collect the names listed on the page linked from ``li``.

     The link is the first match of ``Etigoya.url_pattern`` in ``str(li)``.
     The linked page is downloaded, every ``Etigoya.name_pattern`` match is
     cleaned with ``Etigoya.purify_pattern`` and stripped.

     :param li: element whose string form is searched for the URL.
     :return: list of cleaned names; empty when no URL was found.
     """
     outcome = try_evaluate(
         lambda: re.search(Etigoya.url_pattern, str(li)).group(0))
     url = outcome[0]
     if not url:
         return []

     html = requests.get(url).text
     names = []
     for raw in re.findall(Etigoya.name_pattern, html):
         names.append(re.sub(Etigoya.purify_pattern, "", raw).strip())
     return names
Beispiel #9
0
    def get_brief_from_a_card(card_tag):
        """Build a ``Brief`` from a single card element.

        The release date is the first ``YYYY-MM-DD`` string found in the card
        text, the actress names are the texts of the ``a.btn-danger`` links,
        the preview image is the ``src`` attribute of the first ``img``, the
        title is the ``h5`` text and the code is the ``h4`` text.

        :param card_tag: parsed card element; it is accessed through
            ``find``/``find_all``/``text`` (presumably a BeautifulSoup tag).
        :return: the populated ``Brief``.
        :raises AttributeError: if the ``h4`` lookup returns ``None``; the
            code lookup is deliberately not guarded.
        """
        release_date, _ = try_evaluate(lambda: datetime.datetime.strptime(
            re.search(r"\d\d\d\d-\d\d-\d\d", card_tag.text).group(0),
            "%Y-%m-%d"))

        actress = [
            link.text
            for link in card_tag.find_all(name='a',
                                          attrs={'class': 'btn-danger'})
        ]

        img, _ = try_evaluate(lambda: card_tag.find(name='img').attrs['src'])
        # The guarded lookup may yield nothing, so skip the fix-up then.
        # URLs that already carry a scheme are left alone: testing only for
        # "http:" would turn "https://..." into "http:https://...".
        if img and not img.startswith(("http:", "https:")):
            img = "http:" + img

        brief = Brief()
        brief.preview_img_url = img
        brief.title, _ = try_evaluate(
            lambda: card_tag.find(name='h5').text.strip(), "")
        brief.actress = ", ".join(actress)
        brief.set_release_date(release_date)
        brief.code = card_tag.find(name='h4').text.strip()

        return brief
    def __check_name_in_box(cls, name, box):
        """Return the name found in ``box`` if the box mentions ``name``.

        The returned name is the second ``-``-separated segment of the
        lower-cased text of the box's first ``p`` element.  On success the
        detail URL is cached in ``cls.__actress_detail_url`` under both
        ``name`` and the returned name.

        :param name: name to look for; compared against the lower-cased
            box text.
        :param box: parsed element to inspect.
        :return: the extracted name, or ``None`` when the box does not
            mention ``name`` or its title has no usable second segment.
        """
        if name not in box.text.lower():
            return None
        title = box.find(name="p").text.lower()
        # A title without "-" has no second segment; treat it like an
        # empty name instead of raising IndexError.
        segments = title.split("-")
        if len(segments) < 2:
            return None
        jp_name = segments[1].strip()
        if not jp_name:
            return None

        # cache for later parsing actress info, None for no url
        url, _ = try_evaluate(lambda: box.a.attrs["href"])
        # Keep the cache entry None when there is no href, rather than
        # storing a bogus ".../None" address.
        detail_url = ("http://warashi-asian-pornstars.fr/%s" % url
                      if url else None)
        cls.__actress_detail_url[name] = detail_url
        cls.__actress_detail_url[jp_name] = detail_url
        return jp_name
Beispiel #11
0
 def run(self):
     """Execute the task once and record its outcome.

     The task is marked running, its target is called through
     ``try_evaluate``, ``Master.finish_task`` is notified, one of the
     callbacks is invoked, and the final status is derived from whether
     the stored result is ``None``.
     """
     task = self.task
     task.status = Task.RUNNING
     task.result, ex = try_evaluate(
         lambda: task.target(*task.args, **task.kwargs))
     Master.finish_task(task.id)

     # NOTE(review): when an exception occurred but no catch_cb is set,
     # then_cb still runs with the stored result -- confirm this is wanted.
     if ex and task.catch_cb:
         task.catch_cb(ex)
         task.status = Task.FAILED
     elif task.then_cb:
         task.then_cb(task.result)

     # The final status depends only on the result, not on the branch above.
     task.status = Task.SUCCESS if task.result is not None else Task.FAILED
Beispiel #12
0
    def __get_brief_by_card(card):
        """Build a ``Brief`` from the ``.column`` cells of ``card``.

        Cell 1 holds the release date, cell 2 the actress ``span`` elements,
        cell 3 the title and the preview link, and cell 4 the code.

        :param card: parsed element offering ``select``.
        :return: the populated ``Brief``.
        """
        cells = card.select(".column")
        code = cells[4].next.strip()
        names = [span.text.strip() for span in cells[2].find_all(name="span")]
        actress = ", ".join(names)
        title = cells[3].text.strip()
        # The preview URL is the first "rel" value of the title link; the
        # lookup is guarded because the link may be absent.
        img, _ = try_evaluate(lambda: cells[3].a.attrs["rel"][0])
        release_date = cells[1].text.strip()

        result = Brief()
        result.title = title
        result.preview_img_url = img
        result.code = code
        result.actress = actress
        result.set_release_date(release_date)
        return result
Beispiel #13
0
    def __get_brief_by_box(box):
        """Build a ``Brief`` from one result ``box``.

        The code comes from ``span.video_id``, the release date from the
        ``span`` inside ``div.col-sm-2``, and the title, actress names and
        preview image from ``div.col-sm-7``.

        :param box: parsed element offering ``find``.
        :return: the populated ``Brief``.
        """
        code = box.find(name="span", attrs={"class": "video_id"}).text
        detail = box.find(name="div", attrs={"class": "col-sm-7"})
        actress = ", ".join(
            cell.text
            for cell in detail.find_all(name="div",
                                        attrs={"class": "col-xs-6"}))
        title = detail.find(name="span", attrs={"class": "video_title"}).text
        # Guarded: the preview button may be missing or lack a "rel" value.
        img, _ = try_evaluate(
            lambda: detail.find(name="span",
                                attrs={"class": "preview_btn"}).attrs["rel"])
        date_cell = box.find(name="div", attrs={"class": "col-sm-2"})
        release_date = date_cell.span.text

        result = Brief()
        result.title = title.strip()
        result.preview_img_url = img
        result.code = code.strip()
        result.actress = actress.strip()
        result.set_release_date(release_date)
        return result
Beispiel #14
0
    def run(self):
        """Worker loop: wait for the event, run the task, report, repeat.

        Each round waits on ``self.event``, executes ``self.task`` through
        ``try_evaluate``, invokes one of the callbacks, derives the final
        status from the stored result, updates the counters of the task
        group (if any), clears the event and releases the master semaphore.
        The loop never returns.
        """
        while True:
            self.event.wait()
            task = self.task
            task.status = Task.RUNNING
            task.result, ex = try_evaluate(
                lambda: task.target(*task.args, **task.kwargs))

            # NOTE(review): when an exception occurred but no catch_cb is
            # set, then_cb still runs -- confirm this is wanted.
            if ex and task.catch_cb:
                task.catch_cb(ex)
                task.status = Task.FAILED
            elif task.then_cb:
                task.then_cb(task.result)

            # The final status depends only on the stored result.
            task.status = (Task.SUCCESS
                           if task.result is not None else Task.FAILED)

            group = task.task_group
            if group:
                group.finished_cnt += 1
                if task.status == Task.FAILED:
                    group.failed_cnt += 1
                else:
                    group.success_cnt += 1

            self.event.clear()
            self.master.semaphore.release()
0
    def search_by_code(cls, code):
        """Search javmost for ``code`` and return an ``AV`` with a video URL.

        The detail page supplies the preview image (``og:image`` meta tag),
        the ``value`` and ``sound`` tokens, and one ``select_part(...)``
        button per candidate.  Each candidate is posted to
        ``get_movie_source``; the first decoded URL that answers with
        status 200 is used.

        :param code: the code to look up; appended to the site URL.
        :return: an ``AV`` with ``preview_img_url``, ``video_url`` and
            ``code`` set, or ``None`` when the page is unavailable, has no
            preview image, or no candidate URL works.
        """
        url = "http://www5.javmost.com/" + code + "/"
        main_rsp = cls.__client.get(url, proxies=proxy)
        if main_rsp.status_code != 200:
            return None

        img, _ = try_evaluate(
            lambda: re.search(r"<meta property=\"og:image\" content=\"(.+?)\"",
                              main_rsp.text).group(1))

        if not img:
            return None

        # Nov. 13 adding: https://www5.javmost.com/IENE-623/
        # Add a scheme only when none is present.  Testing for "http:" alone
        # would turn an "https://..." image into "http:https://...".
        if not img.startswith(("http:", "https:")):
            img = "http:" + img

        bs = bs4.BeautifulSoup(main_rsp.text, "lxml")

        # The first and last <li> are skipped by the slice.
        buttons = bs.select(".tab-overflow")[0].find_all(name="li")[1:-1]
        success = False

        # The page names a JS variable for 'value'; resolve its content.
        var_value = re.search("'value':(.+?),", main_rsp.text).group(1)
        value = re.search("var %s = '(.+?)'" % var_value,
                          main_rsp.text).group(1)

        for button in buttons:
            params = re.search(r"select_part\((.+?)\)",
                               button.a.attrs["onclick"]).group(1)
            tokens = params.split(",")
            group = tokens[1].replace("'", "")
            part = tokens[0].replace("'", "")
            _code = tokens[4].replace("'", "")
            code2 = tokens[5].replace("'", "")
            code3 = tokens[6].replace("'", "")
            sound = re.search("'sound':'(.+?)'", main_rsp.text).group(1)

            data = urlencode(
                {
                    'group': group,
                    'part': part,
                    'code': _code,
                    'code2': code2,
                    'code3': code3,
                    'value': value,
                    'sound': sound
                },
                quote_via=quote_plus)

            rsp = cls.__client.post(
                "https://www5.javmost.com/get_movie_source/",
                headers={
                    'content-type':
                    "application/x-www-form-urlencoded; charset=UTF-8"
                },
                data=data,
                proxies=proxy)

            json_obj = json.loads(rsp.text)
            url = json_obj["data"][0]

            url = decode(url)

            if not url:
                continue

            if cls.__client.get(url, proxies=proxy).status_code == 200:
                success = True
                break

        if not success:
            return None

        av = AV()
        av.preview_img_url = img
        av.video_url = url
        av.code = code

        return av
Beispiel #16
0
 def release_date(self, date: Union[str, datetime.datetime]):
     """Store ``date`` in the private release-date attribute.

     A ``datetime.datetime`` is stored unchanged.  Any other value is
     parsed as ``%Y-%m-%d`` through ``try_evaluate`` with ``None`` passed
     as its second argument, and the first element of the returned pair
     is stored.

     :param date: a ``datetime.datetime`` or a ``%Y-%m-%d`` string.
     """
     if not isinstance(date, datetime.datetime):
         parsed, _ = try_evaluate(
             lambda: datetime.datetime.strptime(date, "%Y-%m-%d"), None)
         self.__release_date = parsed
         return
     self.__release_date = date
Beispiel #17
0
    def search_by_code(cls, code):
        """Search javmost for ``code`` and return an ``AV`` with a video URL.

        The detail page supplies the preview image (``og:image`` meta tag),
        the ``value`` and ``sound`` tokens, and one ``select_part(...)``
        button per candidate.  For each candidate an extra code is fetched
        from ``get_code`` and everything is posted to ``get_source``; the
        first decoded URL that answers with status 200 is used.

        :param code: the code to look up; appended to the site URL.
        :return: an ``AV`` with ``preview_img_url``, ``video_url`` and
            ``code`` set, or ``None`` when the page is unavailable, has no
            preview image, or no candidate URL works.
        :raises ValueError: if a ``select_part`` call does not have exactly
            seven comma-separated arguments.
        """
        url = "http://www5.javmost.com/" + code
        main_rsp = cls.__client.get(url)
        if main_rsp.status_code != 200:
            return None

        img, _ = try_evaluate(
            lambda: re.search(r"<meta property=\"og:image\" content=\"(.+?)\"",
                              main_rsp.text).group(1))

        if not img:
            return None

        # Nov. 13 adding: https://www5.javmost.com/IENE-623/
        # Add a scheme only when none is present.  Testing for "http:" alone
        # would turn an "https://..." image into "http:https://...".
        if not img.startswith(("http:", "https:")):
            img = "http:" + img

        bs = bs4.BeautifulSoup(main_rsp.text, "lxml")

        # The first and last <li> are skipped by the slice.
        buttons = bs.select('.tab-overflow')[0].find_all(name='li')[1:-1]
        success = False

        for button in buttons:
            params = re.search(r"select_part\((.+?)\)",
                               button.a.attrs['onclick']).group(1)
            # Positions 2 and 3 are not used by the request.
            part, group, _unused2, _unused3, code1, code2, code3 = [
                x.replace("\'", "") for x in params.split(",")
            ]

            data = re.search(r"get_source/\",(.+?)\}", main_rsp.text,
                             re.S).group(1)
            value = re.search(r"value: \"(.+?)\",", data).group(1)
            sound = re.search(r"sound: \"(.+?)\",", data).group(1)

            url = "https://www5.javmost.com/get_code/"
            rsp = cls.__client.post(url, data={"code": value})
            _code = rsp.text

            url = "https://www5.javmost.com/get_source/"
            rsp = cls.__client.post(url,
                                    data={
                                        "group": group,
                                        "part": part,
                                        "code": code1,
                                        "code2": code2,
                                        "code3": code3,
                                        "value": value,
                                        "sound": sound,
                                        "code4": _code
                                    })

            json_obj = json.loads(rsp.text)
            url = json_obj["data"][0]

            url = decode(url)

            # Skip candidates that decode to nothing instead of issuing a
            # request with an empty URL.
            if not url:
                continue

            if cls.__client.get(url).status_code == 200:
                success = True
                break

        if not success:
            return None

        av = AV()
        av.preview_img_url = img
        av.video_url = url
        av.code = code

        return av