def get_brief_from_a_card(card_tag):
    """Build a ``Brief`` from a single result card.

    Args:
        card_tag: Parsed HTML element for one card; it must expose
            ``.text``, ``find`` and ``find_all``.

    Returns:
        A ``Brief`` with ``preview_img_url``, ``title``, ``actress``,
        ``release_date`` and ``code`` filled in from the card.

    Notes:
        The date, image and title are extracted on a best-effort basis
        through ``try_evaluate``. The ``<h4>`` code lookup is not wrapped,
        so any error it raises propagates to the caller.
    """
    # First YYYY-MM-DD found in the card text, parsed to a datetime.
    release_date, _ = try_evaluate(lambda: datetime.datetime.strptime(
        re.search(r"\d\d\d\d-\d\d-\d\d", card_tag.text).group(0),
        "%Y-%m-%d"))

    actress = [
        tag.text
        for tag in card_tag.find_all(name="a", attrs={"class": "btn-danger"})
    ]

    img, _ = try_evaluate(
        lambda: card_tag.find(name="img").attrs["data-src"])
    # The lookup above is best-effort, so ``img`` may be empty here
    # (presumably None when the extraction failed). Only fix up the scheme
    # when there is a value to fix, instead of crashing on ``.startswith``.
    if img and not img.startswith("http"):
        img = "http:" + img

    brief = Brief()
    brief.preview_img_url = img
    brief.title, _ = try_evaluate(
        lambda: card_tag.find(name="h5").text.strip(), "")
    brief.actress = ", ".join(actress)
    brief.release_date = release_date
    brief.code = card_tag.find(name="h4").text.strip()
    return brief
def set_release_date(self, release_date):
    """Store ``release_date`` on the instance, parsing strings if needed.

    Args:
        release_date: Either a ``datetime.datetime`` (stored as-is) or a
            value handed to ``datetime.datetime.strptime`` with the
            ``%Y-%m-%d`` format.

    The parse goes through ``try_evaluate`` with ``None`` passed as the
    fallback argument, and the first element of its result is what gets
    stored.
    """
    if not isinstance(release_date, datetime.datetime):
        parsed, _ = try_evaluate(
            lambda: datetime.datetime.strptime(release_date, "%Y-%m-%d"),
            None)
        self.release_date = parsed
        return
    self.release_date = release_date
def get_brief(cls, code):
    """Look up ``code`` on avsox.host and build a ``Brief`` from the result.

    Args:
        code: Identifier appended to the search URL.

    Returns:
        A populated ``Brief``, or ``None`` when the search page contains
        no match for ``cls.__url_pattern``.

    Raises:
        IndexError: when the detail page lacks a ``.movie``,
            ``.screencap`` or ``#avatar-waterfall`` element (the first
            hit of each selector is indexed directly).
    """
    search_html = requests.get(
        "https://avsox.host/cn/search/" + code, proxies=proxy).text
    match = re.search(cls.__url_pattern, search_html)
    if not match:
        return None

    # Group 1 of the pattern is used as the detail-page URL.
    detail_html = requests.get(match.group(1), proxies=proxy).text
    bs = bs4.BeautifulSoup(detail_html, "lxml")
    movie = bs.select(".movie")[0]

    brief = Brief()
    brief.code = code
    img = movie.select(".screencap", limit=1)[0].a.img
    brief.title = img.attrs["title"]
    # Best-effort: "" is passed as the fallback for a missing date.
    brief.release_date = try_evaluate(
        lambda: re.search(cls.__release_date_pattern, str(movie)).group(1),
        "")[0]
    brief.actress = ", ".join(
        span.text
        for span in bs.select("#avatar-waterfall", limit=1)[0].find_all("span"))

    img_src = img.attrs["src"]
    rsp = requests.get(img_src, proxies=proxy)
    # Redirect statuses are 300-399; 400 is a client error and must not be
    # treated as a redirect.
    # NOTE(review): requests follows redirects by default, so a 3xx status
    # is normally never observed here - confirm whether
    # allow_redirects=False was intended.
    if 300 <= rsp.status_code < 400:
        if "location" in rsp.headers:
            brief.preview_img_url = rsp.headers["location"]
    elif rsp.status_code == 200:
        brief.preview_img_url = img_src
    return brief
def get_brief(cls, code):
    """Look up ``code`` on avsox.net and build a ``Brief`` from the result.

    Args:
        code: Identifier appended to the search URL.

    Returns:
        A populated ``Brief``, or ``None`` when the search page contains
        no match for ``cls.__url_pattern``.

    Raises:
        IndexError: when the detail page lacks a ``.movie``,
            ``.screencap`` or ``#avatar-waterfall`` element (the first
            hit of each selector is indexed directly).
    """
    search_html = requests.get("https://avsox.net/cn/search/" + code).text
    match = re.search(cls.__url_pattern, search_html)
    # An unknown code yields no match; report "not found" instead of
    # failing with AttributeError on ``None.group``.
    if not match:
        return None

    # Group 1 of the pattern is used as the detail-page URL.
    detail_html = requests.get(match.group(1)).text
    bs = bs4.BeautifulSoup(detail_html, "lxml")
    movie = bs.select(".movie")[0]

    brief = Brief()
    brief.code = code
    img = movie.select(".screencap", limit=1)[0].a.img
    brief.title = img.attrs['title']
    # Best-effort: "" is passed as the fallback for a missing date.
    brief.set_release_date(
        try_evaluate(
            lambda: re.search(cls.__release_date_pattern,
                              str(movie)).group(1),
            "")[0])
    brief.actress = ", ".join(
        span.text
        for span in bs.select("#avatar-waterfall", limit=1)[0].find_all('span'))

    img_src = img.attrs['src']
    rsp = requests.get(img_src)
    # Redirect statuses are 300-399; 400 is a client error and must not be
    # treated as a redirect.
    # NOTE(review): requests follows redirects by default, so a 3xx status
    # is normally never observed here - confirm whether
    # allow_redirects=False was intended.
    if 300 <= rsp.status_code < 400:
        if "location" in rsp.headers:
            brief.preview_img_url = rsp.headers['location']
    elif rsp.status_code == 200:
        brief.preview_img_url = img_src
    return brief
def __get_newly_released_from_sources(cls, page):
    """Fetch newly released items for ``page`` from the remembered source.

    When no source has been selected yet (``cls.which_source == -1``), or
    the remembered one returns nothing or raises, the work is delegated
    to ``cls.__find_usable_source``.

    Args:
        page: Page argument forwarded to ``get_newly_released``.

    Returns:
        The truthy result of the remembered source, otherwise whatever
        ``cls.__find_usable_source(page)`` returns.
    """
    if cls.which_source == -1:
        return cls.__find_usable_source(page)

    # The source lookup stays inside the lambda so an indexing error is
    # handled by try_evaluate as well.
    res, ex = try_evaluate(
        lambda: Sources.NewlyReleased[cls.which_source]
        .get_newly_released(page))
    if res and not ex:
        return res
    return cls.__find_usable_source(page)
def get_newly_released_from_sources(cls, page):
    """Return newly released items for ``page``, falling back across sources.

    Starts with ``cls.sources[cls.which_source]``. Every time a source
    raises or returns a falsy result, ``cls.which_source`` is advanced to
    the next one.

    Args:
        page: Page argument forwarded to ``get_newly_released``.

    Returns:
        The first truthy result obtained.

    Raises:
        Exception: ``"all sources are down"`` once the index has moved
            past the last source.
    """
    while True:
        # The source lookup stays inside the lambda so an indexing error
        # is handled by try_evaluate as well.
        res, ex = try_evaluate(
            lambda: cls.sources[cls.which_source].get_newly_released(page))
        if res and not ex:
            return res

        cls.which_source += 1  # fallback choice
        # ``>=`` rather than ``==``: once the index is already past the
        # end (a previous call exhausted the list), an equality test never
        # fires again and the retry would continue without bound.
        if cls.which_source >= len(cls.sources):
            raise Exception("all sources are down")
def __find_usable_source(cls, page):
    """Probe each entry of ``Sources.NewlyReleased`` and use the first that works.

    Args:
        page: Page argument forwarded to ``get_newly_released``.

    Returns:
        The first truthy result. The index of the source that produced it
        is stored in ``cls.which_source``.

    Raises:
        Exception: ``"all sources are down"`` when every source raised or
            returned a falsy result.
    """
    for i, source in enumerate(Sources.NewlyReleased):
        # Query the source under test. Indexing with ``cls.which_source``
        # here would retry the same (possibly broken) source every time.
        # ``source`` is bound as a default so the callable does not depend
        # on the loop variable's later values.
        res, ex = try_evaluate(
            lambda source=source: source.get_newly_released(page))
        if (not res) or ex:
            continue
        cls.which_source = i
        return res
    raise Exception("all sources are down")
def get_history_names_by_li(cls, li):
    """Collect the names listed on the page linked from ``li``.

    Args:
        li: Element whose string form is searched with
            ``Etigoya.url_pattern`` to find the page URL.

    Returns:
        A list of stripped names, one per ``Etigoya.name_pattern`` hit in
        the fetched page with ``Etigoya.purify_pattern`` removed. The
        list is empty when no URL could be extracted.
    """
    url, _ = try_evaluate(
        lambda: re.search(Etigoya.url_pattern, str(li)).group(0))
    if not url:
        return []

    page_html = requests.get(url).text
    names = []
    for raw_name in re.findall(Etigoya.name_pattern, page_html):
        names.append(re.sub(Etigoya.purify_pattern, "", raw_name).strip())
    return names
def get_brief_from_a_card(card_tag):
    """Build a ``Brief`` from a single result card.

    Args:
        card_tag: Parsed HTML element for one card; it must expose
            ``.text``, ``find`` and ``find_all``.

    Returns:
        A ``Brief`` with ``preview_img_url``, ``title``, ``actress``,
        release date and ``code`` filled in from the card.

    Notes:
        The date, image and title are extracted on a best-effort basis
        through ``try_evaluate``. The ``<h4>`` code lookup is not wrapped,
        so any error it raises propagates to the caller.
    """
    # First YYYY-MM-DD found in the card text, parsed to a datetime.
    release_date, _ = try_evaluate(lambda: datetime.datetime.strptime(
        re.search(r"\d\d\d\d-\d\d-\d\d", card_tag.text).group(0),
        "%Y-%m-%d"))

    actress = [
        tag.text
        for tag in card_tag.find_all(name='a', attrs={'class': 'btn-danger'})
    ]

    img, _ = try_evaluate(lambda: card_tag.find(name='img').attrs['src'])
    # Only add a scheme when one is missing. Checking for "http:" alone
    # would turn "https://..." into "http:https://...". The truthiness
    # guard covers a failed best-effort lookup, where ``img`` is
    # presumably None.
    if img and not img.startswith(("http:", "https:")):
        img = "http:" + img

    brief = Brief()
    brief.preview_img_url = img
    brief.title, _ = try_evaluate(
        lambda: card_tag.find(name='h5').text.strip(), "")
    brief.actress = ", ".join(actress)
    brief.set_release_date(release_date)
    brief.code = card_tag.find(name='h4').text.strip()
    return brief
def __check_name_in_box(cls, name, box):
    """Return the second name found in ``box`` if the box mentions ``name``.

    Args:
        name: Name to look for, compared against the lower-cased box text.
        box: Parsed HTML element for one result box.

    Returns:
        The stripped text after the first ``-`` in the box's ``<p>``
        title (lower-cased), or ``None`` when the box does not mention
        ``name`` or that part of the title is empty.

    Side effects:
        Records the detail URL for both ``name`` and the returned name in
        ``cls.__actress_detail_url``.
    """
    if name not in box.text.lower():
        return None

    title = box.find(name="p").text.lower()
    jp_name = title.split("-")[1].strip()
    if not jp_name:
        return None

    # cache for later parsing actress info, None for no url
    url, _ = try_evaluate(lambda: box.a.attrs["href"])
    # Honour the "None for no url" contract: formatting a missing href
    # would cache the bogus address ".../None" instead.
    if url:
        detail_url = "http://warashi-asian-pornstars.fr/%s" % url
    else:
        detail_url = None
    cls.__actress_detail_url[name] = detail_url
    cls.__actress_detail_url[jp_name] = detail_url
    return jp_name
def run(self):
    """Run the task once, notify ``Master`` and record the outcome.

    The target is invoked through ``try_evaluate``, so an exception it
    raises is captured rather than propagated. ``Master.finish_task`` is
    called before any callback fires.

    Final status:
        * exception and ``catch_cb`` set: ``catch_cb(ex)`` is called,
          status is ``FAILED``.
        * otherwise, ``then_cb`` set: ``then_cb(result)`` is called,
          status is ``SUCCESS`` when the result is not ``None``, else
          ``FAILED``.
        * no applicable callback: ``FAILED`` when an exception was
          captured, else ``SUCCESS``.
    """
    task = self.task
    task.status = Task.RUNNING
    task.result, ex = try_evaluate(
        lambda: task.target(*task.args, **task.kwargs))
    Master.finish_task(task.id)

    if ex and task.catch_cb:
        task.catch_cb(ex)
        task.status = Task.FAILED
    elif task.then_cb:
        task.then_cb(task.result)
        if task.result is not None:
            task.status = Task.SUCCESS
        else:
            task.status = Task.FAILED
    else:
        # Without this branch a task with no callbacks would stay in
        # RUNNING forever even though it has finished.
        task.status = Task.FAILED if ex else Task.SUCCESS
def __get_brief_by_card(card):
    """Translate one listing card into a ``Brief``.

    Args:
        card: Parsed HTML element whose ``.column`` children hold, by
            index: 1 release date, 2 actress spans, 3 title (with the
            preview link), 4 code.

    Returns:
        A ``Brief`` carrying title, preview image, code, actress list
        and release date.
    """
    columns = card.select(".column")

    code = columns[4].next.strip()
    actress_names = [
        span.text.strip() for span in columns[2].find_all(name="span")
    ]
    actress = ", ".join(actress_names)
    title = columns[3].text.strip()
    # Best-effort: the preview URL is the first ``rel`` value of the
    # title link.
    img, _ = try_evaluate(lambda: columns[3].a.attrs["rel"][0])
    release_date = columns[1].text.strip()

    brief = Brief()
    brief.title = title
    brief.preview_img_url = img
    brief.code = code
    brief.actress = actress
    brief.set_release_date(release_date)
    return brief
def __get_brief_by_box(box):
    """Translate one result box into a ``Brief``.

    Args:
        box: Parsed HTML element containing a ``span.video_id``, a
            ``div.col-sm-7`` detail column and a ``div.col-sm-2`` date
            column.

    Returns:
        A ``Brief`` carrying the stripped title, code and actress list,
        the preview ``rel`` value and the release date.
    """
    code = box.find(name='span', attrs={'class': 'video_id'}).text
    detail = box.find(name='div', attrs={'class': 'col-sm-7'})

    actress_cells = detail.find_all(name='div', attrs={'class': 'col-xs-6'})
    actress = ", ".join(cell.text for cell in actress_cells)

    title = detail.find(name='span', attrs={'class': 'video_title'}).text
    # Best-effort: the preview reference is the ``rel`` attribute of the
    # preview button span.
    img, _ = try_evaluate(
        lambda: detail.find(
            name='span', attrs={'class': 'preview_btn'}).attrs['rel'])

    date_column = box.find(name='div', attrs={'class': 'col-sm-2'})
    release_date = date_column.span.text

    brief = Brief()
    brief.title = title.strip()
    brief.preview_img_url = img
    brief.code = code.strip()
    brief.actress = actress.strip()
    brief.set_release_date(release_date)
    return brief
def run(self):
    """Worker loop: wait for a task, run it, record the outcome, repeat.

    Each iteration blocks on ``self.event``, runs ``self.task`` through
    ``try_evaluate`` (so an exception from the target is captured rather
    than propagated), fires the callbacks, updates the task group
    counters, then clears the event and releases the master's semaphore.

    Final status per task:
        * exception and ``catch_cb`` set: ``catch_cb(ex)`` is called,
          status is ``FAILED``.
        * otherwise, ``then_cb`` set: ``then_cb(result)`` is called,
          status is ``SUCCESS`` when the result is not ``None``, else
          ``FAILED``.
        * no applicable callback: ``FAILED`` when an exception was
          captured, else ``SUCCESS``.
    """
    while True:
        self.event.wait()
        task = self.task
        task.status = Task.RUNNING
        task.result, ex = try_evaluate(
            lambda: task.target(*task.args, **task.kwargs))

        if ex and task.catch_cb:
            task.catch_cb(ex)
            task.status = Task.FAILED
        elif task.then_cb:
            task.then_cb(task.result)
            if task.result is not None:
                task.status = Task.SUCCESS
            else:
                task.status = Task.FAILED
        else:
            # Without this branch a callback-less task would stay in
            # RUNNING, and a task that raised would be counted below as a
            # success.
            task.status = Task.FAILED if ex else Task.SUCCESS

        group = task.task_group
        if group:
            group.finished_cnt += 1
            if task.status == Task.FAILED:
                group.failed_cnt += 1
            else:
                group.success_cnt += 1

        self.event.clear()
        self.master.semaphore.release()
def search_by_code(cls, code):
    """Find a playable video for ``code`` and return it as an ``AV``.

    The movie page is fetched through ``cls.__client``. Each part button
    on it is then tried in turn: its ``select_part(...)`` arguments are
    posted to the source endpoint, the first returned URL is decoded, and
    the first decoded URL that answers with HTTP 200 wins.

    Args:
        code: Identifier appended to the site URL.

    Returns:
        An ``AV`` with ``preview_img_url``, ``video_url`` and ``code``
        set, or ``None`` when the page is not served with status 200, has
        no ``og:image`` meta tag, or no part yields a reachable URL.

    Raises:
        AttributeError / IndexError: when the page markup does not
            contain the expected script variables or button arguments
            (these lookups are not guarded).
    """
    url = "http://www5.javmost.com/" + code + "/"
    main_rsp = cls.__client.get(url, proxies=proxy)
    if main_rsp.status_code != 200:
        return None

    img, _ = try_evaluate(
        lambda: re.search(r"<meta property=\"og:image\" content=\"(.+?)\"",
                          main_rsp.text).group(1))
    if not img:
        return None
    # Nov. 13 adding: https://www5.javmost.com/IENE-623/
    # Only add a scheme when one is missing. Checking for "http:" alone
    # would turn "https://..." into "http:https://...".
    if not img.startswith(("http:", "https:")):
        img = "http:" + img

    bs = bs4.BeautifulSoup(main_rsp.text, "lxml")
    # First and last <li> are skipped by the original slicing.
    buttons = bs.select(".tab-overflow")[0].find_all(name="li")[1:-1]

    # The page names a JS variable holding the request token; resolve it.
    var_value = re.search("'value':(.+?),", main_rsp.text).group(1)
    value = re.search("var %s = '(.+?)'" % var_value,
                      main_rsp.text).group(1)

    video_url = None
    for button in buttons:
        params = re.search(r"select_part\((.+?)\)",
                           button.a.attrs["onclick"]).group(1)
        tokens = params.split(",")
        group = tokens[1].replace("'", "")
        part = tokens[0].replace("'", "")
        _code = tokens[4].replace("'", "")
        code2 = tokens[5].replace("'", "")
        code3 = tokens[6].replace("'", "")
        # Kept inside the loop so a page with no buttons still returns
        # None instead of failing on a missing 'sound' entry.
        sound = re.search("'sound':'(.+?)'", main_rsp.text).group(1)
        data = urlencode(
            {
                'group': group,
                'part': part,
                'code': _code,
                'code2': code2,
                'code3': code3,
                'value': value,
                'sound': sound
            },
            quote_via=quote_plus)
        rsp = cls.__client.post(
            "https://www5.javmost.com/get_movie_source/",
            headers={
                'content-type':
                "application/x-www-form-urlencoded; charset=UTF-8"
            },
            data=data,
            proxies=proxy)
        candidate = decode(json.loads(rsp.text)["data"][0])
        if not candidate:
            continue
        if cls.__client.get(candidate, proxies=proxy).status_code == 200:
            video_url = candidate
            break

    if video_url is None:
        return None

    av = AV()
    av.preview_img_url = img
    av.video_url = video_url
    av.code = code
    return av
def release_date(self, date: Union[str, datetime.datetime]):
    """Store the release date, parsing it from a string if necessary.

    Args:
        date: A ``datetime.datetime`` (stored unchanged) or a value
            handed to ``datetime.datetime.strptime`` with the
            ``%Y-%m-%d`` format.

    The parse goes through ``try_evaluate`` with ``None`` passed as the
    fallback argument, and the first element of its result is what gets
    stored.
    """
    if not isinstance(date, datetime.datetime):
        parsed, _ = try_evaluate(
            lambda: datetime.datetime.strptime(date, "%Y-%m-%d"), None)
        self.__release_date = parsed
        return
    self.__release_date = date
def search_by_code(cls, code):
    """Find a playable video for ``code`` and return it as an ``AV``.

    The movie page is fetched through ``cls.__client``. Each part button
    on it is then tried in turn: a code is requested from the
    ``get_code`` endpoint, the button's ``select_part(...)`` arguments
    are posted to ``get_source``, the first returned URL is decoded, and
    the first decoded URL that answers with HTTP 200 wins.

    Args:
        code: Identifier appended to the site URL.

    Returns:
        An ``AV`` with ``preview_img_url``, ``video_url`` and ``code``
        set, or ``None`` when the page is not served with status 200, has
        no ``og:image`` meta tag, or no part yields a reachable URL.

    Raises:
        AttributeError / IndexError / ValueError: when the page markup
            does not contain the expected script data, or a button does
            not carry exactly seven ``select_part`` arguments (these
            lookups are not guarded).
    """
    url = "http://www5.javmost.com/" + code
    main_rsp = cls.__client.get(url)
    if main_rsp.status_code != 200:
        return None

    img, _ = try_evaluate(
        lambda: re.search(r"<meta property=\"og:image\" content=\"(.+?)\"",
                          main_rsp.text).group(1))
    if not img:
        return None
    # Nov. 13 adding: https://www5.javmost.com/IENE-623/
    # Only add a scheme when one is missing. Checking for "http:" alone
    # would turn "https://..." into "http:https://...".
    if not img.startswith(("http:", "https:")):
        img = "http:" + img

    bs = bs4.BeautifulSoup(main_rsp.text, "lxml")
    # First and last <li> are skipped by the original slicing.
    buttons = bs.select('.tab-overflow')[0].find_all(name='li')[1:-1]

    video_url = None
    for button in buttons:
        params = re.search(r"select_part\((.+?)\)",
                           button.a.attrs['onclick']).group(1)
        # Exactly seven arguments are expected; the 3rd and 4th are unused.
        part, group, _unused_a, _unused_o, code1, code2, code3 = [
            token.replace("'", "") for token in params.split(",")
        ]
        # Kept inside the loop so a page with no buttons still returns
        # None instead of failing on missing script data.
        data = re.search(r"get_source/\",(.+?)\}", main_rsp.text,
                         re.S).group(1)
        value = re.search(r"value: \"(.+?)\",", data).group(1)
        sound = re.search(r"sound: \"(.+?)\",", data).group(1)

        rsp = cls.__client.post("https://www5.javmost.com/get_code/",
                                data={"code": value})
        _code = rsp.text
        rsp = cls.__client.post(
            "https://www5.javmost.com/get_source/",
            data={
                "group": group,
                "part": part,
                "code": code1,
                "code2": code2,
                "code3": code3,
                "value": value,
                "sound": sound,
                "code4": _code
            })
        candidate = decode(json.loads(rsp.text)["data"][0])
        # A part whose URL could not be decoded is skipped rather than
        # passed to the HTTP client as an empty address.
        if not candidate:
            continue
        if cls.__client.get(candidate).status_code == 200:
            video_url = candidate
            break

    if video_url is None:
        return None

    av = AV()
    av.preview_img_url = img
    av.video_url = video_url
    av.code = code
    return av