def get_brief(mcs, code):
    """Search avsox for ``code`` and build a ``Brief`` from the linked movie page.

    The search page is scanned with ``mcs.__url_pattern``; its first capture
    group is taken as the movie page URL. That page is parsed with lxml and
    the first ``.movie`` element supplies the title and release date, while
    the ``<span>`` texts under ``#avatar-waterfall`` are joined as the actress
    list.

    :param code: code appended to the search URL and stored on the brief.
    :return: the populated ``Brief``, or ``None`` when the search page does
        not match ``mcs.__url_pattern``.
    """
    search_html = requests.get(
        "https://avsox.host/cn/search/" + code, proxies=proxy).text
    found = re.search(mcs.__url_pattern, search_html)
    if not found:
        return None

    detail_html = requests.get(found.group(1), proxies=proxy).text
    bs = bs4.BeautifulSoup(detail_html, "lxml")
    movie = bs.select(".movie")[0]

    brief = Brief()
    brief.code = code
    img = movie.select(".screencap", limit=1)[0].a.img
    brief.title = img.attrs["title"]
    # Falls back to "" (second noexcept argument) when the date is not found.
    brief.release_date = noexcept(
        lambda: re.search(mcs.__release_date_pattern, str(movie)).group(1), "")
    avatar_box = bs.select("#avatar-waterfall", limit=1)[0]
    brief.actress = ", ".join(span.text for span in avatar_box.find_all("span"))

    # Probe the image URL: keep it on 200, or take the redirect target.
    # NOTE(review): the range includes 400, and requests follows redirects by
    # default unless allow_redirects=False -- confirm the redirect branch is
    # actually reachable.
    img_src = img.attrs["src"]
    img_rsp = requests.get(img_src, proxies=proxy)
    status = img_rsp.status_code
    if status == 200:
        brief.preview_img_url = img_src
    elif 300 <= status <= 400 and "location" in img_rsp.headers:
        brief.preview_img_url = img_rsp.headers["location"]
    return brief
def release_date(self, date: Union[str, datetime.datetime]):
    """Store the release date.

    A ``datetime.datetime`` is stored unchanged. Any other value is parsed
    with the ``"%Y-%m-%d"`` format inside ``noexcept``, which is handed
    ``None`` as its second argument (presumably the value stored when parsing
    fails -- confirm against ``noexcept``).

    :param date: a ``datetime.datetime`` or a ``YYYY-MM-DD`` string.
    """
    if isinstance(date, datetime.datetime):
        parsed = date
    else:
        parsed = noexcept(
            lambda: datetime.datetime.strptime(date, "%Y-%m-%d"),
            None
        )
    self.__release_date = parsed
def __get_brief_from_tr(mcs, tr):
    """Build a ``Brief`` from one table row element.

    The preview image comes from the row's ``data-img`` attribute; a value
    starting with ``/`` is prefixed with the warashi host. Cells 1, 2 and 5
    of the row supply the title, the (upper-cased) code and the release date.

    :param tr: the parsed row element.
    :return: the populated ``Brief``.
    """
    brief = Brief()
    brief.preview_img_url = noexcept(lambda: tr.attrs["data-img"])
    img_url = brief.preview_img_url
    # The attribute may be missing, so check for a value before testing the prefix.
    if img_url and img_url.startswith("/"):
        brief.preview_img_url = "http://warashi-asian-pornstars.fr" + img_url

    cells = tr.select("td")
    brief.title = cells[1].text.strip()
    brief.code = cells[2].text.upper()
    brief.release_date = cells[5].text.strip()
    return brief
def get_history_names_by_li(mcs, li):
    """Fetch the page linked from a list item and extract the names on it.

    The URL is the first match of ``Etigoya.url_pattern`` in the item's
    markup. Every ``Etigoya.name_pattern`` match in the fetched page is
    cleaned by removing ``Etigoya.purify_pattern`` and stripping whitespace.

    :param li: the list-item element to inspect.
    :return: list of cleaned names; an empty list when no URL is found.
    """
    url = noexcept(
        lambda: re.search(Etigoya.url_pattern, str(li)).group(0))
    if not url:
        return []

    page = requests.get(url, proxies=proxy).text
    raw_names = re.findall(Etigoya.name_pattern, page)
    return [
        re.sub(Etigoya.purify_pattern, "", raw).strip()
        for raw in raw_names
    ]
def get_brief_from_a_card(card_tag):
    """Build a ``Brief`` from one result card element.

    * release date: first ``YYYY-MM-DD`` substring of the card text, parsed
      to a ``datetime``; wrapped in ``noexcept`` so a card without a date
      does not raise.
    * actress: texts of all ``<a class="btn-danger">`` elements, joined by
      ``", "``.
    * preview image: ``data-src`` of the card's ``<img>``; a value that does
      not start with ``http`` is prefixed with ``http:``.
    * title / code: stripped text of the ``<h5>`` / ``<h4>`` elements.

    :param card_tag: the parsed card element.
    :return: the populated ``Brief``.
    :raises AttributeError: when the card has no ``<h4>`` element.
    """
    release_date = noexcept(lambda: datetime.datetime.strptime(
        re.search(r"\d\d\d\d-\d\d-\d\d", card_tag.text).group(0), "%Y-%m-%d"))
    actress = [
        link.text
        for link in card_tag.find_all(name="a", attrs={"class": "btn-danger"})
    ]
    img = noexcept(lambda: card_tag.find(name="img").attrs["data-src"])
    # noexcept hands back a falsy value when the card has no <img data-src>;
    # only a real URL can be prefix-checked (calling .startswith on the
    # fallback used to raise AttributeError).
    if img and not img.startswith("http"):
        img = "http:" + img

    brief = Brief()
    brief.preview_img_url = img
    brief.title = noexcept(lambda: card_tag.find(name="h5").text.strip(), "")
    brief.actress = ", ".join(actress)
    brief.release_date = release_date
    brief.code = card_tag.find(name="h4").text.strip()
    return brief
def __check_name_in_box(mcs, name, box):
    """Return the Japanese name shown in ``box`` if the box mentions ``name``.

    The box's ``<p>`` text is lower-cased and split on ``-``; the second
    segment is taken as the Japanese name. On success the detail-page URL is
    cached in ``mcs.__actress_detail_url`` under both ``name`` and the
    Japanese name.

    :param name: name to look for, compared against the lower-cased box text.
    :param box: the parsed element to inspect.
    :return: the Japanese name, or ``None`` when ``name`` is not in the box,
        the title has no ``-`` separator, or the second segment is empty.
    """
    if name not in box.text.lower():
        return None

    title = box.find(name="p").text.lower()
    segments = title.split("-")
    # A title without a "-" has no second segment; treat it as "no match"
    # instead of raising IndexError.
    if len(segments) < 2:
        return None
    jp_name = segments[1].strip()
    if not jp_name:
        return None

    # cache for later parsing actress info, None for no url
    url = noexcept(lambda: box.a.attrs["href"])
    # Only build the URL when a link exists; formatting a missing href would
    # cache the bogus ".../None" instead of the None promised above.
    detail_url = "http://warashi-asian-pornstars.fr/%s" % url if url else None
    mcs.__actress_detail_url[name] = detail_url
    mcs.__actress_detail_url[jp_name] = detail_url
    return jp_name
def __get_brief_from_card(card):
    """Build a ``Brief`` from a card element using its CSS classes.

    * code: first ``.tag.is-link.is-light`` element.
    * actress: all ``.tag.is-primary.is-light`` elements, joined by ``", "``.
    * title: first ``.title`` element; the first ``rel`` value of its link
      is used as the preview image (fetched through ``noexcept``).
    * release date: text of the ``<p>`` inside the first ``<footer>``.

    :param card: the parsed card element.
    :return: the populated ``Brief``.
    """
    code = card.select(".tag.is-link.is-light")[0].text.strip()
    actress_names = [
        tag.text.strip() for tag in card.select(".tag.is-primary.is-light")
    ]
    title_tag = card.select(".title")[0]
    title = title_tag.text.strip()
    img = noexcept(lambda: title_tag.a.attrs["rel"][0])
    release_date = card.select("footer")[0].p.text.strip()

    brief = Brief()
    # Assigned in the same order as before: title, image, code, actress, date.
    fields = (
        ("title", title),
        ("preview_img_url", img),
        ("code", code),
        ("actress", ", ".join(actress_names)),
        ("release_date", release_date),
    )
    for attr_name, value in fields:
        setattr(brief, attr_name, value)
    return brief
def __get_brief_by_card(card):
    """Build a ``Brief`` from a card laid out as ``.column`` elements.

    Column 1 holds the release date, column 2 the actress ``<span>`` items,
    column 3 the title (the first ``rel`` value of its link is used as the
    preview image) and column 4 is followed by the code.

    :param card: the parsed card element.
    :return: the populated ``Brief``, or ``None`` when the card has no
        ``.column`` elements.
    """
    columns = card.select(".column")
    if not columns:
        # Some cards have no columns at all, e.g. 飯岡かなこ.
        return None

    code = columns[4].next.strip()
    actress_spans = columns[2].find_all(name="span")
    actress = ", ".join([span.text.strip() for span in actress_spans])
    title_column = columns[3]
    title = title_column.text.strip()
    img = noexcept(lambda: title_column.a.attrs["rel"][0])
    release_date = columns[1].text.strip()

    brief = Brief()
    brief.title = title
    brief.preview_img_url = img
    brief.code = code
    brief.actress = actress
    brief.release_date = release_date
    return brief
def search_by_code(mcs, code):
    """Look up ``code`` on javmost and return an ``AV`` with its video URL.

    The main page supplies the preview image (``og:image`` meta tag) and a
    token: the variable name following ``'value':`` is read first, then the
    quoted string assigned to that variable. Each candidate button (all
    ``<li>`` in the first ``.tab-overflow`` except the first and last) is
    submitted to ``mcs.__try_one_button`` and ``wait_until`` provides the
    resulting URL.

    :param code: code used to build the page URL and stored on the result.
    :return: the populated ``AV``, or ``None`` when the page is not a 200,
        a required element/token is missing, or no button yields a URL.
    """
    url = "http://www5.javmost.com/" + code + "/"
    main_rsp = mcs.__client.get(url, proxies=proxy)
    if main_rsp.status_code != 200:
        return None

    img = noexcept(
        lambda: re.search(r"<meta property=\"og:image\" content=\"(.+?)\"",
                          main_rsp.text).group(1))
    if not img:
        return None
    # Nov. 13 adding: https://www5.javmost.com/IENE-623/
    # Only scheme-less URLs get a prefix; an "https:" URL must stay untouched
    # (it used to become "http:https://...").
    if not img.startswith(("http:", "https:")):
        img = "http:" + img

    bs = bs4.BeautifulSoup(main_rsp.text, "lxml")
    tab_bars = bs.select(".tab-overflow")
    if not tab_bars:
        return None
    buttons = tab_bars[0].find_all(name="li")[1:-1]

    # A page without the expected script variables is a failed lookup, not a
    # crash: report it as None like every other failure path here.
    name_match = re.search("'value':(.+?),", main_rsp.text)
    if not name_match:
        return None
    var_value = name_match.group(1)
    value_match = re.search("var %s = '(.+?)'" % var_value, main_rsp.text)
    if not value_match:
        return None
    value = value_match.group(1)

    url = wait_until([
        submit(mcs.__try_one_button, button, value, main_rsp)
        for button in buttons
    ])
    if not url:
        return None

    av = AV()
    av.preview_img_url = img
    av.video_url = url
    av.code = code
    return av
def processor(*x, **kwx):
    """Call ``func`` with the placeholder slots of ``args`` filled from ``x``.

    For every index in ``args_place_holders`` the value at that same index
    of ``x`` replaces the corresponding entry of ``args``. ``func`` is then
    invoked inside ``noexcept`` with the filled arguments, ``fixed_kwargs``
    and any extra keyword arguments.

    :param x: positional values, indexed by the placeholder positions.
    :param kwx: extra keyword arguments forwarded to ``func``.
    :return: whatever ``noexcept`` returns for the wrapped call.
    """
    # Work on a copy: ``args`` comes from the outer scope and is shared by
    # every call, so filling it in place would overwrite the template (and
    # would fail outright if it is a tuple).
    _args = list(args)
    for i in args_place_holders:
        _args[i] = x[i]
    return noexcept(lambda: func(*_args, **fixed_kwargs, **kwx))