def _get_content_from_data(self, data, session: Session, self_html: str, post_hint: str) -> str:
    """Assemble the HTML body of a post from its JSON ``data`` node.

    The result is, in order: the unescaped ``self_html`` (when not empty),
    the gallery markup (when the ``is_gallery`` field stringifies to
    ``"True"``) and whatever ``_manage_external_content`` returns (when it
    is not ``None``). The assembled markup then goes through
    ``_manage_reddit_preview_images`` and every ``<video `` opening is
    turned into ``<video controls ``.

    Args:
        data: JSON node describing the post.
        session: HTTP session forwarded to ``_manage_external_content``.
        self_html: HTML-escaped self text of the post, or ``""``.
        post_hint: post hint forwarded to ``_manage_external_content``.

    Returns:
        The HTML content built for the post.
    """
    target_url: str = get_node_value_if_exists(data, "url_overridden_by_dest")
    # A destination starting with "/" is site-relative: make it absolute.
    if len(target_url) > 0 and target_url[:1] == "/":
        target_url = "https://www.reddit.com" + target_url

    preview_image: Optional[str] = cast(
        Optional[str],
        get_node(data, "preview", "images", 0, "source", "url"))
    gallery_flag: str = str(get_node_value_if_exists(data, "is_gallery"))
    domain: Optional[str] = cast(str, get_node(data, "domain"))

    body: str = html.unescape(self_html) if self_html != "" else ""
    if gallery_flag == "True":
        body += self._manage_gallery(data)

    external: Optional[str] = self._manage_external_content(
        session, target_url, post_hint, preview_image, domain, data)
    if external is not None:
        body += external

    body = self._manage_reddit_preview_images(body)
    return body.replace("<video ", "<video controls ")
def get_feed(self, parameters: dict, session: requests.Session) -> str:
    """Build the "Le bon coin" RSS feed for the requested search criteria.

    When ``parameters`` holds a ``"criteria"`` entry, the search page is
    fetched, the JSON found after the ``__REDIAL_PROPS__ = `` marker is
    loaded with ``_load_json`` and one ``<item>`` is produced for every ad
    under ``root[5]["data"]["ads"]`` whose price is not empty.

    Args:
        parameters: handler parameters; ``"criteria"`` is the URL-quoted
            path/query appended to the original website URL.
        session: HTTP session used to fetch the search page.

    Returns:
        The RSS document as a string (with no item when there is no
        criteria or no usable JSON).
    """
    item_template = """<item> <title><![CDATA[%s - %s - %s]]></title> <description> <![CDATA[ <img src="%s"/><p>%s - %s - %s</p> %s ]]> </description> <link> %s </link> <pubDate>%s</pubDate> </item>"""
    feed_template = """<rss version="2.0"> <channel> <title>Le bon coin</title> <language>fr-FR</language> %s </channel> </rss>"""

    rendered = []
    if "criteria" in parameters:
        search_url = "%s%s" % (self.get_original_website(),
                               unquote_plus(parameters["criteria"]))
        response = session.get(search_url)
        props: Optional[dict] = self._load_json(response.text,
                                                "__REDIAL_PROPS__ = ")

        # An empty root makes the shape check below fail without raising.
        root = props["root"] if props is not None else []
        ads = []
        if len(root) > 5 and "data" in root[5] and "ads" in root[5]["data"]:
            ads = root[5]["data"]["ads"]

        for ad in ads:
            location: str = self._get_location(ad)
            subject: str = get_node_value_if_exists(ad, "subject")
            body: str = get_node_value_if_exists(ad, "body")
            detail_url: str = get_node_value_if_exists(ad, "url")
            price: str = self._get_price(ad)
            published: str = get_node_value_if_exists(
                ad, "first_publication_date")
            main_img, extra_imgs = self._process_images(ad)

            # Ads without a price are left out of the feed.
            if price == "":
                continue

            link = self.get_handler_url_with_parameters({"url": detail_url})
            rendered.append(item_template % (
                location, price, subject,
                main_img, location, price, body,
                extra_imgs,
                link,
                published))

    return feed_template % "".join(rendered)
def _get_price(self, entry: dict) -> str:
    """Return the display price of ``entry``.

    Args:
        entry: JSON node of an ad; its ``"price"`` value is read with
            ``get_node_value_if_exists``.

    Returns:
        The price with a space as thousands separator followed by " €"
        when the value is an ``int``, an empty string otherwise.
    """
    amount = get_node_value_if_exists(entry, "price")
    if not isinstance(amount, int):
        return ""

    # "{:,}" groups by thousands with commas; swap them for spaces.
    grouped = "{:,}".format(amount).replace(",", " ")
    return "%s €" % grouped
def get_feed(self, parameters: dict, session: requests.Session) -> str:
    """Build the "Bien Ici" RSS feed for the requested search criteria.

    When ``parameters`` holds a ``"criteria"`` entry, the matching URL is
    fetched, its body is decoded as JSON and one ``<item>`` is produced
    for every entry of ``realEstateAds``. Each item links to the handler
    URL wrapping the ``realEstateAd.json`` detail URL of the ad.

    Args:
        parameters: handler parameters; ``"criteria"`` is the URL-quoted
            path/query appended to the original website URL.
        session: HTTP session used to fetch the search result.

    Returns:
        The RSS document as a string (with no item when there is no
        criteria or no ``realEstateAds`` in the response).
    """
    item_template = """<item> <title><![CDATA[%s - %s - %s]]></title> <description> <![CDATA[ <img src="%s"/><p>%s - %s - %s</p> %s %s ]]> </description> <link> %s </link> </item>"""
    feed_template = """<rss version="2.0"> <channel> <title>Bien Ici</title> <language>fr-FR</language> %s </channel> </rss>"""

    rendered = []
    if "criteria" in parameters:
        search_url = "%s%s" % (self.get_original_website(),
                               unquote_plus(parameters["criteria"]))
        response = session.get(search_url)
        payload = json.loads(response.text)

        ads = []
        if payload is not None and "realEstateAds" in payload:
            ads = payload["realEstateAds"]

        for ad in ads:
            city: str = get_node_value_if_exists(ad, "city")
            price: str = self._get_price(ad)
            title: str = get_node_value_if_exists(ad, "title")
            description: str = get_node_value_if_exists(ad, "description")
            detail_url: str = "https://www.bienici.com/realEstateAd.json?id=%s" % get_node_value_if_exists(
                ad, "id")
            img_urls: List[str] = self._get_img_urls(ad)

            # The first picture, when there is one, illustrates the item.
            cover = img_urls[0] if len(img_urls) > 0 else ""
            gallery = self._build_imgs(img_urls)
            link = self.get_handler_url_with_parameters({"url": detail_url})

            rendered.append(item_template % (
                city, price, title,
                cover, city, price, title,
                description, gallery,
                link))

    return feed_template % "".join(rendered)
def get_content(self, url: str, parameters: dict,
                session: requests.Session) -> PyRSSWContent:
    """Build the HTML content of a single ad page.

    The page is fetched and the JSON found after the ``__NEXT_DATA__``
    script marker is loaded with ``_load_json``. When the
    ``root.props.pageProps.ad`` node exists, the content lists the
    subject, price, location, body, images, category name and every
    attribute that has a non-empty ``key_label``. Otherwise the content
    is empty.

    Args:
        url: URL of the ad page.
        parameters: handler parameters (not read by this method).
        session: HTTP session used to fetch the page.

    Returns:
        A ``PyRSSWContent`` wrapping the content in a ``main-content``
        div.
    """
    response = session.get(url=url)
    next_data: Optional[dict] = self._load_json(
        response.text, "__NEXT_DATA__\" type=\"application/json\">")

    # An empty root makes the shape check below fail without raising.
    root = next_data["root"] if next_data is not None else {}

    fragments = []
    if "props" in root and "pageProps" in root["props"] and "ad" in root[
            "props"]["pageProps"]:
        ad = root["props"]["pageProps"]["ad"]

        fragments.append(
            "<p><b>%s</b></p>" % get_node_value_if_exists(ad, "subject"))
        fragments.append("<p><b>%s</b></p>" % self._get_price(ad))
        fragments.append("<p>%s</p>" % self._get_location(ad))
        fragments.append("<hr/>")
        fragments.append("<b>%s</b>" % get_node_value_if_exists(ad, "body"))
        fragments.append("<hr/>")

        # Only the secondary images markup is used on the detail page.
        _, gallery = self._process_images(ad)
        fragments.append(gallery)
        fragments.append("<hr/>")
        fragments.append(
            "<p>%s</p>" % get_node_value_if_exists(ad, "category_name"))
        fragments.append("<hr/>")

        if "attributes" in ad:
            for attribute in ad["attributes"]:
                label: str = get_node_value_if_exists(attribute, "key_label")
                if label == "":
                    continue
                fragments.append("<p><strong>%s</strong>: %s</p>" % (
                    label,
                    get_node_value_if_exists(attribute, "value_label")))

    inner: str = "".join(fragments)
    return PyRSSWContent(""" <div class=\"main-content\"> %s </div>""" % (inner))
def get_reddit_content(self, url: str, session: Session,
                       with_comments: bool) -> PyRSSWContent:
    """Build the HTML article of a reddit post, optionally with comments.

    ``<url>/.json`` is fetched and decoded. For every ``t3`` node found by
    ``_get_datatypes_json`` the article receives a ``<h1>`` title followed
    by the post content, or by ``Content removed`` when the post carries a
    ``removed_by`` / ``removed_by_category`` value. When the response is
    not valid JSON, the article starts with the HTTP status code and the
    response rendered through ``etree.HTML`` / ``to_string``.

    Args:
        url: URL of the reddit post (without the ``/.json`` suffix).
        session: HTTP session used to fetch the post.
        with_comments: when true, a "Comments" section built from the
            ``t1`` nodes is appended.

    Returns:
        A ``PyRSSWContent`` wrapping everything in an ``<article>``.
    """
    content: str = ""
    page = session.get(url="%s/.json" % url, headers=self._get_headers())

    try:
        root = json.loads(page.content)
    except JSONDecodeError:
        # Not JSON: expose the status code and the raw page instead.
        content = "<strong>Status code: %d<br/></strong>" % page.status_code
        content += to_string(etree.HTML(page.content, parser=None))
        root = {}

    for data in self._get_datatypes_json(root, "t3"):  # t3 : content
        content += "<h1>%s</h1>" % get_node_value_if_exists(data, "title")
        self_html: str = get_node_value_if_exists(data, "selftext_html")
        post_hint: str = get_node_value_if_exists(data, "post_hint")
        removed_by: str = get_node_value_if_exists(
            data, "removed_by") + get_node_value_if_exists(
                data, "removed_by_category")

        # Append rather than assign: assigning here used to discard the
        # <h1> title that had just been added above.
        if removed_by == "":
            content += self._get_content_from_data(data=data,
                                                   session=session,
                                                   self_html=self_html,
                                                   post_hint=post_hint)
        else:
            content += "Content removed"

    comments: str = ""
    if with_comments:
        comments = "<hr/><h2>Comments</h2>"
        for comment_json in self._get_datatypes_json(root, "t1"):  # t1 : comments
            comments += self.get_comments(comment_json)

    return PyRSSWContent("<article>%s%s</article>" % (content, comments))
def get_content(self, url: str, parameters: dict,
                session: requests.Session) -> PyRSSWContent:
    """Build the HTML content of a single ad from its JSON description.

    The body fetched from ``url`` is decoded as JSON and rendered as
    title, price, postal code and city, description and images.

    Args:
        url: URL returning the JSON description of the ad.
        parameters: handler parameters (not read by this method).
        session: HTTP session used to fetch the JSON.

    Returns:
        A ``PyRSSWContent`` wrapping the content in a ``main-content``
        div.
    """
    raw_text: str = session.get(url=url).text
    ad = json.loads(raw_text)

    if ad is None:
        # NOTE(review): a JSON ``null`` payload leaves the raw response
        # text as the content - confirm this is intended.
        inner: str = raw_text
    else:
        title_html = "<p><b>%s</b></p>" % get_node_value_if_exists(
            ad, "title")
        price_html = "<p><b>%s</b></p>" % self._get_price(ad)
        place_html = "<p>%s - %s</p>" % (
            get_node_value_if_exists(ad, "postalCode"),
            get_node_value_if_exists(ad, "city"))
        description_html = "<b>%s</b>" % get_node_value_if_exists(
            ad, "description")
        images_html = self._build_imgs(self._get_img_urls(ad))

        inner = "".join([
            title_html,
            price_html,
            place_html,
            "<hr/>",
            description_html,
            "<hr/>",
            images_html,
        ])

    return PyRSSWContent(""" <div class=\"main-content\"> %s </div>""" % (inner))
def get_feed(self, parameters: dict, session: requests.Session) -> str:
    """Build the "Se Loger" RSS feed for the requested search criteria.

    When ``parameters`` holds a ``"criteria"`` entry, the search URL is
    fetched (after ``_update_headers`` has been applied to the session)
    and the JSON returned by ``_load_json`` is read. One ``<item>`` is
    produced for every card of ``cards.list`` whose price is not empty.
    When the JSON is missing or lacks ``cards.list``, an error is logged
    and the feed holds a single item reporting the failure, whose link
    carries a random ``dummy`` parameter.

    Args:
        parameters: handler parameters; ``"criteria"`` is the URL-quoted
            query appended to the search URL.
        session: HTTP session used to fetch the search result.

    Returns:
        The RSS document as a string.
    """
    item_template = """<item> <title><![CDATA[%s - %s - %s]]></title> <description> <![CDATA[ <img src="%s"/><p>%s - %s - %s</p> %s ]]> </description> <link> %s </link> </item>"""
    error_template = """<item> <title>Seloger</title> <description>Unable to read json, blacklisted?</description> <link> %s </link> </item>"""
    feed_template = """<rss version="2.0"> <channel> <title>Se Loger</title> <language>fr-FR</language> %s </channel> </rss>"""

    feed_body: str = ""
    if "criteria" in parameters:
        # NOTE(review): "list.htm=?" looks like it could be a typo for
        # "list.htm?" - kept unchanged, confirm against the website.
        search_url = "%slist.htm=?%s" % (self.get_original_website(),
                                         unquote_plus(parameters["criteria"]))
        self._update_headers(session)
        raw_text: str = session.get(search_url).text
        payload: Optional[dict] = self._load_json(raw_text)

        if payload is None or "cards" not in payload or "list" not in payload[
                "cards"]:
            self.log_error("Unable to read json, blacklisted? (criteria=%s)" %
                           parameters["criteria"])
            dummy = str(random.randrange(100000000000, 999999999999))
            feed_body = error_template % (
                self.get_handler_url_with_parameters({"dummy": dummy}))
        else:
            rendered = []
            for card in payload["cards"]["list"]:
                location: str = get_node_value_if_exists(card, "cityLabel")
                district: str = get_node_value_if_exists(
                    card, "districtLabel")
                if district != "":
                    location += " - " + district
                summary: str = get_node_value_if_exists(card, "description")
                detail_url: str = get_node_value_if_exists(
                    card, "classifiedURL")
                price: str = self._get_price(card)
                main_img, extra_imgs = self._process_images(card)

                # Cards without a price are left out of the feed.
                if price == "":
                    continue

                link = self._get_url_prefix(
                    self.get_handler_url_with_parameters({"url": detail_url}))
                rendered.append(item_template % (
                    location, price, summary,  # NOSONAR
                    main_img, location, price, summary,
                    extra_imgs,
                    link))
            feed_body = "".join(rendered)

    return feed_template % feed_body