コード例 #1
0
ファイル: redditng_handler.py プロジェクト: yawks/pyrssw
    def _get_content_from_data(self, data, session: Session, self_html: str,
                               post_hint: str) -> str:
        """Build the HTML body of a post from its JSON ``data`` node.

        The result concatenates, in order: the unescaped ``self_html`` (when
        not empty), the gallery markup (when ``is_gallery`` reads "True") and
        whatever ``_manage_external_content`` returns (when it is not None).
        The whole string then goes through ``_manage_reddit_preview_images``
        and every ``<video `` tag receives a ``controls`` attribute.

        Args:
            data: JSON node describing the post.
            session: HTTP session forwarded to ``_manage_external_content``.
            self_html: self text of the post, HTML-unescaped before use;
                "" when there is none.
            post_hint: post hint forwarded to ``_manage_external_content``.

        Returns:
            The assembled HTML string.
        """
        target_url: str = get_node_value_if_exists(
            data, "url_overridden_by_dest")
        # A leading slash means the link is relative to the reddit website.
        if target_url and target_url[0] == "/":
            target_url = "https://www.reddit.com" + target_url

        preview: Optional[str] = cast(
            Optional[str],
            get_node(data, "preview", "images", 0, "source", "url"))
        gallery_flag: str = str(get_node_value_if_exists(data, "is_gallery"))
        domain: Optional[str] = cast(Optional[str], get_node(data, "domain"))

        fragments = []
        if self_html != "":
            fragments.append(html.unescape(self_html))
        if gallery_flag == "True":
            fragments.append(self._manage_gallery(data))

        external: Optional[str] = self._manage_external_content(
            session, target_url, post_hint, preview, domain, data)
        if external is not None:
            fragments.append(external)

        body: str = self._manage_reddit_preview_images("".join(fragments))
        return body.replace("<video ", "<video controls ")
コード例 #2
0
    def get_feed(self, parameters: dict, session: requests.Session) -> str:
        items: str = ""
        if "criteria" in parameters:
            url = "%s%s" % (self.get_original_website(),
                            unquote_plus(parameters["criteria"]))
            page = session.get(url)
            json_obj: Optional[dict] = self._load_json(page.text,
                                                       "__REDIAL_PROPS__ = ")
            if json_obj is not None and len(
                    json_obj["root"]) > 5 and "data" in json_obj["root"][
                        5] and "ads" in json_obj["root"][5]["data"]:
                for card in json_obj["root"][5]["data"]["ads"]:

                    location: str = self._get_location(card)
                    small_description: str = get_node_value_if_exists(
                        card, "subject")
                    description: str = get_node_value_if_exists(card, "body")
                    url_detail: str = get_node_value_if_exists(card, "url")
                    price: str = self._get_price(card)
                    publication_date: str = get_node_value_if_exists(
                        card, "first_publication_date")

                    img_url: str = ""
                    other_imgs: str = ""
                    img_url, other_imgs = self._process_images(card)

                    if price != "":
                        items += """<item>
            <title><![CDATA[%s - %s - %s]]></title>
            <description>
                <![CDATA[
                    <img src="%s"/><p>%s - %s - %s</p>
                    %s
                ]]>
            </description>
            <link>
                %s
            </link>
            <pubDate>%s</pubDate>
        </item>""" % (location, price, small_description, img_url, location,
                        price, description, other_imgs,
                        self.get_handler_url_with_parameters(
                          {"url": url_detail}), publication_date)

        return """<rss version="2.0">
    <channel>
        <title>Le bon coin</title>
        <language>fr-FR</language>
        %s
    </channel>
</rss>""" % items
コード例 #3
0
ファイル: bienici_handler.py プロジェクト: yawks/pyrssw
    def _get_price(self, entry: dict) -> str:
        """Return the "price" of ``entry`` formatted for display.

        An integer price is rendered with a space as thousands separator
        followed by " €" (e.g. 1234567 gives "1 234 567 €"). Any value that
        is not an ``int`` yields an empty string.
        """
        raw_price = get_node_value_if_exists(entry, "price")
        if not isinstance(raw_price, int):
            return ""

        # Group digits with commas first, then swap them for spaces.
        grouped = format(raw_price, ",").replace(",", " ")
        return "%s €" % grouped
コード例 #4
0
ファイル: bienici_handler.py プロジェクト: yawks/pyrssw
    def get_feed(self, parameters: dict, session: requests.Session) -> str:
        items: str = ""
        if "criteria" in parameters:
            url = "%s%s" % (self.get_original_website(),
                            unquote_plus(parameters["criteria"]))
            page = session.get(url)

            json_obj = json.loads(page.text)
            if json_obj is not None and "realEstateAds" in json_obj:
                for entry in json_obj["realEstateAds"]:

                    location: str = get_node_value_if_exists(entry, "city")
                    price: str = self._get_price(entry)
                    small_description: str = get_node_value_if_exists(
                        entry, "title")
                    description: str = get_node_value_if_exists(
                        entry, "description")
                    url_detail: str = "https://www.bienici.com/realEstateAd.json?id=%s" % get_node_value_if_exists(
                        entry, "id")
                    img_urls: List[str] = self._get_img_urls(entry)

                    items += """<item>
                <title><![CDATA[%s - %s - %s]]></title>
                <description>
                    <![CDATA[
                        <img src="%s"/><p>%s - %s - %s</p>
                        %s
                        %s
                    ]]>
                </description>
                <link>
                    %s
                </link>
            </item>""" % (location, price, small_description,
                          img_urls[0] if len(img_urls) > 0 else "", location,
                          price, small_description, description,
                          self._build_imgs(img_urls),
                          self.get_handler_url_with_parameters(
                              {"url": url_detail}))

        return """<rss version="2.0">
    <channel>
        <title>Bien Ici</title>
        <language>fr-FR</language>
        %s
    </channel>
</rss>""" % items
コード例 #5
0
    def get_content(self, url: str, parameters: dict,
                    session: requests.Session) -> PyRSSWContent:
        """Render the detail page of an ad as simplified HTML.

        The page at ``url`` is downloaded and the JSON following the
        ``__NEXT_DATA__`` script marker is loaded. When the node
        root/props/pageProps/ad exists, its subject, price, location, body,
        images, category name and labelled attributes are rendered;
        otherwise the main content is left empty.

        Args:
            url: URL of the ad page.
            parameters: handler parameters (not read by this method).
            session: HTTP session used to fetch the page.

        Returns:
            A ``PyRSSWContent`` wrapping the generated ``main-content`` div.
        """
        fragments = []

        response = session.get(url=url)
        json_obj: Optional[dict] = self._load_json(
            response.text, "__NEXT_DATA__\" type=\"application/json\">")

        if json_obj is not None and "props" in json_obj["root"]:
            props = json_obj["root"]["props"]
            if "pageProps" in props and "ad" in props["pageProps"]:
                node = props["pageProps"]["ad"]

                fragments.append("<p><b>%s</b></p>" %
                                 get_node_value_if_exists(node, "subject"))
                fragments.append("<p><b>%s</b></p>" % self._get_price(node))
                fragments.append("<p>%s</p>" % self._get_location(node))
                fragments.append("<hr/>")
                fragments.append("<b>%s</b>" %
                                 get_node_value_if_exists(node, "body"))
                fragments.append("<hr/>")

                # Only the additional images markup is needed here.
                _, other_imgs = self._process_images(node)
                fragments.append(other_imgs)

                fragments.append("<hr/>")
                fragments.append(
                    "<p>%s</p>" %
                    get_node_value_if_exists(node, "category_name"))
                fragments.append("<hr/>")

                if "attributes" in node:
                    for attribute in node["attributes"]:
                        label: str = get_node_value_if_exists(
                            attribute, "key_label")
                        # Attributes without a label are skipped.
                        if label == "":
                            continue
                        fragments.append(
                            "<p><strong>%s</strong>: %s</p>" %
                            (label,
                             get_node_value_if_exists(attribute,
                                                      "value_label")))

        content: str = "".join(fragments)
        return PyRSSWContent("""
                <div class=\"main-content\">
                    %s
                </div>""" % (content))
コード例 #6
0
ファイル: redditng_handler.py プロジェクト: yawks/pyrssw
    def get_reddit_content(self, url: str, session: Session,
                           with_comments: bool) -> PyRSSWContent:
        """Fetch a reddit post through its ``.json`` endpoint and render it.

        For every "t3" (content) node found, the title is rendered as an
        ``<h1>`` followed by the post content, or by "Content removed" when
        one of the "removed_by" / "removed_by_category" fields is set. When
        the response cannot be parsed as JSON, the HTTP status code and the
        returned page are rendered instead.

        Args:
            url: URL of the reddit post ("/.json" is appended to it).
            session: HTTP session used to fetch the post.
            with_comments: when True, the "t1" (comment) nodes are rendered
                after the content, under a "Comments" heading.

        Returns:
            A ``PyRSSWContent`` wrapping the generated ``<article>``.
        """
        content: str = ""
        page = session.get(url="%s/.json" % url, headers=self._get_headers())

        try:
            root = json.loads(page.content)
        except JSONDecodeError:
            # Not a JSON answer: expose what was received to help diagnose.
            content = "<strong>Status code: %d<br/></strong>" % page.status_code
            content += to_string(etree.HTML(page.content, parser=None))
            root = {}

        for data in self._get_datatypes_json(root, "t3"):  # t3 : content
            content += "<h1>%s</h1>" % get_node_value_if_exists(data, "title")
            self_html: str = get_node_value_if_exists(data, "selftext_html")
            post_hint: str = get_node_value_if_exists(data, "post_hint")
            removed_by: str = get_node_value_if_exists(
                data, "removed_by") + get_node_value_if_exists(
                    data, "removed_by_category")
            # Append (not assign): a plain assignment here would throw away
            # the <h1> title that was just added to the content.
            if removed_by == "":
                content += self._get_content_from_data(data=data,
                                                       session=session,
                                                       self_html=self_html,
                                                       post_hint=post_hint)
            else:
                content += "Content removed"

        comments: str = ""
        if with_comments:
            comments = "<hr/><h2>Comments</h2>" + "".join(
                self.get_comments(comment_json)
                for comment_json in self._get_datatypes_json(
                    root, "t1"))  # t1 : comments

        content = "<article>%s%s</article>" % (content, comments)

        return PyRSSWContent(content)
コード例 #7
0
ファイル: bienici_handler.py プロジェクト: yawks/pyrssw
    def get_content(self, url: str, parameters: dict,
                    session: requests.Session) -> PyRSSWContent:
        """Render the JSON description of a real estate ad as HTML.

        The body fetched from ``url`` is parsed as JSON. Its title, price,
        postal code, city, description and images are rendered in that
        order. When the parsed JSON is ``None`` the raw response text is
        used as content.

        Args:
            url: URL returning the JSON description of the ad.
            parameters: handler parameters (not read by this method).
            session: HTTP session used to fetch the JSON.

        Returns:
            A ``PyRSSWContent`` wrapping the generated ``main-content`` div.
        """
        raw_text: str = session.get(url=url).text
        json_obj = json.loads(raw_text)

        if json_obj is None:
            content: str = raw_text
        else:
            content = "".join([
                "<p><b>%s</b></p>" % get_node_value_if_exists(
                    json_obj, "title"),
                "<p><b>%s</b></p>" % self._get_price(json_obj),
                "<p>%s - %s</p>" % (
                    get_node_value_if_exists(json_obj, "postalCode"),
                    get_node_value_if_exists(json_obj, "city")),
                "<hr/>",
                "<b>%s</b>" % get_node_value_if_exists(
                    json_obj, "description"),
                "<hr/>",
                self._build_imgs(self._get_img_urls(json_obj)),
            ])

        return PyRSSWContent("""
                <div class=\"main-content\">
                    %s
                </div>""" % (content))
コード例 #8
0
ファイル: seloger_handler.py プロジェクト: yawks/pyrssw
    def get_feed(self, parameters: dict, session: requests.Session) -> str:
        items: str = ""
        if "criteria" in parameters:
            url = "%slist.htm=?%s" % (self.get_original_website(),
                                      unquote_plus(parameters["criteria"]))

            self._update_headers(session)
            content: str = session.get(url).text

            json_obj: Optional[dict] = self._load_json(content)
            if json_obj is not None and "cards" in json_obj and "list" in json_obj[
                    "cards"]:
                for card in json_obj["cards"]["list"]:

                    location: str = get_node_value_if_exists(card, "cityLabel")
                    district: str = get_node_value_if_exists(
                        card, "districtLabel")
                    if district != "":
                        location += " - " + district

                    small_description: str = get_node_value_if_exists(
                        card, "description")
                    url_detail: str = get_node_value_if_exists(
                        card, "classifiedURL")
                    price: str = self._get_price(card)

                    img_url: str = ""
                    other_imgs: str = ""
                    img_url, other_imgs = self._process_images(card)

                    if price != "":
                        items += """<item>
            <title><![CDATA[%s - %s - %s]]></title>
            <description>
                <![CDATA[
                    <img src="%s"/><p>%s - %s - %s</p>
                    %s
                ]]>
            </description>
            <link>
                %s
            </link>
        </item>""" % (
                            location,
                            price,
                            small_description,  # NOSONAR
                            img_url,
                            location,
                            price,
                            small_description,
                            other_imgs,
                            self._get_url_prefix(
                                self.get_handler_url_with_parameters(
                                    {"url": url_detail})))
            else:
                self.log_error(
                    "Unable to read json, blacklisted? (criteria=%s)" %
                    parameters["criteria"])
                items = """<item>
            <title>Seloger</title>
            <description>Unable to read json, blacklisted?</description>
            <link>
                %s
            </link>
        </item>""" % (self.get_handler_url_with_parameters({
                    "dummy":
                    str(random.randrange(100000000000, 999999999999))
                }))

        return """<rss version="2.0">
    <channel>
        <title>Se Loger</title>
        <language>fr-FR</language>
        %s
    </channel>
</rss>""" % items