Ejemplo n.º 1
0
    def edit(self, text, title=None, date_of_pub=None):
        """
        Update the text, title and publication date of this concept.

        The edit form is downloaded first. Values which were not given
        (`title`, `date_of_pub`) are read from the form's current inputs and
        everything is then POSTed to the form's action.

        Args:
            text (str): New text of the concept.
            title (str, default None): New title of the concept. When left
                  as ``None``, the title found in the edit form is kept.
            date_of_pub (str/int, default None): Date string in abclinuxu
                        format or timestamp determining when the concept should
                        be automatically published. Strings are sent as they
                        are, other values go through ``ts_to_concept_date``.
                        When left as ``None``, the date found in the edit form
                        is kept.

        Note:
            `date_of_pub` can be string in format ``"%Y-%m-%d %H:%M"``.
        """
        if not self._meta:
            self._init_metadata()

        edit_page = download(
            url_context(self._meta["Uprav zápis"]),
            session=self._session
        )
        dom = dhtmlparser.parseString(edit_page)

        forms = dom.find("form", {"name": "form"})
        assert forms, "Can't find edit form!"
        edit_form = first(forms)

        def current_value(input_name):
            # value of the named <input> as it currently stands in the form
            field = first(edit_form.find("input", {"name": input_name}))
            return field.params["value"]

        form_action = edit_form.params["action"]

        if title is None:
            title = current_value("title")

        if date_of_pub is None:
            publish = current_value("publish")
        elif isinstance(date_of_pub, basestring):
            publish = date_of_pub
        else:
            publish = ts_to_concept_date(date_of_pub)

        payload = {
            "cid": 0,
            "publish": publish,
            "content": text,
            "title": title,
            "delay": "Ulož",
            "action": "edit2",
        }
        response = download(
            url=url_context(form_action),
            method="POST",
            data=payload,
            session=self._session
        )

        # both helpers inspect the response; presumably they raise on failure
        check_error_div(response, '<div class="error" id="contentError">')
        check_error_page(response)
Ejemplo n.º 2
0
    def edit(self, text, title=None, date_of_pub=None):
        """
        Update the text, title and publication date of this concept.

        The edit form is downloaded first. Values which were not given
        (`title`, `date_of_pub`) are read from the form's current inputs and
        everything is then POSTed to the form's action.

        Args:
            text (str): New text of the concept.
            title (str, default None): New title of the concept. When left
                  as ``None``, the title found in the edit form is kept.
            date_of_pub (str/int, default None): Date string in abclinuxu
                        format or timestamp determining when the concept should
                        be automatically published. Strings are sent as they
                        are, other values go through ``ts_to_concept_date``.
                        When left as ``None``, the date found in the edit form
                        is kept.

        Note:
            `date_of_pub` can be string in format ``"%Y-%m-%d %H:%M"``.
        """
        if not self._meta:
            self._init_metadata()

        edit_page = download(
            url_context(self._meta["Uprav zápis"]),
            session=self._session
        )
        dom = dhtmlparser.parseString(edit_page)

        forms = dom.find("form", {"name": "form"})
        assert forms, "Can't find edit form!"
        edit_form = first(forms)

        def current_value(input_name):
            # value of the named <input> as it currently stands in the form
            field = first(edit_form.find("input", {"name": input_name}))
            return field.params["value"]

        form_action = edit_form.params["action"]

        if title is None:
            title = current_value("title")

        if date_of_pub is None:
            publish = current_value("publish")
        elif isinstance(date_of_pub, basestring):
            publish = date_of_pub
        else:
            publish = ts_to_concept_date(date_of_pub)

        payload = {
            "cid": 0,
            "publish": publish,
            "content": text,
            "title": title,
            "delay": "Ulož",
            "action": "edit2",
        }
        response = download(
            url=url_context(form_action),
            method="POST",
            data=payload,
            session=self._session
        )

        # both helpers inspect the response; presumably they raise on failure
        check_error_div(response, '<div class="error" id="contentError">')
        check_error_page(response)
Ejemplo n.º 3
0
    def add_tag(self, tag):
        """
        Assign `tag` to this blogpost and refresh :attr:`tags`.

        Args:
            tag (Tag): :class:`Tag` instance. See :class:`possible_tags` for
                list of all possible tags.

        Raises:
            KeyError: When `tag` is not an instance of :class:`Tag`.
            ValueError: When :attr:`uid` is not set.

        Returns:
            list: List of :class:`Tag` objects, as parsed from the response
                to the assign request.
        """
        if not isinstance(tag, Tag):
            raise KeyError(
                "Tag have instance of Tag and to be from .possible_tags()"
            )

        if not self.uid:
            raise ValueError(
                "Can't assign tag - .uid property not set. Call .pull() or "
                "assign .uid manually."
            )

        query_values = (self.uid, tag.norm)
        assign_url = "/ajax/tags/assign?rid=%d&tagID=%s" % query_values
        response_xml = download(url_context(assign_url))

        # the response is parsed and becomes the new list of tags
        self.tags = self.__class__._parse_tags(response_xml)
        return self.tags
Ejemplo n.º 4
0
    def add_pic(self, opened_file):
        """
        Upload a picture to the Concept.

        Args:
            opened_file (file): opened file object; it is sent as the
                ``screenshot`` file of the upload form.
        """
        if not self._meta:
            self._init_metadata()

        # the page with the upload form tells where the picture is POSTed
        form_page = download(
            url_context(self._meta["Přidej obrázek"]),
            session=self._session
        )
        dom = dhtmlparser.parseString(form_page)
        upload_form = first(
            dom.find("form", {"enctype": "multipart/form-data"})
        )
        upload_url = url_context(upload_form.params["action"])

        form_fields = {
            "action": "addScreenshot2",
            "finish": "Nahrát",
        }
        response = self._session.post(
            upload_url,
            data=form_fields,
            files={"screenshot": opened_file}
        )

        check_error_div(
            response.text.encode("utf-8"),
            '<div class="error" id="screenshotError">'
        )
Ejemplo n.º 5
0
    def from_user_id(user_id):
        """
        Transform `user_id` to instance of :class:`User`.

        The profile page ``/Profile/<user_id>`` is downloaded and the username
        is taken from the last ``/lide/...`` link whose content starts with
        ``Seznam``.

        Args:
            user_id: Identifier of the user; it is converted with ``str()``
                and appended to the profile URL.

        Returns:
            obj: :class:`User` instance parsed from the `user_id`.
        """
        profile_page = shared.download(
            url_context("/Profile/" + str(user_id))
        )
        dom = dhtmlparser.parseString(profile_page)
        dhtmlparser.makeDoubleLinked(dom)

        shared.handle_errors(dom)

        def points_to_user(tag):
            return tag.params.get("href", "").startswith("/lide/")

        # example of the wanted element:
        # <li><a href="/lide/unittest/objekty" rel="nofollow">Seznam ...</a>
        links = []
        for a_tag in dom.find("a", fn=points_to_user):
            if a_tag.getContent().startswith("Seznam"):
                links.append(a_tag.params["href"])

        # href has form /lide/<username>/..., so username is the third piece
        username = links[-1].split("/")[2]

        return User(username)
Ejemplo n.º 6
0
    def from_user_id(user_id):
        """
        Transform `user_id` to instance of :class:`User`.

        The profile page ``/Profile/<user_id>`` is downloaded and the username
        is taken from the last ``/lide/...`` link whose content starts with
        ``Seznam``.

        Args:
            user_id: Identifier of the user; it is converted with ``str()``
                and appended to the profile URL.

        Returns:
            obj: :class:`User` instance parsed from the `user_id`.
        """
        profile_page = shared.download(
            url_context("/Profile/" + str(user_id))
        )
        dom = dhtmlparser.parseString(profile_page)
        dhtmlparser.makeDoubleLinked(dom)

        shared.handle_errors(dom)

        def points_to_user(tag):
            return tag.params.get("href", "").startswith("/lide/")

        # example of the wanted element:
        # <li><a href="/lide/unittest/objekty" rel="nofollow">Seznam ...</a>
        links = []
        for a_tag in dom.find("a", fn=points_to_user):
            if a_tag.getContent().startswith("Seznam"):
                links.append(a_tag.params["href"])

        # href has form /lide/<username>/..., so username is the third piece
        username = links[-1].split("/")[2]

        return User(username)
Ejemplo n.º 7
0
def iter_blogposts(start=0, end=None, lazy=True):
    """
    Iterate over blogposts found in the paginated bloglist.

    Args:
        start (int, default 0): Start at this page.
        end (int, default None): End at this page. ``None`` means that the
            iteration is stopped only by the end of the pagination.
        lazy (bool, default True): Initialize :class:`.Blogpost` objects only
             with informations from listings. Don't download full text and
             comments.

    Yields:
        obj: :class:`.Blogpost` objects.
    """
    # NOTE(review): `page_no` counts pages fetched by this call from zero, so
    # with `start` > 0 the `end` limit is relative to `start` - confirm that
    # this is the intended meaning of `end`.
    for page_no, page_url in enumerate(_next_blog_url(start)):
        listing = _remove_crap_from_bloglist(_shared.download(page_url))

        # parse basic info about all blogs at page
        dom = _dhtmlparser.parseString(listing)
        for position, blog in enumerate(dom.findB("div", {"class": "cl"})):
            yield Blogpost.from_html(blog, lazy=lazy)

            # a page should hold 25 blogposts; anything parsed beyond that
            # is ignored
            if position >= 24:
                break

        # stop at the end of the pagination or when the `end` limit is hit
        if not _should_continue(dom) or (end is not None and page_no >= end):
            break
Ejemplo n.º 8
0
    def add_tag(self, tag):
        """
        Assign `tag` to this blogpost and refresh :attr:`tags`.

        Args:
            tag (Tag): :class:`Tag` instance. See :class:`possible_tags` for
                list of all possible tags.

        Raises:
            KeyError: When `tag` is not an instance of :class:`Tag`.
            ValueError: When :attr:`uid` is not set.

        Returns:
            list: List of :class:`Tag` objects, as parsed from the response
                to the assign request.
        """
        if not isinstance(tag, Tag):
            raise KeyError(
                "Tag have instance of Tag and to be from .possible_tags()")

        if not self.uid:
            raise ValueError(
                "Can't assign tag - .uid property not set. Call .pull() or "
                "assign .uid manually.")

        query_values = (self.uid, tag.norm)
        assign_url = "/ajax/tags/assign?rid=%d&tagID=%s" % query_values
        response_xml = download(url_context(assign_url))

        # the response is parsed and becomes the new list of tags
        self.tags = self.__class__._parse_tags(response_xml)
        return self.tags
Ejemplo n.º 9
0
    def add_pic(self, opened_file):
        """
        Upload a picture to the Concept.

        Args:
            opened_file (file): opened file object; it is sent as the
                ``screenshot`` file of the upload form.
        """
        if not self._meta:
            self._init_metadata()

        # the page with the upload form tells where the picture is POSTed
        form_page = download(url_context(self._meta["Přidej obrázek"]),
                             session=self._session)
        dom = dhtmlparser.parseString(form_page)
        upload_form = first(
            dom.find("form", {"enctype": "multipart/form-data"}))
        upload_url = url_context(upload_form.params["action"])

        form_fields = {"action": "addScreenshot2", "finish": "Nahrát"}
        response = self._session.post(upload_url,
                                      data=form_fields,
                                      files={"screenshot": opened_file})

        check_error_div(response.text.encode("utf-8"),
                        '<div class="error" id="screenshotError">')
Ejemplo n.º 10
0
    def test_end_of_bloglist(pagination):
        """
        Download bloglist page number `pagination` (25 blogposts per page in
        the ``from`` offset) and return what ``_should_continue`` says about
        it. `progress_fn`, when set, is called with `pagination`.
        """
        offset = pagination * 25
        page_url = _shared.url_context("/blog/?from=%d" % offset)
        listing = _remove_crap_from_bloglist(_shared.download(page_url))
        dom = _dhtmlparser.parseString(listing)

        # report progress only after the page was downloaded and parsed
        if progress_fn:
            progress_fn(pagination)

        return _should_continue(dom)
Ejemplo n.º 11
0
    def possible_tags(cls):
        """
        Download ``/ajax/tags/list`` and parse it into tags.

        Returns:
            list: List of :class:`Tag` objects which may be set.
        """
        list_url = url_context("/ajax/tags/list")
        return cls._parse_tags(download(list_url))
Ejemplo n.º 12
0
    def possible_tags(cls):
        """
        Download ``/ajax/tags/list`` and parse it into tags.

        Returns:
            list: List of :class:`Tag` objects which may be set.
        """
        list_url = url_context("/ajax/tags/list")
        return cls._parse_tags(download(list_url))
Ejemplo n.º 13
0
    def _get(self, url, params=None, as_text=True):
        """
        Call ``shared.download()`` with this object's session.

        Args:
            url (str): Url on which the GET request will be sent.
            params (dict): GET parameters.
            as_text (bool, default True): Return result as text or binary data.

        Returns:
            str/binary data: depending on the `as_text` parameter.
        """
        request = {
            "url": url,
            "params": params,
            "session": self.session,
            "as_text": as_text,
        }
        return shared.download(**request)
Ejemplo n.º 14
0
    def _init_metadata(self, data=None):
        """
        Fill :attr:`_meta` with links from the management box of the page.

        Args:
            data (str, default None): Already downloaded page. When not set
                (or empty), the page at :attr:`link` is downloaded.

        Raises:
            ValueError: When the management box heading is not present in
                the page.
        """
        box_heading = '<div class="s_nadpis">Správa zápisku</div>'

        if not data:
            data = download(self.link, session=self._session)

        if box_heading not in data:
            raise ValueError(
                "Can't parse metadata! It looks like I am not logged in!"
            )

        # only the part of the page behind the heading is parsed
        after_heading = data.split(box_heading)[1]
        dom = dhtmlparser.parseString(after_heading)
        section = first(dom.find("div", {"class": "s_sekce"}))

        # map stripped text of each link in the section to its href
        self._meta = {}
        for item in section.find("li"):
            link = first(item.find("a"))
            label = link.getContent().strip()
            self._meta[label] = link.params["href"]
Ejemplo n.º 15
0
def iter_blogposts(start=0, end=None, lazy=True):
    """
    Iterate over blogposts found in the paginated bloglist.

    Args:
        start (int, default 0): Start at this page.
        end (int, default None): End at this page. ``None`` means that the
            iteration is stopped only by the end of the pagination.
        lazy (bool, default True): Initialize :class:`.Blogpost` objects only
             with informations from listings. Don't download full text and
             comments.

    Yields:
        obj: :class:`.Blogpost` objects.
    """
    linkbox_heading = '<div class="s_nadpis linkbox_nadpis">Píšeme jinde</div>'
    content_start = '<div class="st" id="st">'
    signature = '<div class="signature">'

    # NOTE(review): `page_no` counts pages fetched by this call from zero, so
    # with `start` > 0 the `end` limit is relative to `start` - confirm that
    # this is the intended meaning of `end`.
    for page_no, page_url in enumerate(_next_blog_url(start)):
        page = _shared.download(page_url)

        # keep only what lies between the content <div> and the linkbox
        before_linkbox = page.split(linkbox_heading)[0]
        listing = before_linkbox.split(content_start)[1]

        # A perex may contain an unterminated HTML comment, which would
        # swallow the rest of the listing. Empty comment put in front of each
        # signature <div> terminates it.
        listing = listing.replace(signature, '<!-- -->' + signature)

        # parse basic info about all blogs at page
        dom = _dhtmlparser.parseString(listing)
        for position, blog in enumerate(dom.findB("div", {"class": "cl"})):
            yield Blogpost.from_html(blog, lazy=lazy)

            # a page should hold 25 blogposts; anything parsed beyond that
            # is ignored
            if position >= 24:
                break

        # stop at the end of the pagination or when the `end` limit is hit
        if not _should_continue(dom) or (end is not None and page_no >= end):
            break
Ejemplo n.º 16
0
    def _get(self, url, params=None, as_text=True):
        """
        Call ``shared.download()`` with this object's session.

        Args:
            url (str): Url on which the GET request will be sent.
            params (dict): GET parameters.
            as_text (bool, default True): Return result as text or binary data.

        Returns:
            str/binary data: depending on the `as_text` parameter.
        """
        request = {
            "url": url,
            "params": params,
            "session": self.session,
            "as_text": as_text,
        }
        return shared.download(**request)
Ejemplo n.º 17
0
    def _init_metadata(self, data=None):
        """
        Fill :attr:`_meta` with links from the management box of the page.

        Args:
            data (str, default None): Already downloaded page. When not set
                (or empty), the page at :attr:`link` is downloaded.

        Raises:
            ValueError: When the management box heading is not present in
                the page.
        """
        box_heading = '<div class="s_nadpis">Správa zápisku</div>'

        if not data:
            data = download(self.link, session=self._session)

        if box_heading not in data:
            raise ValueError(
                "Can't parse metadata! It looks like I am not logged in!")

        # only the part of the page behind the heading is parsed
        after_heading = data.split(box_heading)[1]
        dom = dhtmlparser.parseString(after_heading)
        section = first(dom.find("div", {"class": "s_sekce"}))

        # map stripped text of each link in the section to its href
        self._meta = {}
        for item in section.find("li"):
            link = first(item.find("a"))
            label = link.getContent().strip()
            self._meta[label] = link.params["href"]
Ejemplo n.º 18
0
    def remove_tag(self, tag, throw=False):
        """
        Remove tag from the tags currently assigned to blogpost.

        Args:
            tag (Tag): :class:`Tag` instance. See :class:`possible_tags` for
                list of all possible tags.
            throw (bool): Raise error in case you are trying to remove
                tag that is not assigned to blogpost. When ``False``, such
                request is silently ignored and current tags are returned.

        Raises:
            KeyError: In case, that `tag` is not instance of :class:`Tag`.
            IndexError: In case that you are trying to remove tag which is not
                assigned to blogpost and `throw` is set.
            ValueError: In case that :attr:`uid` is not set.

        Returns:
            list: List of :class:`Tag` objects.
        """
        if not isinstance(tag, Tag):
            raise KeyError(
                "Tag have instance of Tag and to be from .tags()"
            )

        # unassigned tag is either reported or ignored, depending on `throw`
        if tag not in self.tags:
            if throw:
                raise IndexError("Can't remove unassigned tag.")

            return self.tags

        if not self.uid:
            # the message used to say "assign", which was misleading in the
            # removal path
            raise ValueError(
                "Can't remove tag - .uid property not set. Call .pull() or "
                "assign .uid manually."
            )

        tags_xml = download(url_context(
            "/ajax/tags/unassign?rid=%d&tagID=%s" % (self.uid, tag.norm)
        ))

        # the response is parsed and becomes the new list of tags
        self.tags = self.__class__._parse_tags(tags_xml)

        return self.tags
Ejemplo n.º 19
0
def iter_blogposts(start=0, end=None, lazy=True):
    """
    Iterate over blogposts found in the paginated bloglist.

    Args:
        start (int, default 0): Start at this page.
        end (int, default None): End at this page. ``None`` means that the
            iteration is stopped only by the end of the pagination.
        lazy (bool, default True): Initialize :class:`.Blogpost` objects only
             with informations from listings. Don't download full text and
             comments.

    Yields:
        obj: :class:`.Blogpost` objects.
    """
    linkbox_heading = '<div class="s_nadpis linkbox_nadpis">Píšeme jinde</div>'
    content_start = '<div class="st" id="st">'
    signature = '<div class="signature">'

    # NOTE(review): `page_no` counts pages fetched by this call from zero, so
    # with `start` > 0 the `end` limit is relative to `start` - confirm that
    # this is the intended meaning of `end`.
    for page_no, page_url in enumerate(_next_blog_url(start)):
        page = _shared.download(page_url)

        # keep only what lies between the content <div> and the linkbox
        before_linkbox = page.split(linkbox_heading)[0]
        listing = before_linkbox.split(content_start)[1]

        # A perex may contain an unterminated HTML comment, which would
        # swallow the rest of the listing. Empty comment put in front of each
        # signature <div> terminates it.
        listing = listing.replace(signature, '<!-- -->' + signature)

        # parse basic info about all blogs at page
        dom = _dhtmlparser.parseString(listing)
        for position, blog in enumerate(dom.findB("div", {"class": "cl"})):
            yield Blogpost.from_html(blog, lazy=lazy)

            # a page should hold 25 blogposts; anything parsed beyond that
            # is ignored
            if position >= 24:
                break

        # stop at the end of the pagination or when the `end` limit is hit
        if not _should_continue(dom) or (end is not None and page_no >= end):
            break
Ejemplo n.º 20
0
    def list_pics(self):
        """
        Collect links from the form at the attachment management page.

        Return:
            list: List of URLs to pictures used in this concept.
        """
        if not self._meta:
            self._init_metadata()

        attachments_page = download(
            url_context(self._meta["Správa příloh"]),
            session=self._session
        )
        dom = dhtmlparser.parseString(attachments_page)

        forms = dom.find("form", {"name": "form"})
        assert forms, "Can't find pic form!"

        # every <a> with href inside of the form counts as a picture
        pic_urls = []
        for link in first(forms).find("a"):
            if "href" in link.params:
                pic_urls.append(link.params["href"])

        return pic_urls
Ejemplo n.º 21
0
    def remove_tag(self, tag, throw=False):
        """
        Remove tag from the tags currently assigned to blogpost.

        Args:
            tag (Tag): :class:`Tag` instance. See :class:`possible_tags` for
                list of all possible tags.
            throw (bool): Raise error in case you are trying to remove
                tag that is not assigned to blogpost. When ``False``, such
                request is silently ignored and current tags are returned.

        Raises:
            KeyError: In case, that `tag` is not instance of :class:`Tag`.
            IndexError: In case that you are trying to remove tag which is not
                assigned to blogpost and `throw` is set.
            ValueError: In case that :attr:`uid` is not set.

        Returns:
            list: List of :class:`Tag` objects.
        """
        if not isinstance(tag, Tag):
            raise KeyError("Tag have instance of Tag and to be from .tags()")

        # unassigned tag is either reported or ignored, depending on `throw`
        if tag not in self.tags:
            if throw:
                raise IndexError("Can't remove unassigned tag.")

            return self.tags

        if not self.uid:
            # the message used to say "assign", which was misleading in the
            # removal path
            raise ValueError(
                "Can't remove tag - .uid property not set. Call .pull() or "
                "assign .uid manually.")

        tags_xml = download(
            url_context("/ajax/tags/unassign?rid=%d&tagID=%s" %
                        (self.uid, tag.norm)))

        # the response is parsed and becomes the new list of tags
        self.tags = self.__class__._parse_tags(tags_xml)

        return self.tags
Ejemplo n.º 22
0
    def pull(self):
        """
        Download page with blogpost. Parse text, comments and everything else.

        Until this is called, following attributes are not known/parsed:

            - :attr:`text`
            - :attr:`tags`
            - :attr:`has_tux`
            - :attr:`comments`
            - :attr:`last_modified_ts`
        """
        page = download(url=self.url)

        # Blogpost and comments are parsed separately, because some blogposts
        # contain unclosed elements, for example
        # https://www.abclinuxu.cz/blog/EmentuX/2005/10/all-in-one
        blog_html, comments_html = page.split('<p class="page_tools">')

        self._dom = dhtmlparser.parseString(blog_html)
        self._content_tag = None
        dhtmlparser.makeDoubleLinked(self._dom)

        # parsers work with self._dom prepared above and run in this order
        parsers = (
            self._parse_uid,
            self._parse_title,
            self._parse_text,
            self._parse_rating,
            self._parse_meta,
        )
        for parse in parsers:
            parse()

        self._tags = self._get_tags()

        # comments of blogs with HTML too broken to be parsed are skipped
        if self.relative_url not in COMMENT_BANLIST:
            self.comments = Comment.comments_from_html(comments_html)
            self.comments_n = len(self.comments)

        # memory cleanup - this saves a LOT of memory
        self._dom = self._content_tag = None
Ejemplo n.º 23
0
    def pull(self):
        """
        Download page with blogpost. Parse text, comments and everything else.

        Until this is called, following attributes are not known/parsed:

            - :attr:`text`
            - :attr:`tags`
            - :attr:`has_tux`
            - :attr:`comments`
            - :attr:`last_modified_ts`
        """
        page = download(url=self.url)

        # Blogpost and comments are parsed separately, because some blogposts
        # contain unclosed elements, for example
        # https://www.abclinuxu.cz/blog/EmentuX/2005/10/all-in-one
        blog_html, comments_html = page.split('<p class="page_tools">')

        self._dom = dhtmlparser.parseString(blog_html)
        self._content_tag = None
        dhtmlparser.makeDoubleLinked(self._dom)

        # parsers work with self._dom prepared above and run in this order
        parsers = (
            self._parse_uid,
            self._parse_title,
            self._parse_text,
            self._parse_rating,
            self._parse_meta,
        )
        for parse in parsers:
            parse()

        self._tags = self._get_tags()

        # comments of blogs with HTML too broken to be parsed are skipped
        if self.relative_url not in COMMENT_BANLIST:
            self.comments = Comment.comments_from_html(comments_html)
            self.comments_n = len(self.comments)

        # memory cleanup - this saves a LOT of memory
        self._dom = self._content_tag = None
Ejemplo n.º 24
0
    def list_pics(self):
        """
        Collect links from the form at the attachment management page.

        Return:
            list: List of URLs to pictures used in this concept.
        """
        if not self._meta:
            self._init_metadata()

        attachments_url = url_context(self._meta["Správa příloh"])
        attachments_page = download(attachments_url, session=self._session)
        dom = dhtmlparser.parseString(attachments_page)

        forms = dom.find("form", {"name": "form"})
        assert forms, "Can't find pic form!"

        # every <a> with href inside of the form counts as a picture
        pic_urls = []
        for link in first(forms).find("a"):
            if "href" in link.params:
                pic_urls.append(link.params["href"])

        return pic_urls
Ejemplo n.º 25
0
    def get_content(self):
        """
        Download the concept and cut its text out of the page.

        As a side effect, metadata are initialized from the downloaded page
        when they were not initialized before.

        Returns:
            str: full HTML UTF-8 encoded text of the concept.

        Raises:
            ValueError: When the ``<p class="meta-vypis">`` element, which
                precedes the text, is not found.
        """
        page = download(self.link, session=self._session)

        if not self._meta:
            self._init_metadata(page)

        # presumably keeps everything in front of the last ``<!-- -->``
        content = first(page.rsplit('<!-- -->', 1))

        meta_paragraphs = dhtmlparser.parseString(content).find(
            "p",
            {"class": "meta-vypis"}
        )
        if not meta_paragraphs:
            raise ValueError("Can't find meta-vypis <p>!")

        # text of the concept begins right behind the meta-vypis paragraph
        paragraph_html = str(first(meta_paragraphs))
        return content.split(paragraph_html)[1].strip()
Ejemplo n.º 26
0
    def get_content(self):
        """
        Download the concept and cut its text out of the page.

        As a side effect, metadata are initialized from the downloaded page
        when they were not initialized before.

        Returns:
            str: full HTML UTF-8 encoded text of the concept.

        Raises:
            ValueError: When the ``<p class="meta-vypis">`` element, which
                precedes the text, is not found.
        """
        page = download(self.link, session=self._session)

        if not self._meta:
            self._init_metadata(page)

        # presumably keeps everything in front of the last ``<!-- -->``
        content = first(page.rsplit('<!-- -->', 1))

        meta_paragraphs = dhtmlparser.parseString(content).find(
            "p",
            {"class": "meta-vypis"}
        )
        if not meta_paragraphs:
            raise ValueError("Can't find meta-vypis <p>!")

        # text of the concept begins right behind the meta-vypis paragraph
        paragraph_html = str(first(meta_paragraphs))
        return content.split(paragraph_html)[1].strip()
Ejemplo n.º 27
0
 def _get_tags(self):
     """
     Download tags assigned to the object with :attr:`uid` and return them
     parsed by ``_parse_tags()``.
     """
     assigned_xml = download(
         url_context("/ajax/tags/assigned?rid=%d" % self.uid)
     )
     return self.__class__._parse_tags(assigned_xml)
Ejemplo n.º 28
0
def get(url, name, match):
    """
    Download `url` into ``DOWNLOAD_DIR`` and unzip the result into the
    `name` subdirectory of ``EXTRACTED_DIR``.

    `match` is handed over to ``unzip()`` untouched; presumably it selects
    which archive members are extracted.
    """
    print('Downloading {}'.format(url))
    # NOTE(review): a check skipping files already present in DOWNLOAD_DIR
    # used to be here, but it was commented out.
    archive = download(url, DOWNLOAD_DIR)

    print('Extracting {}'.format(name))
    target_dir = os.path.join(EXTRACTED_DIR, name)
    unzip(archive, target_dir, match)
Ejemplo n.º 29
0
 def _get_tags(self):
     """
     Download tags assigned to the object with :attr:`uid` and return them
     parsed by ``_parse_tags()``.
     """
     assigned_xml = download(
         url_context("/ajax/tags/assigned?rid=%d" % self.uid)
     )
     return self.__class__._parse_tags(assigned_xml)