def add_pic(self, opened_file):
    """
    Upload a picture to the Concept.

    The upload form is looked up on the page stored in the metadata under
    the ``"Přidej obrázek"`` key and the file is posted to the form's
    ``action`` URL using the concept's session.

    Args:
        opened_file (file): Opened file object, sent as the ``screenshot``
            form field.
    """
    # metadata are initialized lazily on first use
    if not self._meta:
        self._init_metadata()

    # download the page containing the upload form
    form_page_url = url_context(self._meta["Přidej obrázek"])
    form_page = download(form_page_url, session=self._session)

    # the upload form is identified by its multipart encoding
    upload_forms = dhtmlparser.parseString(form_page).find(
        "form",
        {"enctype": "multipart/form-data"}
    )
    upload_url = first(upload_forms).params["action"]

    # send the picture to the form's target
    response = self._session.post(
        url_context(upload_url),
        data={"action": "addScreenshot2", "finish": "Nahrát"},
        files={"screenshot": opened_file}
    )

    # check the response for the screenshot error element
    check_error_div(
        response.text.encode("utf-8"),
        '<div class="error" id="screenshotError">'
    )
def add_pic(self, opened_file):
    """
    Upload a picture to the Concept.

    The upload form is looked up on the page stored in the metadata under
    the ``"Přidej obrázek"`` key and the file is posted to the form's
    ``action`` URL using the concept's session.

    Args:
        opened_file (file): Opened file object, sent as the ``screenshot``
            form field.
    """
    # metadata are initialized lazily on first use
    if not self._meta:
        self._init_metadata()

    # download the page containing the upload form
    form_page = download(
        url_context(self._meta["Přidej obrázek"]),
        session=self._session
    )
    parsed_page = dhtmlparser.parseString(form_page)

    # the upload form is identified by its multipart encoding
    upload_form = first(
        parsed_page.find("form", {"enctype": "multipart/form-data"})
    )
    target = upload_form.params["action"]

    # send the picture to the form's target
    post_fields = {"action": "addScreenshot2", "finish": "Nahrát"}
    response = self._session.post(
        url_context(target),
        data=post_fields,
        files={"screenshot": opened_file}
    )

    # check the response for the screenshot error element
    encoded_body = response.text.encode("utf-8")
    check_error_div(encoded_body, '<div class="error" id="screenshotError">')
def edit(self, text, title=None, date_of_pub=None):
    """
    Edit concept.

    Args:
        text (str): New text of the concept.
        title (str, default None): New title of the concept. If not set,
            the title found in the downloaded edit form is used.
        date_of_pub (str/int, default None): Date string in abclinuxu
            format or timestamp determining when the concept should be
            automatically published. If not set, the value found in the
            downloaded edit form is used.

    Raises:
        AssertionError: If the edit form can't be found in the page.

    Note:
        `date_of_pub` can be string in format ``"%Y-%m-%d %H:%M"``.
    """
    if not self._meta:
        self._init_metadata()

    data = download(
        url_context(self._meta["Uprav zápis"]),
        session=self._session
    )
    dom = dhtmlparser.parseString(data)

    forms = dom.find("form", {"name": "form"})
    # Raised explicitly instead of using `assert`, which is stripped when
    # Python runs with -O. AssertionError is kept so existing callers that
    # catch it continue to work.
    if not forms:
        raise AssertionError("Can't find edit form!")

    form = first(forms)
    form_action = form.params["action"]

    # keep the current title when no new one was given
    if title is None:
        title_input = first(form.find("input", {"name": "title"}))
        title = title_input.params["value"]

    # pick the publication date: current form value / string / timestamp
    if date_of_pub is None:
        publish_input = first(form.find("input", {"name": "publish"}))
        date = publish_input.params["value"]
    elif isinstance(date_of_pub, basestring):
        date = date_of_pub
    else:
        date = ts_to_concept_date(date_of_pub)

    data = download(
        url=url_context(form_action),
        method="POST",
        data={
            "cid": 0,
            "publish": date,
            "content": text,
            "title": title,
            "delay": "Ulož",
            "action": "edit2"
        },
        session=self._session
    )

    check_error_div(data, '<div class="error" id="contentError">')
    check_error_page(data)
def edit(self, text, title=None, date_of_pub=None):
    """
    Edit concept.

    Args:
        text (str): New text of the concept.
        title (str, default None): New title of the concept. If not set,
            the title found in the downloaded edit form is used.
        date_of_pub (str/int, default None): Date string in abclinuxu
            format or timestamp determining when the concept should be
            automatically published. If not set, the value found in the
            downloaded edit form is used.

    Raises:
        AssertionError: If the edit form can't be found in the page.

    Note:
        `date_of_pub` can be string in format ``"%Y-%m-%d %H:%M"``.
    """
    if not self._meta:
        self._init_metadata()

    data = download(
        url_context(self._meta["Uprav zápis"]),
        session=self._session
    )
    dom = dhtmlparser.parseString(data)

    forms = dom.find("form", {"name": "form"})
    # Raised explicitly instead of using `assert`, which is stripped when
    # Python runs with -O. AssertionError is kept so existing callers that
    # catch it continue to work.
    if not forms:
        raise AssertionError("Can't find edit form!")

    form = first(forms)
    form_action = form.params["action"]

    # keep the current title when no new one was given
    if title is None:
        title_input = first(form.find("input", {"name": "title"}))
        title = title_input.params["value"]

    # pick the publication date: current form value / string / timestamp
    if date_of_pub is None:
        publish_input = first(form.find("input", {"name": "publish"}))
        date = publish_input.params["value"]
    elif isinstance(date_of_pub, basestring):
        date = date_of_pub
    else:
        date = ts_to_concept_date(date_of_pub)

    data = download(
        url=url_context(form_action),
        method="POST",
        data={
            "cid": 0,
            "publish": date,
            "content": text,
            "title": title,
            "delay": "Ulož",
            "action": "edit2"
        },
        session=self._session
    )

    check_error_div(data, '<div class="error" id="contentError">')
    check_error_page(data)
def from_user_id(user_id):
    """
    Transform `user_id` to instance of :class:`User`.

    The ``/Profile/<user_id>`` page is downloaded and the username is taken
    from the last ``/lide/...`` link whose content starts with ``Seznam``.

    Args:
        user_id: ID of the user; converted with ``str()`` when the profile
            URL is built.

    Returns:
        obj: :class:`User` instance parsed from the `user_id`.
    """
    profile_html = shared.download(url_context("/Profile/" + str(user_id)))
    dom = dhtmlparser.parseString(profile_html)
    dhtmlparser.makeDoubleLinked(dom)
    shared.handle_errors(dom)

    def is_people_link(tag):
        return tag.params.get("href", "").startswith("/lide/")

    # looking for links such as
    # <a href="/lide/unittest/objekty" rel="nofollow">Seznam ...</a>
    listing_hrefs = []
    for anchor in dom.find("a", fn=is_people_link):
        if anchor.getContent().startswith("Seznam"):
            listing_hrefs.append(anchor.params["href"])

    # "/lide/<username>/..." -> "<username>"
    last_href = listing_hrefs[-1]
    username = last_href.split("/")[2]

    return User(username)
def add_tag(self, tag):
    """
    Assign new tag to the blogpost.

    Args:
        tag (Tag): :class:`Tag` instance. See :class:`possible_tags` for
            list of all possible tags.

    Raises:
        KeyError: In case that `tag` is not instance of :class:`Tag`.
        ValueError: In case that :attr:`uid` is not set.

    Returns:
        list: List of :class:`Tag` objects.
    """
    if not isinstance(tag, Tag):
        raise KeyError(
            "Tag have instance of Tag and to be from .possible_tags()"
        )

    if not self.uid:
        raise ValueError(
            "Can't assign tag - .uid property not set. Call .pull() or "
            "assign .uid manually."
        )

    # the response is parsed by _parse_tags and stored as the new tag list
    assign_path = "/ajax/tags/assign?rid=%d&tagID=%s" % (self.uid, tag.norm)
    response_xml = download(url_context(assign_path))

    self.tags = self.__class__._parse_tags(response_xml)

    return self.tags
def add_tag(self, tag):
    """
    Assign new tag to the blogpost.

    Args:
        tag (Tag): :class:`Tag` instance. See :class:`possible_tags` for
            list of all possible tags.

    Raises:
        KeyError: In case that `tag` is not instance of :class:`Tag`.
        ValueError: In case that :attr:`uid` is not set.

    Returns:
        list: List of :class:`Tag` objects.
    """
    if not isinstance(tag, Tag):
        raise KeyError(
            "Tag have instance of Tag and to be from .possible_tags()"
        )

    if not self.uid:
        raise ValueError(
            "Can't assign tag - .uid property not set. Call .pull() or "
            "assign .uid manually."
        )

    # the response is parsed by _parse_tags and stored as the new tag list
    query = "/ajax/tags/assign?rid=%d&tagID=%s" % (self.uid, tag.norm)
    assigned_xml = download(url_context(query))

    parser = self.__class__._parse_tags
    self.tags = parser(assigned_xml)

    return self.tags
def _parse_url(head_tag):
    """
    Compose URL pointing to the comment described by `head_tag`.

    Args:
        head_tag (obj): Element whose ``id`` parameter is used as the URL
            fragment and which is searched for the ``Odpovědět`` link.

    Returns:
        str/None: ``/blog/show/<blog_id>#<comment_id>`` passed through \
                  ``url_context()``, or None if the ``Odpovědět`` link \
                  was not found.
    """
    comment_id = head_tag.params["id"]

    def is_reply_link(tag):
        return tag.getContent() == "Odpovědět"

    # the link looks like
    # /blog/EditDiscussion/400959;jsessionid=...?action=add&dizId=..
    reply_links = head_tag.find("a", fn=is_reply_link)

    try:
        href = first(reply_links).params["href"]
    except StopIteration:
        return None

    # drop the ";jsessionid=.." part and then the "?query" part
    without_session = href.split(";")[0]
    path = without_session.split("?")[0]

    # /blog/EditDiscussion/400959 -> 400959
    numeric_tokens = (part for part in path.split("/") if part.isdigit())
    blog_id = first(numeric_tokens)

    return url_context("/blog/show/%s#%s" % (blog_id, comment_id))
def from_user_id(user_id):
    """
    Transform `user_id` to instance of :class:`User`.

    The ``/Profile/<user_id>`` page is downloaded and the username is taken
    from the last ``/lide/...`` link whose content starts with ``Seznam``.

    Args:
        user_id: ID of the user; converted with ``str()`` when the profile
            URL is built.

    Returns:
        obj: :class:`User` instance parsed from the `user_id`.
    """
    profile_url = url_context("/Profile/" + str(user_id))
    dom = dhtmlparser.parseString(shared.download(profile_url))
    dhtmlparser.makeDoubleLinked(dom)
    shared.handle_errors(dom)

    def points_to_people(tag):
        href = tag.params.get("href", "")
        return href.startswith("/lide/")

    # looking for links such as
    # <a href="/lide/unittest/objekty" rel="nofollow">Seznam ...</a>
    candidates = dom.find("a", fn=points_to_people)
    listing_hrefs = [
        anchor.params["href"]
        for anchor in candidates
        if anchor.getContent().startswith("Seznam")
    ]

    # "/lide/<username>/..." -> "<username>"
    path_parts = listing_hrefs[-1].split("/")

    return User(path_parts[2])
def test_end_of_bloglist(pagination):
    """
    Download one page of the blog listing and evaluate it.

    Args:
        pagination (int): Page index; the listing offset is
            ``pagination * 25``.

    Returns:
        Result of ``_should_continue()`` called on the parsed page.
    """
    offset = pagination * 25
    listing_url = _shared.url_context("/blog/?from=%d" % offset)
    listing_html = _shared.download(listing_url)

    cleaned_html = _remove_crap_from_bloglist(listing_html)
    dom = _dhtmlparser.parseString(cleaned_html)

    # `progress_fn` is not defined in this function; it is presumably
    # provided by the enclosing scope -- TODO confirm
    if progress_fn:
        progress_fn(pagination)

    return _should_continue(dom)
def possible_tags(cls):
    """
    Download and parse the list of all tags which may be set.

    Returns:
        list: List of :class:`Tag` objects.
    """
    list_url = url_context("/ajax/tags/list")
    available_xml = download(list_url)

    return cls._parse_tags(available_xml)
def _next_blog_url(start=0):
    """
    Generate URLs of the blog listing pages.

    Each page index is multiplied by 25 to get the ``from`` offset. Indexes
    go from 0 up to 999999; those lower than `start` are skipped.

    Args:
        start (int, default 0): Start at this page.

    Yields:
        str: Another url for blog listing.
    """
    for page in xrange(1000000):
        if page >= start:
            offset = page * 25
            yield _shared.url_context("/blog/?from=%d" % offset)
def from_html(html, lazy=True):
    """
    Convert HTML string to :class:`Blogpost` instance.

    Args:
        html (str): Input data. Anything that is not already a
            ``dhtmlparser.HTMLElement`` is parsed first.
        lazy (bool, default True): Be lazy (don't pull data by yourself
             from the site). Call :meth:`pull` for active download of all
             required informations.

    Returns:
        obj: :class:`Blogpost` instance.
    """
    if not isinstance(html, dhtmlparser.HTMLElement):
        html = dhtmlparser.parseString(html)
        dhtmlparser.makeDoubleLinked(html)

    # legacy blogs keep the title (and the link) in <h2 class="st_nadpis">
    legacy_titles = html.find("h2", {"class": "st_nadpis"})
    if legacy_titles:
        title_tag = first(legacy_titles)
        relative_link = first(title_tag.find("a")).params["href"]
        link = url_context(relative_link)
    else:
        # otherwise use first <h2> and the canonical <link> element
        title_tag = first(html.find("h2"))
        canonical_tag = first(html.find("link", {"rel": "canonical"}))
        link = canonical_tag.params["href"]

    title = dhtmlparser.removeTags(title_tag).strip()

    # element with the meta information
    meta = html.find("p", {"class": "meta-vypis"})[0]

    blog = Blogpost(url=link, lazy=lazy)
    if not lazy:
        return blog

    # in lazy mode the attributes are filled from the parsed HTML
    blog.title = title
    blog.intro = Blogpost._parse_intro(html, meta, title_tag)
    blog.rating = Blogpost._parse_rating_from_preview(meta)
    blog.created_ts = parse_timestamp(meta)
    blog.comments_n = Blogpost._parse_comments_n(meta)

    return blog
def get_image_urls(self):
    """
    Get list of links to all images used in this blog.

    ``src`` values starting with ``http://`` or ``https://`` are returned
    as they are, all others are passed through ``url_context()``.

    Returns:
        list: List of str with the URL of each ``<img>`` that has ``src``.
    """
    dom = dhtmlparser.parseString(self.text)

    urls = []
    for img in dom.find("img"):
        # images without the src parameter are ignored
        if "src" not in img.params:
            continue

        src = img.params["src"]
        if src.startswith(("http://", "https://")):
            urls.append(src)
        else:
            urls.append(url_context(src))

    return urls
def remove_tag(self, tag, throw=False):
    """
    Remove tag from the tags currently assigned to blogpost.

    Args:
        tag (Tag): :class:`Tag` instance. See :class:`possible_tags` for
            list of all possible tags.
        throw (bool): Raise error in case you are trying to remove tag
              that is not assigned to blogpost.

    Raises:
        KeyError: In case that `tag` is not instance of :class:`Tag`.
        IndexError: In case that you are trying to remove tag which is not
                    assigned to blogpost and `throw` is set.
        ValueError: In case that :attr:`uid` is not set.

    Returns:
        list: List of :class:`Tag` objects.
    """
    if not isinstance(tag, Tag):
        raise KeyError(
            "Tag have instance of Tag and to be from .tags()"
        )

    # unassigned tag: silently return the current tags unless `throw` is set
    if tag not in self.tags:
        if not throw:
            return self.tags

        raise IndexError("Can't remove unassigned tag.")

    if not self.uid:
        # the message used to say "assign" (copied from add_tag)
        raise ValueError(
            "Can't remove tag - .uid property not set. Call .pull() or "
            "assign .uid manually."
        )

    # the response is parsed by _parse_tags and stored as the new tag list
    tags_xml = download(url_context(
        "/ajax/tags/unassign?rid=%d&tagID=%s" % (self.uid, tag.norm)
    ))

    self.tags = self.__class__._parse_tags(tags_xml)

    return self.tags
def list_pics(self):
    """
    List pictures attached to this concept.

    The page stored in the metadata under the ``"Správa příloh"`` key is
    downloaded and all links from its form named ``form`` are collected.

    Raises:
        AssertionError: If the form can't be found in the page.

    Return:
        list: ``href`` values of all ``<a>`` elements found in the form.
    """
    # metadata are initialized lazily on first use
    if not self._meta:
        self._init_metadata()

    data = download(
        url_context(self._meta["Správa příloh"]),
        session=self._session
    )
    dom = dhtmlparser.parseString(data)

    forms = dom.find("form", {"name": "form"})
    # Raised explicitly instead of using `assert`, which is stripped when
    # Python runs with -O. AssertionError is kept so existing callers that
    # catch it continue to work.
    if not forms:
        raise AssertionError("Can't find pic form!")

    return [
        a.params["href"]
        for a in first(forms).find("a")
        if "href" in a.params
    ]
def get_image_urls(self):
    """
    Get list of links to all images used in this blog.

    ``src`` values starting with ``http://`` or ``https://`` are returned
    as they are, all others are passed through ``url_context()``.

    Returns:
        list: List of str with the URL of each ``<img>`` that has ``src``.
    """
    def to_url(src):
        # remote links are kept, everything else goes through url_context
        if src.startswith("http://") or src.startswith("https://"):
            return src

        return url_context(src)

    image_tags = dhtmlparser.parseString(self.text).find("img")

    # images without the src parameter are ignored
    return [
        to_url(tag.params["src"])
        for tag in image_tags
        if "src" in tag.params
    ]
def remove_tag(self, tag, throw=False):
    """
    Remove tag from the tags currently assigned to blogpost.

    Args:
        tag (Tag): :class:`Tag` instance. See :class:`possible_tags` for
            list of all possible tags.
        throw (bool): Raise error in case you are trying to remove tag
              that is not assigned to blogpost.

    Raises:
        KeyError: In case that `tag` is not instance of :class:`Tag`.
        IndexError: In case that you are trying to remove tag which is not
                    assigned to blogpost and `throw` is set.
        ValueError: In case that :attr:`uid` is not set.

    Returns:
        list: List of :class:`Tag` objects.
    """
    if not isinstance(tag, Tag):
        raise KeyError("Tag have instance of Tag and to be from .tags()")

    # unassigned tag: silently return the current tags unless `throw` is set
    if tag not in self.tags:
        if not throw:
            return self.tags

        raise IndexError("Can't remove unassigned tag.")

    if not self.uid:
        # the message used to say "assign" (copied from add_tag)
        raise ValueError(
            "Can't remove tag - .uid property not set. Call .pull() or "
            "assign .uid manually."
        )

    # the response is parsed by _parse_tags and stored as the new tag list
    tags_xml = download(
        url_context(
            "/ajax/tags/unassign?rid=%d&tagID=%s" % (self.uid, tag.norm)
        )
    )

    self.tags = self.__class__._parse_tags(tags_xml)

    return self.tags
def list_pics(self):
    """
    List pictures attached to this concept.

    The page stored in the metadata under the ``"Správa příloh"`` key is
    downloaded and all links from its form named ``form`` are collected.

    Raises:
        AssertionError: If the form can't be found in the page.

    Return:
        list: ``href`` values of all ``<a>`` elements found in the form.
    """
    # metadata are initialized lazily on first use
    if not self._meta:
        self._init_metadata()

    data = download(
        url_context(self._meta["Správa příloh"]),
        session=self._session
    )
    dom = dhtmlparser.parseString(data)

    forms = dom.find("form", {"name": "form"})
    # Raised explicitly instead of using `assert`, which is stripped when
    # Python runs with -O. AssertionError is kept so existing callers that
    # catch it continue to work.
    if not forms:
        raise AssertionError("Can't find pic form!")

    return [
        a.params["href"]
        for a in first(forms).find("a")
        if "href" in a.params
    ]
def url(self):
    """
    Compose URL of this tag from its :attr:`norm`.

    Returns:
        ``/stitky/<norm>`` passed through ``url_context()``.
    """
    tag_path = "/stitky/%s" % self.norm

    return url_context(tag_path)
def __init__(self, title, link, session):
    """
    Store the description of the object.

    Args:
        title: Stored as :attr:`title`.
        link: Link passed through ``url_context()`` and stored as
            :attr:`link`.
        session: Stored as ``_session``.
    """
    self.title = title

    full_link = url_context(link)
    self.link = full_link

    # no metadata yet
    self._meta = None
    self._session = session
def _get_tags(self):
    """
    Download and parse the tags assigned to the object with :attr:`uid`.

    Returns:
        Result of ``_parse_tags()`` called on the downloaded data.
    """
    assigned_path = "/ajax/tags/assigned?rid=%d" % self.uid
    assigned_xml = download(url_context(assigned_path))

    return self.__class__._parse_tags(assigned_xml)