def get_blogposts(self):
    """
    Collect every PUBLISHED blogpost of this user. Unpublished drafts are
    available via :meth:`get_concepts`.

    Returns:
        list: :class:`Blogpost` objects ordered from oldest to newest
            (sorted by ``created_ts``).
    """
    if not self.has_blog:
        return []

    def _strip_listing(html):
        # Drop the "Píšeme jinde" sidebar block and everything that comes
        # before the main content div.
        html = html.split(
            '<div class="s_nadpis linkbox_nadpis">Píšeme jinde</div>'
        )[0]
        return html.split('<div class="st" id="st">')[1]

    posts = []
    offset = 0
    while True:
        page = self._get(self._compose_blogposts_url(offset))
        dom = dhtmlparser.parseString(_strip_listing(page))

        page_posts = []
        for node in dom.find("div", {"class": "cl"}):
            page_posts.append(Blogpost.from_html(node))

        # an empty page means we walked past the last listing page
        if not page_posts:
            break

        posts.extend(page_posts)
        offset += BLOG_STEP

    return sorted(posts, key=lambda post: post.created_ts)
def iter_blogposts(start=0, end=None, lazy=True):
    """
    Iterate over blogs. Based at bloglist.

    Args:
        start (int, default 0): Start at this page.
        end (int, default None): End at this page.
        lazy (bool, default True): Initialize :class:`.Blogpost` objects only
             with informations from listings. Don't download full text and
             comments.

    Yields:
        obj: :class:`.Blogpost` objects.
    """
    for page_no, page_url in enumerate(_next_blog_url(start)):
        html = _remove_crap_from_bloglist(_shared.download(page_url))

        # parse basic info about all blogposts listed on this page
        dom = _dhtmlparser.parseString(html)
        for item_no, item in enumerate(dom.findB("div", {"class": "cl"})):
            yield Blogpost.from_html(item, lazy=lazy)

            # a page carries 25 blogposts, but sometimes more are returned;
            # ignore everything past the 25th
            if item_no >= 24:
                break

        # stop when the pagination footer signals the last page
        if not _should_continue(dom):
            break
        if end is not None and page_no >= end:
            break
def get_blogposts(self):
    """
    Lists all of the user's PUBLISHED blogposts. For unpublished ones, see
    :meth:`get_concepts`.

    Returns:
        list: sorted (old->new) list of Blogpost objects.
    """
    if not self.has_blog:
        return []

    sidebar_marker = '<div class="s_nadpis linkbox_nadpis">Píšeme jinde</div>'
    content_marker = '<div class="st" id="st">'

    posts = []
    page_offset = 0
    parsed = True  # force the first loop iteration
    while parsed:
        raw = self._get(self._compose_blogposts_url(page_offset))
        # cut away the sidebar, then everything before the content div
        raw = raw.split(sidebar_marker)[0].split(content_marker)[1]

        dom = dhtmlparser.parseString(raw)
        parsed = [
            Blogpost.from_html(el)
            for el in dom.find("div", {"class": "cl"})
        ]
        posts.extend(parsed)
        page_offset += BLOG_STEP

    return sorted(posts, key=lambda b: b.created_ts)
def iter_blogposts(start=0, end=None, lazy=True):
    """
    Iterate over blogs. Based at bloglist.

    Args:
        start (int, default 0): Start at this page.
        end (int, default None): End at this page.
        lazy (bool, default True): Initialize :class:`.Blogpost` objects only
             with informations from listings. Don't download full text and
             comments.

    Yields:
        obj: :class:`.Blogpost` objects.
    """
    for page_no, url in enumerate(_next_blog_url(start)):
        html = _shared.download(url)

        # keep only the content part of the page
        html = html.split(
            '<div class="s_nadpis linkbox_nadpis">Píšeme jinde</div>'
        )[0]
        html = html.split('<div class="st" id="st">')[1]

        # some blogs open an HTML comment in the perex, which would swallow
        # the rest of the bloglist - force-close it before each signature
        html = html.replace(
            '<div class="signature">',
            '<!-- --><div class="signature">'
        )

        # parse basic info about all blogposts on this page
        dom = _dhtmlparser.parseString(html)
        for post_no, post_html in enumerate(dom.findB("div", {"class": "cl"})):
            yield Blogpost.from_html(post_html, lazy=lazy)

            # a page holds 25 blogposts, but sometimes more show up
            if post_no >= 24:
                break

        # the pagination footer says there is no next page
        if not _should_continue(dom):
            break

        if end is not None and page_no >= end:
            break
def iter_blogposts(start=0, end=None, lazy=True):
    """
    Iterate over blogs. Based at bloglist.

    Args:
        start (int, default 0): Start at this page.
        end (int, default None): End at this page.
        lazy (bool, default True): Initialize :class:`.Blogpost` objects only
             with informations from listings. Don't download full text and
             comments.

    Yields:
        obj: :class:`.Blogpost` objects.
    """
    def _extract_listing(raw):
        # strip the "Píšeme jinde" box and everything before the content div
        raw = raw.split(
            '<div class="s_nadpis linkbox_nadpis">Píšeme jinde</div>'
        )[0]
        raw = raw.split('<div class="st" id="st">')[1]
        # some perexes open an HTML comment that would eat the rest of the
        # bloglist; close any such comment right before each signature
        return raw.replace(
            '<div class="signature">',
            '<!-- --><div class="signature">'
        )

    for cnt, url in enumerate(_next_blog_url(start)):
        dom = _dhtmlparser.parseString(_extract_listing(_shared.download(url)))

        # parse basic info about all blogposts on this page
        for bcnt, blog in enumerate(dom.findB("div", {"class": "cl"})):
            yield Blogpost.from_html(blog, lazy=lazy)
            if bcnt >= 24:  # cap at 25 posts per page, extras are junk
                break

        if not _should_continue(dom):  # reached the last pagination page
            break
        if end is not None and cnt >= end:
            break