Example #1
0
    def _add_website(self):
        """Add a ``Website`` for ``self.source`` to the session if none is found.

        Queries ``self.session`` for a ``Website`` whose ``name`` equals
        ``self.source``. When the lookup yields nothing truthy, a new
        ``Website`` is built from ``self.source`` and
        ``get_base_url(self.url)`` and handed to ``self.session.add``.
        Nothing is committed or flushed here.
        """
        by_name = self.session.query(Website).filter_by(name=self.source)
        record = by_name.first()

        if not record:
            record = Website(name=self.source, url=get_base_url(self.url))
            self.session.add(record)
Example #2
0
 def chapter_list(self):
     """Collect chapter entries from the ``dd`` siblings after the second ``dt``.

     Starting from ``#list``, takes ``dt:eq(1)``, walks its following
     ``dd`` siblings, keeps those whose first ``a`` has a truthy ``href``
     and maps each to ``(index, url, text)`` where ``url`` is the href
     joined onto ``utils.get_base_url(self.url)``.

     Returns:
         The object produced by PyQuery's ``map`` over those tuples.
     """
     def has_link(index, node):
         # Truthy only when the element holds an anchor with an href.
         return PyQuery(node)('a').attr('href')

     def to_entry(index, node):
         target = urljoin(utils.get_base_url(self.url),
                          PyQuery(node)('a').attr('href'))
         return (index, target, PyQuery(node).text())

     candidates = self.doc('#list')('dt:eq(1)').next_all('dd')
     return candidates.filter(has_link).map(to_entry)
Example #3
0
 def chapter_list(self):
     """Collect chapter entries from the ``.mulu_list`` blocks before the last.

     Within ``.mu_contain:eq(1)``, locates ``.mulu_list:last``, takes its
     preceding ``.mulu_list`` siblings and their ``li`` descendants, keeps
     the ones whose first ``a`` has a truthy ``href`` and maps each to
     ``(index, url, text)`` where ``url`` is the href joined onto
     ``utils.get_base_url(self.url)``.

     Returns:
         The object produced by PyQuery's ``map`` over those tuples.
     """
     def has_link(index, node):
         # Truthy only when the element holds an anchor with an href.
         return PyQuery(node)('a').attr('href')

     def to_entry(index, node):
         target = urljoin(utils.get_base_url(self.url),
                          PyQuery(node)('a').attr('href'))
         return (index, target, PyQuery(node).text())

     last_block = self.doc('.mu_contain:eq(1)')('.mulu_list:last')
     items = last_block.prev_all('.mulu_list')('li')
     return items.filter(has_link).map(to_entry)
Example #4
0
 def chapter_list(self):
     """Collect chapter entries from ``.list-charts:first li``.

     Keeps the matched elements whose first ``a`` has a truthy ``href``
     and maps each to ``(index, url, text)``: ``url`` is the href joined
     onto ``utils.get_base_url(self.url)`` and ``text`` is the element
     text passed through ``self.refine``.

     Returns:
         The object produced by PyQuery's ``map`` over those tuples.
     """
     def has_link(index, node):
         # Truthy only when the element holds an anchor with an href.
         return PyQuery(node)('a').attr('href')

     def to_entry(index, node):
         target = urljoin(utils.get_base_url(self.url),
                          PyQuery(node)('a').attr('href'))
         label = self.refine(PyQuery(node).text())
         return (index, target, label)

     items = self.doc('.list-charts:first li')
     return items.filter(has_link).map(to_entry)
Example #5
0
 def _chapter_list_with_sel(self, selector, chap_type):
     """Build ``(index, url, text)`` chapter entries for ``selector``.

     Elements matched by ``selector`` are first narrowed to those whose
     first ``a`` has a truthy ``href``. How the href becomes ``url``
     depends on ``chap_type``:

     * ``ChapterType.whole``    -- the href is used unchanged.
     * ``ChapterType.path``     -- joined onto ``get_base_url(self.url)``.
     * ``ChapterType.last``     -- joined onto ``self.url``.
     * ``ChapterType.noscheme`` -- passed to ``change_url_scheme`` together
       with ``get_url_scheme(self.url)``.

     Args:
         selector: Selector handed to ``self.doc``.
         chap_type: One of the ``ChapterType`` members listed above.

     Returns:
         The object produced by PyQuery's ``map`` over the entry tuples.

     Raises:
         NameError: If ``chap_type`` matches none of the handled members.
     """
     def has_link(index, node):
         # Truthy only when the element holds an anchor with an href.
         return PyQuery(node)('a').attr('href')

     # Filtering happens before the type dispatch, as it always has.
     linked = self.doc(selector).filter(has_link)

     # Pick the per-element link builder; each one takes the raw element
     # so the href is read at the same point in the call as before.
     if chap_type == ChapterType.whole:
         def link_for(node):
             return PyQuery(node)('a').attr('href')
     elif chap_type == ChapterType.path:
         def link_for(node):
             return urljoin(get_base_url(self.url),
                            PyQuery(node)('a').attr('href'))
     elif chap_type == ChapterType.last:
         def link_for(node):
             return urljoin(self.url, PyQuery(node)('a').attr('href'))
     elif chap_type == ChapterType.noscheme:
         def link_for(node):
             return change_url_scheme(PyQuery(node)('a').attr('href'),
                                      get_url_scheme(self.url))
     else:
         raise NameError('chap_type')

     def to_entry(index, node):
         return (index, link_for(node), PyQuery(node).text())

     return linked.map(to_entry)
Example #6
0
 def chapter_list(self):
     """Collect chapter entries from every ``td a`` element.

     Each matched anchor is mapped to ``(index, url, text)`` where ``url``
     is the anchor's own ``href`` joined onto
     ``utils.get_base_url(self.url)``. No href filtering is applied.

     Returns:
         The object produced by PyQuery's ``map`` over those tuples.
     """
     def to_entry(index, anchor):
         target = urljoin(utils.get_base_url(self.url),
                          PyQuery(anchor).attr('href'))
         return (index, target, PyQuery(anchor).text())

     anchors = self.doc('td a')
     return anchors.map(to_entry)