Ejemplo n.º 1
0
 def retrieve(self, text):
     """Build a BookInfo for the plain-text book found at ``self.link``.

     Authors and title are obtained from ``text`` through
     ``Retriever.get_authors_and_title``.  ``self.link`` is stored both as
     the page link and as the ``'txt'`` download link.  The tag returned by
     ``Retriever.get_tag_by_link`` becomes the only tag when it is truthy;
     otherwise the tag list is empty.
     """
     info = BookInfo()
     authors, title = Retriever.get_authors_and_title(text)
     info.authors = authors
     info.title = title

     # The same URL is both the book's page and its text download.
     info.pagelink = self.link
     info.links['txt'] = self.link

     link_tag = Retriever.get_tag_by_link(self.link)
     if link_tag:
         info.tags = [link_tag]
     else:
         info.tags = []
     return info
Ejemplo n.º 2
0
 def execute(self, html):
     """Parse a book page and queue a task that saves the extracted BookInfo.

     Title, author name parts, language and ID are read from
     ``self.description``; summary, links, tags and picture are extracted
     from the parsed ``html`` by ``Retriever``.  ``self.tasks`` is replaced
     with a list holding a single ``BookSavingTask`` for the result.

     Always returns True.
     """
     soup = get_soup(html)
     description = self.description

     # Join only the name parts that are actually present, so a missing
     # middle name does not leave a double space (or a literal "None") in
     # the author string.  "%s" keeps the original coercion of each part.
     name_parts = [description[key]
                   for key in ('firstname', 'middlename', 'lastname')]
     author = " ".join("%s" % part for part in name_parts if part)

     book_info = BookInfo()
     book_info.title = description['title']
     # No usable name part at all means no author rather than a blank one.
     book_info.authors = [author] if author else []
     book_info.pagelink = self.link
     book_info.language = description['language']
     book_info.summary = Retriever.get_summary(soup)
     book_info.links = Retriever.get_links(soup, self.link)
     book_info.tags = Retriever.get_tags(soup)
     book_info.image = Retriever.get_picture(soup, self.link, description['ID'])
     self.tasks = [BookSavingTask(book_info)]
     return True
Ejemplo n.º 3
0
 def get_book_info(soup):
     """Extract a BookInfo from a parsed book page.

     All labelled fields are looked up with ``Retriever.get_field`` inside
     the ``div`` of class ``book_body``; the cover image is the first ``img``
     of class ``thumb`` anywhere in ``soup``.  Fields that are not found
     leave the corresponding BookInfo attribute untouched, except
     ``authors`` (set to an empty list) and ``image`` (set to None).

     NOTE(review): if the ``book_body`` div is absent, ``None`` is passed to
     ``Retriever.get_field`` -- confirm that helper tolerates it.
     """
     div = soup.find('div', {'class' : 'book_body'})
     book_info = BookInfo()

     # title: the field text carries the label, so strip it with a regex
     field = Retriever.get_field(div, u'Название:')
     if field:
         match = re.match(u"Название:(.+)", field.text)
         if match:
             book_info.title = match.group(1)

     # authors: one entry per anchor inside the field
     field = Retriever.get_field(div, u'Автор:')
     book_info.authors = [anchor.text for anchor in field.findAll('a')] if field else []

     # summary: text of the first paragraph of the description field
     field = Retriever.get_field(div, u'Описание книги:')
     if field and field.p:
         book_info.summary = field.p.text

     # image: .get() yields None instead of raising when <img> has no src
     img = soup.find('img', {'class' : 'thumb'})
     book_info.image = img.get('src') if img else None

     # tags
     field = Retriever.get_field(div, u'Жанр:')
     if field:
         book_info.tags = [tag.text for tag in field.findAll('a')]

     # links: anchor text is used as the key, href as the value
     field = Retriever.get_field(div, u'Скачать книгу бесплатно:')
     if field:
         for link_anchor in field.findAll('a'):
             href = link_anchor.get('href')
             if href is None:
                 # An anchor without href carries no download link; skip it
                 # rather than abort the whole page with a KeyError.
                 continue
             book_info.links[link_anchor.text] = href
     return book_info
Ejemplo n.º 4
0
 def get_bookinfo(entry):
     """Convert a parsed feed ``entry`` into a BookInfo.

     Returns None when the entry has no ``link`` whose type is
     ``application/<format>`` for any format in ``Retriever.known_formats``.
     Otherwise the result carries title, the text of the author's first
     child tag, language ("?" when no language tag is found), the download
     links keyed by format, the summary (None when absent) and one tag per
     ``category`` that has a non-empty ``label`` or, failing that, ``term``.
     """
     book_info = BookInfo()
     book_info.title = entry.title.text
     book_info.authors = [entry.author.findChild().text]

     # language: either of the two Dublin Core spellings is accepted
     lang_tag = entry.find("dc:language") or entry.find("dcterms:language")
     if lang_tag:
         book_info.language = lang_tag.text
     else:
         book_info.language = "?"

     # links: one per known format that the entry offers
     for fmt in Retriever.known_formats:
         link_tag = entry.find("link", type="application/" + fmt)
         if not link_tag:
             continue
         book_info.links[fmt] = link_tag["href"]
     if not book_info.links:
         # Nothing downloadable, so the entry is of no use.
         return None

     # summary: prefer <content>, fall back to <summary>
     summary_tag = entry.find("content") or entry.find("summary")
     if summary_tag:
         book_info.summary = summary_tag.text
     else:
         book_info.summary = None

     # tags: "label" wins over "term" whenever the attribute is present
     for category in entry.findAll("category"):
         if category.has_key("label"):
             label = category["label"]
         elif category.has_key("term"):
             label = category["term"]
         else:
             continue
         if label:
             book_info.tags.append(label)
     return book_info
Ejemplo n.º 5
0
def get_bookinfo_from_db(id):
    """Load the Book whose ``id`` field equals ``id`` and copy it into a BookInfo.

    Copies title, author names, page link, the short language code, one link
    per related book file (keyed by file type), tag names and the image name
    (None when the book has no image).  When the book has annotations, the
    first annotation object itself is stored as the summary.
    """
    info = BookInfo()
    record = Book.objects.get(id=id)

    # Plain scalar attributes.
    info.title = record.title
    info.pagelink = record.pagelink
    info.language = record.language.short

    # Related collections flattened to names.
    info.authors = [person.name for person in record.author.all()]
    info.tags = [label.name for label in record.tag.all()]

    # NOTE(review): the annotation object, not a text field of it, is stored.
    annotation_set = record.annotation_set.all()
    if annotation_set:
        info.summary = annotation_set[0]

    for stored_file in record.book_file.all():
        info.links[stored_file.type] = stored_file.link

    if record.image:
        info.image = record.image.name
    else:
        info.image = None
    return info
Ejemplo n.º 6
0
 def get_book_info(soup):
     """Extract a BookInfo from a parsed book page with labelled fields.

     Title, authors, summary, language and format are read with
     ``Retriever.get_field``.  Authors are stored as a one-element list and
     only when the field yields a truthy value.  The download link is the
     ``href`` of the first anchor inside the link field and is stored as
     ``{format: href}``.  When the link field, its anchor or the ``href`` is
     missing, ``links`` is left untouched instead of raising.
     """
     book_info = BookInfo()
     book_info.title = Retriever.get_field(soup, u'Название:')

     # Backslashes are doubled so the label keeps its literal "\(" and "\)"
     # without relying on unrecognised string escapes; the runtime value is
     # unchanged.  Presumably the label is used as a regex -- TODO confirm.
     authors = Retriever.get_field(soup, u'Автор\\(ы\\):')
     if authors:
         book_info.authors = [authors]

     book_info.summary = Retriever.get_field(soup, u'Описание:')
     book_info.language = Retriever.get_field(soup, u'Язык:')
     book_format = Retriever.get_field(soup, u'Формат:')

     # The third argument is passed through unchanged; judging by the ".a"
     # access the helper then returns a tag rather than text -- TODO confirm.
     link_field = Retriever.get_field(soup, u'Ссылка 1:', False)
     anchor = link_field.a if link_field else None
     href = anchor.get('href') if anchor is not None else None
     if href is not None:
         book_info.links = {book_format: href}
     return book_info
Ejemplo n.º 7
0
 def get_bookinfo_from_tag_dl(dl):
     """Build a BookInfo from a ``<dl>`` tag describing one book.

     The first two anchors under ``dl.dt.li`` are read as author and title,
     in that order; the title anchor's ``href`` is stored as the ``"shtml"``
     link.  Language is always ``"ru"``.  The text of the first ``<dd>``,
     when there is one, becomes the summary.
     """
     info = BookInfo()
     link_tags = dl.dt.li.findAll("a")
     author_tag, title_tag = link_tags[0], link_tags[1]

     info.links["shtml"] = title_tag["href"]
     info.title = title_tag.text
     info.authors = [author_tag.text]
     info.language = "ru"

     descriptions = dl.findAll("dd")
     if descriptions:
         info.summary = descriptions[0].text
     return info