def execute(self):
    """Normalize the stored title of one Book and persist the change.

    Loads the Book referenced by ``self.book_id``, strips kilobyte-size
    markers from its title, runs title preprocessing, and tries to split
    embedded author names out of the title.  When authors are found they
    replace the book's author relation.  If the normalized title differs
    from the stored one, a before/after diff line is printed and the book
    is saved with ``credit = 1``.

    Returns:
        True unconditionally.

    NOTE(review): reconstructed from a single collapsed source line — the
    final ``book.title = …; book.save()`` statements are assumed to sit
    inside the changed-title guard (save only on change); confirm against
    the original formatting.
    """
    book = Book.objects.get(id=self.book_id)
    # Strip "(123k)" style size annotations carried over from the listing page.
    title = Retriever.cleanup_kilobytes(book.title)
    title = preprocess_title(title)
    new_authors, new_title = Retriever.extract_authors(title)
    if new_authors:
        # Authors were embedded in the title: the remainder becomes the
        # title and the book's author set is rebuilt from the names.
        title = new_title
        authors = [Author.objects.get_or_create(name=author)[0]
                   for author in new_authors]
        # Direct M2M assignment — valid in pre-2.0 Django only.
        book.author = authors
    else:
        authors = book.author.all()
    # Second normalization pass: new_title from extract_authors has not
    # been through preprocess_title yet (redundant in the else branch).
    title = preprocess_title(title)
    if book.title != title:
        # Diff line: id, old authors : 'old title' ==> new authors : 'new
        # title'; the "===" marker appears only when authors were extracted.
        print "%d %s : '%s' %s=> %s : '%s'" % (
            book.id,
            " % ".join([a.name.encode('utf-8') for a in book.author.all()]),
            book.title.encode('utf-8'),
            "=" * (3 if new_authors else 0),
            " % ".join([a.name.encode('utf-8') for a in authors]),
            title.encode('utf-8'),
        )
        book.title = title
        book.credit = 1
        book.save()
    return True
def get_books_test():
    """Smoke-test Retriever.get_accept_books on the Strugatsky index page."""
    url = 'http://lib.ru/STRUGACKIE/'
    page_soup = download_soup(url)
    accepted = Retriever.get_accept_books(page_soup, url)
    # The author index page is expected to expose exactly 99 accepted books.
    assert len(accepted) == 99
def get_dirs_test(): soup = download_soup('http://lib.ru/') all_tags = Retriever.get_accept_dirs(soup) for link,tag in all_tags: print link,tag.encode('utf8') # print keys = [tag[0] for tag in all_tags] keys.sort() #print len(keys) assert len(keys) == 64
def get_authors_title_test():
    """Check author/title extraction against a known lib.ru text file.

    Downloads the head of the ASCII rendering of one article, detects its
    charset, decodes it, and asserts that Retriever.get_authors_and_title
    pulls out the expected single author and title.

    Fix: the urllib.urlopen handle was never closed (socket leak); the
    read is now wrapped in contextlib.closing so the connection is
    released even if the read raises.
    """
    import urllib
    from contextlib import closing
    url = 'http://lib.ru/TXT/ruscience.txt'
    # 2048 bytes is enough for charset detection and the author/title header.
    with closing(urllib.urlopen(url + '_Ascii.txt')) as page:
        text = page.read(2048)
    detector = UniversalDetector()
    detector.feed(text)
    detector.close()
    encoding = detector.result['encoding']
    text = unicode(text, encoding)
    authors, title = Retriever.get_authors_and_title(text)
    assert len(authors) == 1
    assert authors[0] == u'Дмитрий Толмацкий'
    assert title == u'Российская наука на пути из реанимации в морг'