Example #1
0
def get_list_of_filtered_books(languages, formats, only_books=None):
    """Build the ``Book`` query matching the requested filters.

    Each filter is applied only when its argument is non-empty; an empty
    (or ``None``) argument leaves the selection unrestricted on that axis.

    Args:
        languages: values matched against ``Book.language``.
        formats: keys looked up in ``FORMAT_MATRIX``; a book is kept when
            one of its formats has a MIME type among the looked-up values.
        only_books: optional collection of ``Book.id`` values the result
            is restricted to.

    Returns:
        The resulting ``Book`` select query.
    """
    if formats:
        wanted_mimes = [FORMAT_MATRIX.get(f) for f in formats]
        # ``<<`` is peewee's IN operator. The query is grouped by book id
        # after joining through BookFormat to Format.
        qs = (Book.select()
              .join(BookFormat)
              .join(Format)
              .where(Format.mime << wanted_mimes)
              .group_by(Book.id))
    else:
        qs = Book.select()

    if only_books:
        qs = qs.where(Book.id << only_books)

    if languages:
        qs = qs.where(Book.language << languages)

    return qs
Example #2
0
def get_list_of_filtered_books(languages, formats, only_books=None):
    """Build the ``Book`` query matching the requested filters.

    Each filter is applied only when its argument is non-empty; an empty
    (or ``None``) argument leaves the selection unrestricted on that axis.

    Args:
        languages: values matched against ``Book.language``.
        formats: keys looked up in ``FORMAT_MATRIX``; a book is kept when
            one of its formats has a MIME type among the looked-up values.
        only_books: optional collection of ``Book.id`` values the result
            is restricted to.

    Returns:
        The resulting ``Book`` select query.
    """
    if formats:
        wanted_mimes = [FORMAT_MATRIX.get(f) for f in formats]
        # ``<<`` is peewee's IN operator. The query is grouped by book id
        # after joining through BookFormat to Format.
        qs = (Book.select()
              .join(BookFormat)
              .join(Format)
              .where(Format.mime << wanted_mimes)
              .group_by(Book.id))
    else:
        qs = Book.select()

    if only_books:
        qs = qs.where(Book.id << only_books)

    if languages:
        qs = qs.where(Book.language << languages)

    return qs
Example #3
0
def get_list_of_all_languages():
    """Return the distinct ``language`` values found across all books.

    Values are de-duplicated through a set, so the order of the returned
    list is arbitrary.
    """
    languages = {book.language for book in Book.select(Book.language)}
    return list(languages)
Example #4
0
            urls.append(url)

    url_zip = os.path.join(u.build(), b_id + '-h' + '.zip')
    # url_utf8 = os.path.join(u.build(), b_id + '-8' + '.zip')
    url_html = os.path.join(u.build(), b_id + '-h' + '.html')
    url_htm = os.path.join(u.build(), b_id + '-h' + '.htm')

    u.with_base(UrlBuilder.BASE_TWO)
    name = ''.join(['pg', b_id])
    html_utf8 = os.path.join(u.build(), name + '.html.utf8')

    u.with_base(UrlBuilder.BASE_THREE)
    file_index = index_of_substring(files, ['html', 'htm'])
    file_name = files[file_index]['name']
    etext_nums = []
    etext_nums.extend(range(90, 100))
    etext_nums.extend(range(0, 6))
    etext_names = ["{0:0=2d}".format(i) for i in etext_nums]
    etext_urls = []
    for i in etext_names:
        etext_urls.append(os.path.join(u.build() + i, file_name))

    urls.extend([url_zip, url_htm, url_html, html_utf8])
    urls.extend(etext_urls)
    return list(set(urls))


if __name__ == '__main__':
    # Manual check: print whatever get_urls() returns for the book with id 9.
    book = Book.get(id=9)
    book_urls = get_urls(book)
    print(book_urls)
Example #5
0
def get_list_of_all_languages():
    """Collect every distinct ``Book.language`` value into a list.

    Duplicates are removed with a set, so the resulting order is arbitrary.
    """
    unique_languages = set()
    for record in Book.select(Book.language):
        unique_languages.add(record.language)
    return list(unique_languages)
Example #6
0
def save_rdf_in_database(parser):
    """Store the book described by ``parser`` together with its relations.

    Steps, in order:
      1. create the ``Author`` (or update the existing one), falling back to
         the author with ``gut_id='216'`` when the parser has no author id;
      2. look up the ``License`` by name (``None`` when it is not found);
      3. create the ``Book`` row;
      4. for every entry of ``parser.file_types`` create a ``Format`` (if
         needed) and a ``BookFormat`` link, skipping MIME types that
         ``BAD_BOOKS_FORMATS`` excludes for this book.

    Args:
        parser: object exposing the parsed record. The attributes read here
            are ``author_id``, ``last_name``, ``first_name``, ``birth_year``,
            ``death_year``, ``license``, ``gid``, ``title``, ``subtitle``,
            ``language``, ``downloads`` and ``file_types`` (indexable by the
            file names it iterates over, yielding MIME strings).
    """
    # Insert the author if it does not exist yet, otherwise refresh it.
    if parser.author_id:
        try:
            author_record = Author.get(gut_id=parser.author_id)
        except Author.DoesNotExist:
            author_record = Author.create(
                gut_id=parser.author_id,
                last_name=parser.last_name,
                first_names=parser.first_name,
                birth_year=parser.birth_year,
                death_year=parser.death_year)
        else:
            # Only overwrite fields for which the parser has a value.
            if parser.last_name:
                author_record.last_name = parser.last_name
            if parser.first_name:
                author_record.first_names = parser.first_name
            if parser.birth_year:
                author_record.birth_year = parser.birth_year
            if parser.death_year:
                author_record.death_year = parser.death_year
            author_record.save()
    else:
        # No author: use the "Anonymous" record (gut_id '216').
        author_record = Author.get(gut_id='216')

    # Get license; an unknown license name is stored as NULL.
    try:
        license_record = License.get(name=parser.license)
    except License.DoesNotExist:
        license_record = None

    # Insert book
    book_record = Book.create(
        id=parser.gid,
        title=parser.title.strip(),
        subtitle=parser.subtitle.strip(),
        author=author_record,  # foreign key
        license=license_record,  # foreign key
        language=parser.language.strip(),
        downloads=parser.downloads
    )

    # Both values are the same for every file of this book, so compute
    # them once instead of on each loop iteration.
    bid = int(book_record.id)
    excluded_mimes = {FORMAT_MATRIX.get(f)
                      for f in BAD_BOOKS_FORMATS.get(bid, ())}

    # Insert formats
    for file_type in parser.file_types:
        raw_mime = parser.file_types[file_type]

        # Sanitize MIME: the charset suffix is dropped for everything
        # except text/plain, which keeps it.
        mime = raw_mime
        if not mime.startswith('text/plain'):
            mime = re.sub(r'; charset=[a-z0-9-]+', '', mime)

        # File-name pattern: last path component with the book id replaced
        # by a placeholder. A literal replace avoids treating the id as a
        # regular expression.
        pattern = file_type.replace(parser.gid, '{id}').split('/')[-1]

        if mime in excluded_mimes:
            logger.error("\t**** EXCLUDING **** {} for book #{} from list."
                         .format(mime, bid))
            continue

        # The PDF check deliberately uses the unsanitized MIME string.
        has_images = (file_type.endswith('.images')
                      or raw_mime == 'application/pdf')
        format_record = Format.get_or_create(
            mime=mime,
            images=has_images,
            pattern=pattern)
        # Current peewee returns ``(instance, created)`` from
        # get_or_create(); older releases returned the bare instance.
        if isinstance(format_record, tuple):
            format_record = format_record[0]

        # Insert book format
        BookFormat.create(
            book=book_record,  # foreign key
            format=format_record  # foreign key
        )
Example #7
0
            urls.append(url)

    url_zip = os.path.join(u.build(), b_id + '-h' + '.zip')
    # url_utf8 = os.path.join(u.build(), b_id + '-8' + '.zip')
    url_html = os.path.join(u.build(), b_id + '-h' + '.html')
    url_htm = os.path.join(u.build(), b_id + '-h' + '.htm')

    u.with_base(UrlBuilder.BASE_TWO)
    name = ''.join(['pg', b_id])
    html_utf8 = os.path.join(u.build(), name + '.html.utf8')

    u.with_base(UrlBuilder.BASE_THREE)
    file_index = index_of_substring(files, ['html', 'htm'])
    file_name = files[file_index]['name']
    etext_nums = []
    etext_nums.extend(range(90, 100))
    etext_nums.extend(range(0, 6))
    etext_names = ["{0:0=2d}".format(i) for i in etext_nums]
    etext_urls = []
    for i in etext_names:
        etext_urls.append(os.path.join(u.build() + i, file_name))

    urls.extend([url_zip, url_htm, url_html, html_utf8])
    urls.extend(etext_urls)
    return list(set(urls))


if __name__ == '__main__':
    # Ad-hoc run: fetch the book with id 9 and print the result of get_urls().
    book = Book.get(id=9)
    found_urls = get_urls(book)
    print(found_urls)