コード例 #1
0
def authors_from_ids(idlist):
    ''' Build a list of Author objects from a list of author.gut_id values.

        Used to avoid large SELECT ... IN SQL statements, which peewee
        complains about.  Every Author row is fetched (ordered by last
        name, then first names) and filtered in Python, so this is slower
        than a single filtered query.

        idlist: iterable of hashable gut_id values to keep.

        Returns the matching Author objects, in name order, without
        duplicates. '''
    # Build the lookup once: testing membership against a list would
    # rescan it for every Author row.
    wanted_ids = set(idlist)

    authors = []
    for author in Author.select().order_by(Author.last_name.asc(),
                                           Author.first_names.asc()):
        if author.gut_id not in wanted_ids:
            continue
        # Duplicate guard relies on the model's own equality.
        if author in authors:
            continue
        authors.append(author)
    return authors
コード例 #2
0
ファイル: export.py プロジェクト: emijrp/gutenberg
def authors_from_ids(idlist):
    ''' Build a list of Author objects from a list of author.gut_id values.

        Used to avoid large SELECT ... IN SQL statements, which peewee
        complains about.  Every Author row is fetched (ordered by last
        name, then first names) and filtered in Python, so this is slower
        than a single filtered query.

        idlist: iterable of hashable gut_id values to keep.

        Returns the matching Author objects, in name order, without
        duplicates. '''
    # Build the lookup once: testing membership against a list would
    # rescan it for every Author row.
    wanted_ids = set(idlist)

    authors = []
    for author in Author.select().order_by(Author.last_name.asc(),
                                           Author.first_names.asc()):
        if author.gut_id not in wanted_ids:
            continue
        # Duplicate guard relies on the model's own equality.
        if author in authors:
            continue
        authors.append(author)
    return authors
コード例 #3
0
def save_rdf_in_database(parser):
    ''' Store one parsed RDF record: its author, the book and its formats.

        parser: object exposing author_id, last_name, first_name,
                birth_year, death_year, license, gid, title, subtitle,
                language, downloads and file_types (a mapping of file
                name -> MIME type).

        The author is created when missing, otherwise its non-empty
        fields are refreshed.  Records without an author id are attached
        to the author whose gut_id is '216' (Anonymous).  Formats listed
        in BAD_BOOKS_FORMATS for this book are logged and skipped. '''

    # Insert the author if it does not exist yet, otherwise update it.
    if parser.author_id:
        try:
            author_record = Author.get(gut_id=parser.author_id)
        except Author.DoesNotExist:
            author_record = Author.create(
                gut_id=parser.author_id,
                last_name=parser.last_name,
                first_names=parser.first_name,
                birth_year=parser.birth_year,
                death_year=parser.death_year)
        else:
            # Only overwrite fields the parser actually provides.
            if parser.last_name:
                author_record.last_name = parser.last_name
            if parser.first_name:
                author_record.first_names = parser.first_name
            if parser.birth_year:
                author_record.birth_year = parser.birth_year
            if parser.death_year:
                author_record.death_year = parser.death_year
            author_record.save()
    else:
        # No author, set Anonymous
        author_record = Author.get(gut_id='216')

    # Get license; a book without a known license is stored with None.
    try:
        license_record = License.get(name=parser.license)
    except License.DoesNotExist:
        license_record = None

    # Insert book
    book_record = Book.create(
        id=parser.gid,
        title=parser.title.strip(),
        subtitle=parser.subtitle.strip(),
        author=author_record,  # foreign key
        license=license_record,  # foreign key
        language=parser.language.strip(),
        downloads=parser.downloads
    )

    # The book id does not change while its formats are inserted.
    bid = int(book_record.id)

    # Insert formats
    for file_type in parser.file_types:
        raw_mime = parser.file_types[file_type]

        # Sanitize MIME: drop the charset suffix except for text/plain.
        mime = raw_mime
        if not mime.startswith('text/plain'):
            mime = re.sub(r'; charset=[a-z0-9-]+', '', mime)

        # Build the format pattern: book id replaced by a placeholder,
        # keeping only the last path component.
        pattern = re.sub(r'' + parser.gid, '{id}', file_type)
        pattern = pattern.split('/')[-1]

        if bid in BAD_BOOKS_FORMATS \
            and mime in [FORMAT_MATRIX.get(f)
                         for f in BAD_BOOKS_FORMATS[bid]]:
            logger.error("\t**** EXCLUDING **** {} for book #{} from list."
                         .format(mime, bid))
            continue

        # NOTE(review): newer peewee releases return an
        # (instance, created) tuple from get_or_create -- confirm against
        # the peewee version this project pins.
        format_record = Format.get_or_create(
            mime=mime,
            images=file_type.endswith('.images')
            or raw_mime == 'application/pdf',
            pattern=pattern)

        # Insert book format
        BookFormat.create(
            book=book_record,  # foreign key
            format=format_record  # foreign key
        )
コード例 #4
0
def export_to_json_helpers(books, static_folder, languages, formats):
    ''' Dump the book, author and language listings as .js files.

        books:         peewee query of the Book rows to export.
        static_folder: directory the .js files are written to.
        languages, formats: accepted but not used by this function.

        Each file contains a single `var <name> = <json>;` statement. '''

    def dumpjs(col, fn, var='json_data'):
        # Write `col` as JSON assigned to the JavaScript variable `var`.
        target = os.path.join(static_folder, fn)
        with open(target, 'w') as out:
            out.write("var {var} = ".format(var=var))
            out.write(json.dumps(col))
            out.write(";")

    def as_arrays(records):
        # Serialise every record through its to_array() method.
        return [record.to_array() for record in records]

    def author_ids(book_query):
        # Distinct gut_id values of the authors of the given books.
        return list({book.author.gut_id for book in book_query})

    def by_name(author_query):
        # Authors ordered by last name, then first names.
        return author_query.order_by(Author.last_name.asc(),
                                     Author.first_names.asc())

    # all books sorted by popularity
    logger.info("\t\tDumping full_by_popularity.js")
    dumpjs(as_arrays(books.order_by(Book.downloads.desc())),
           'full_by_popularity.js')

    # all books sorted by title
    logger.info("\t\tDumping full_by_title.js")
    dumpjs(as_arrays(books.order_by(Book.title.asc())),
           'full_by_title.js')

    avail_langs = get_langs_with_count(books=books)

    # language-specific collections
    for _name, lang, _count in avail_langs:
        lang_books = books.where(Book.language == lang)

        # by popularity
        logger.info("\t\tDumping lang_{}_by_popularity.js".format(lang))
        dumpjs(as_arrays(lang_books.order_by(Book.downloads.desc())),
               'lang_{}_by_popularity.js'.format(lang))

        # by title
        logger.info("\t\tDumping lang_{}_by_title.js".format(lang))
        dumpjs(as_arrays(lang_books.order_by(Book.title.asc())),
               'lang_{}_by_title.js'.format(lang))

        # authors for that lang
        authors = Author.select().where(
            Author.gut_id << author_ids(books.filter(language=lang)))
        logger.info("\t\tDumping authors_lang_{}.js".format(lang))
        dumpjs(as_arrays(by_name(authors)),
               'authors_lang_{}.js'.format(lang), 'authors_json_data')

    # author specific collections
    authors = Author.select().where(Author.gut_id << author_ids(books))
    for author in authors:
        gut_id = author.gut_id
        author_books = books.where(Book.author == author)

        # by popularity
        logger.info("\t\tDumping auth_{}_by_popularity.js".format(gut_id))
        dumpjs(as_arrays(author_books.order_by(Book.downloads.desc())),
               'auth_{}_by_popularity.js'.format(gut_id))

        # by title
        logger.info("\t\tDumping auth_{}_by_title.js".format(gut_id))
        dumpjs(as_arrays(author_books.order_by(Book.title.asc())),
               'auth_{}_by_title.js'.format(gut_id))

    # authors list sorted by name
    logger.info("\t\tDumping authors.js")
    dumpjs(as_arrays(by_name(authors)), 'authors.js', 'authors_json_data')

    # languages list sorted by code
    logger.info("\t\tDumping languages.js")
    dumpjs(avail_langs, 'languages.js', 'languages_json_data')

    # languages by weight
    main_languages, other_languages = get_lang_groups(books)
    logger.info("\t\tDumping main_languages.js")
    dumpjs(main_languages, 'main_languages.js', 'main_languages_json_data')
    dumpjs(other_languages, 'other_languages.js', 'other_languages_json_data')