def authors_from_ids(idlist):
    """Build a list of Author objects from a list of ``Author.gut_id`` values.

    Used to overcome large ``SELECT ... IN`` SQL statements, which peewee
    complains about.  Instead of filtering in SQL, every Author row is
    fetched (ordered by last name, then first names) and filtered here,
    so this is slower than a plain ``IN`` query.

    :param idlist: iterable of ``gut_id`` values to keep.
    :return: list of matching Author objects, without duplicates, in
        last-name / first-names order.
    """
    # Set membership is O(1); testing against the raw list would rescan it
    # for every single row of the Author table.
    wanted = set(idlist)
    if not wanted:
        # Nothing can match: skip the full table scan altogether.
        return []

    authors = []
    seen = set()  # same equality test as `author in authors`, but hashed
    query = Author.select().order_by(Author.last_name.asc(),
                                     Author.first_names.asc())
    for author in query:
        if author.gut_id not in wanted or author in seen:
            continue
        seen.add(author)
        authors.append(author)
    return authors
def save_rdf_in_database(parser):
    """Store the content of one parsed RDF record in the database.

    Creates or updates the Author, creates the Book, then creates one
    BookFormat (and, if needed, Format) per entry of ``parser.file_types``.

    :param parser: object exposing ``author_id``, ``last_name``,
        ``first_name``, ``birth_year``, ``death_year``, ``license``, ``gid``,
        ``title``, ``subtitle``, ``language``, ``downloads`` and
        ``file_types`` (a mapping of file name/URL to MIME type).
    """
    # Insert author if it does not exist yet, otherwise refresh its fields.
    if parser.author_id:
        try:
            author_record = Author.get(gut_id=parser.author_id)
        except Author.DoesNotExist:
            author_record = Author.create(
                gut_id=parser.author_id,
                last_name=parser.last_name,
                first_names=parser.first_name,
                birth_year=parser.birth_year,
                death_year=parser.death_year)
        else:
            # Only overwrite fields for which the parser has a value.
            if parser.last_name:
                # Previously a bare expression, so the name was never updated.
                author_record.last_name = parser.last_name
            if parser.first_name:
                author_record.first_names = parser.first_name
            if parser.birth_year:
                author_record.birth_year = parser.birth_year
            if parser.death_year:
                author_record.death_year = parser.death_year
            author_record.save()
    else:
        # No author, set Anonymous
        author_record = Author.get(gut_id='216')

    # Get license; a book without a known license is stored with None.
    try:
        license_record = License.get(name=parser.license)
    except License.DoesNotExist:
        license_record = None

    # Insert book
    book_record = Book.create(
        id=parser.gid,
        title=parser.title.strip(),
        subtitle=parser.subtitle.strip(),
        author=author_record,  # foreign key
        license=license_record,  # foreign key
        language=parser.language.strip(),
        downloads=parser.downloads
    )

    # MIME types that must be skipped for this particular book
    # (loop-invariant, so computed once).
    bid = int(book_record.id)
    if bid in BAD_BOOKS_FORMATS:
        excluded_mimes = [FORMAT_MATRIX.get(f)
                          for f in BAD_BOOKS_FORMATS.get(bid)]
    else:
        excluded_mimes = []

    # Insert formats
    for file_type in parser.file_types:
        raw_mime = parser.file_types[file_type]

        # Sanitize MIME: drop the charset suffix, except for text/plain.
        mime = raw_mime
        if not mime.startswith('text/plain'):
            mime = re.sub(r'; charset=[a-z0-9-]+', '', mime)

        # Insert format type: keep only the file name, with the book id
        # replaced by a '{id}' placeholder.  The id is escaped so it is
        # always matched literally.
        pattern = re.sub(re.escape(parser.gid), '{id}', file_type)
        pattern = pattern.split('/')[-1]

        if mime in excluded_mimes:
            logger.error("\t**** EXCLUDING **** {} for book #{} from list."
                         .format(mime, bid))
            continue

        # NOTE(review): depending on the peewee version, get_or_create
        # returns either the instance or an (instance, created) tuple --
        # confirm against the version this project pins.
        format_record = Format.get_or_create(
            mime=mime,
            images=file_type.endswith('.images')
            or raw_mime == 'application/pdf',
            pattern=pattern)

        # Insert book format
        BookFormat.create(
            book=book_record,  # foreign key
            format=format_record  # foreign key
        )
def export_to_json_helpers(books, static_folder, languages, formats):
    """Write the JavaScript data files into ``static_folder``.

    Each file has the form ``var <name> = <json>;``.  The following files
    are produced:

    * ``full_by_popularity.js`` / ``full_by_title.js`` for all books,
    * ``lang_<code>_by_popularity.js`` / ``lang_<code>_by_title.js`` and
      ``authors_lang_<code>.js`` for every available language,
    * ``auth_<gut_id>_by_popularity.js`` / ``auth_<gut_id>_by_title.js``
      for every author of ``books``,
    * ``authors.js``, ``languages.js``, ``main_languages.js`` and
      ``other_languages.js``.

    :param books: Book query the collections are built from.
    :param static_folder: directory the files are written to.
    :param languages: not used by this function.
    :param formats: not used by this function.
    """

    def dumpjs(col, fn, var='json_data'):
        # Serialize `col` as a JS variable assignment into static_folder/fn.
        destination = os.path.join(static_folder, fn)
        with open(destination, 'w') as f:
            f.write("var {var} = ".format(var=var))
            f.write(json.dumps(col))
            f.write(";")

    def dump_books(query, stem):
        # Dump `query` twice: most downloaded first, then by title.
        orderings = (
            ('popularity', Book.downloads.desc),
            ('title', Book.title.asc),
        )
        for label, make_ordering in orderings:
            fn = '{}_by_{}.js'.format(stem, label)
            logger.info("\t\tDumping {}".format(fn))
            rows = [book.to_array()
                    for book in query.order_by(make_ordering())]
            dumpjs(rows, fn)

    def authors_of(book_query):
        # Authors whose gut_id is referenced by at least one book of the query.
        gut_ids = list(set([book.author.gut_id for book in book_query]))
        return Author.select().where(Author.gut_id << gut_ids)

    def dump_authors(author_query, fn):
        # Dump authors sorted by last name, then first names.
        logger.info("\t\tDumping {}".format(fn))
        by_name = author_query.order_by(Author.last_name.asc(),
                                        Author.first_names.asc())
        dumpjs([author.to_array() for author in by_name],
               fn, 'authors_json_data')

    # all books
    dump_books(books, 'full')

    avail_langs = get_langs_with_count(books=books)

    # language-specific collections
    for _lang_name, lang, _lang_count in avail_langs:
        dump_books(books.where(Book.language == lang),
                   'lang_{}'.format(lang))

        # authors for that lang
        lang_authors = authors_of(books.filter(language=lang))
        dump_authors(lang_authors, 'authors_lang_{}.js'.format(lang))

    # author specific collections
    authors = authors_of(books)
    for author in authors:
        dump_books(books.where(Book.author == author),
                   'auth_{}'.format(author.gut_id))

    # authors list sorted by name
    dump_authors(authors, 'authors.js')

    # languages list sorted by code
    logger.info("\t\tDumping languages.js")
    dumpjs(avail_langs, 'languages.js', 'languages_json_data')

    # languages by weight
    main_languages, other_languages = get_lang_groups(books)
    logger.info("\t\tDumping main_languages.js")
    dumpjs(main_languages, 'main_languages.js', 'main_languages_json_data')
    dumpjs(other_languages, 'other_languages.js', 'other_languages_json_data')