def get_list_of_filtered_books(languages, formats, only_books=None):
    """Build a ``Book`` query restricted by the requested filters.

    Each filter is applied only when its argument is non-empty; an empty
    (or ``None``) argument means "do not restrict on this criterion".

    Args:
        languages: values matched against ``Book.language``.
        formats: format keys; each one is translated through
            ``FORMAT_MATRIX`` and matched against ``Format.mime``.
        only_books: book ids matched against ``Book.id``.

    Returns:
        The ``Book`` select query with the applicable filters attached.
    """
    if formats:
        mimes = [FORMAT_MATRIX.get(f) for f in formats]
        # The join through BookFormat produces one row per (book, format)
        # pair, so group by book id to list each book only once.
        qs = (Book.select()
              .join(BookFormat)
              .join(Format)
              .where(Format.mime << mimes)
              .group_by(Book.id))
    else:
        qs = Book.select()

    if only_books:
        qs = qs.where(Book.id << only_books)

    if languages:
        qs = qs.where(Book.language << languages)

    return qs
def get_list_of_all_languages():
    """Return the distinct ``language`` values found on books, as a list.

    Duplicates are dropped by going through a set, so the order of the
    returned list is arbitrary.
    """
    unique_languages = {row.language for row in Book.select(Book.language)}
    return list(unique_languages)
urls.append(url) url_zip = os.path.join(u.build(), b_id + '-h' + '.zip') # url_utf8 = os.path.join(u.build(), b_id + '-8' + '.zip') url_html = os.path.join(u.build(), b_id + '-h' + '.html') url_htm = os.path.join(u.build(), b_id + '-h' + '.htm') u.with_base(UrlBuilder.BASE_TWO) name = ''.join(['pg', b_id]) html_utf8 = os.path.join(u.build(), name + '.html.utf8') u.with_base(UrlBuilder.BASE_THREE) file_index = index_of_substring(files, ['html', 'htm']) file_name = files[file_index]['name'] etext_nums = [] etext_nums.extend(range(90, 100)) etext_nums.extend(range(0, 6)) etext_names = ["{0:0=2d}".format(i) for i in etext_nums] etext_urls = [] for i in etext_names: etext_urls.append(os.path.join(u.build() + i, file_name)) urls.extend([url_zip, url_htm, url_html, html_utf8]) urls.extend(etext_urls) return list(set(urls)) if __name__ == '__main__': book = Book.get(id=9) print(get_urls(book))
def save_rdf_in_database(parser):
    """Persist the book described by a parsed RDF record.

    Creates or updates the author, looks up the license, creates the
    ``Book`` row and then one ``BookFormat`` row for every file type that
    is not excluded through ``BAD_BOOKS_FORMATS``.

    Args:
        parser: parsed RDF record exposing ``author_id``, ``first_name``,
            ``last_name``, ``birth_year``, ``death_year``, ``license``,
            ``gid``, ``title``, ``subtitle``, ``language``, ``downloads``
            and ``file_types`` (a mapping from file location to MIME type).
    """
    # Insert author, if it does not exist yet
    if parser.author_id:
        try:
            author_record = Author.get(gut_id=parser.author_id)
        except Author.DoesNotExist:
            author_record = Author.create(
                gut_id=parser.author_id,
                last_name=parser.last_name,
                first_names=parser.first_name,
                birth_year=parser.birth_year,
                death_year=parser.death_year)
        else:
            # Known author: overwrite only the fields this record provides.
            if parser.last_name:
                author_record.last_name = parser.last_name
            if parser.first_name:
                author_record.first_names = parser.first_name
            if parser.birth_year:
                author_record.birth_year = parser.birth_year
            if parser.death_year:
                author_record.death_year = parser.death_year
            author_record.save()
    else:
        # No author, set Anonymous
        author_record = Author.get(gut_id='216')

    # Get license; an unknown license is stored as "no license"
    try:
        license_record = License.get(name=parser.license)
    except License.DoesNotExist:
        license_record = None

    # Insert book
    book_record = Book.create(
        id=parser.gid,
        title=parser.title.strip(),
        subtitle=parser.subtitle.strip(),
        author=author_record,  # foreign key
        license=license_record,  # foreign key
        language=parser.language.strip(),
        downloads=parser.downloads
    )

    # MIME types that must not be registered for this particular book
    bid = int(book_record.id)
    excluded_mimes = [FORMAT_MATRIX.get(f)
                      for f in BAD_BOOKS_FORMATS.get(bid, ())]

    # Insert formats
    for file_type, raw_mime in parser.file_types.items():
        # Sanitize MIME: the charset suffix is kept only on text/plain
        if raw_mime.startswith('text/plain'):
            mime = raw_mime
        else:
            mime = re.sub(r'; charset=[a-z0-9-]+', '', raw_mime)

        if mime in excluded_mimes:
            logger.error("\t**** EXCLUDING **** {} for book #{} from list."
                         .format(mime, bid))
            continue

        # Insert format type: file name with the book id made generic
        pattern = re.sub(parser.gid, '{id}', file_type).split('/')[-1]

        # NOTE(review): depending on the peewee version, get_or_create may
        # return an (instance, created) tuple instead of the instance;
        # confirm against the pinned version.
        format_record = Format.get_or_create(
            mime=mime,
            images=(file_type.endswith('.images')
                    or raw_mime == 'application/pdf'),
            pattern=pattern)

        # Insert book format
        BookFormat.create(
            book=book_record,  # foreign key
            format=format_record  # foreign key
        )