def save(self, *args, **kwargs):
    """Save the book, pulling a cover image out of the EPUB when none is set.

    When ``cover_img`` is empty and ``book_file`` has an ``.epub`` name, the
    cover path reported by the EPUB is stored in ``cover_img`` (if that path
    exists on disk) before delegating to the parent ``save``.

    :raises AssertionError: if ``file_sha256sum`` is not set.
    """
    # Explicit raise instead of ``assert`` so the check is not stripped under
    # ``python -O``; the exception type callers may catch is unchanged.
    if not self.file_sha256sum:
        raise AssertionError("file_sha256sum must be set before saving")

    # FIXME: we should use mimetype
    if not self.cover_img and self.book_file.name.endswith('.epub'):
        # get the cover path from the epub file
        epub_file = Epub(self.book_file)
        try:
            cover_path = epub_file.get_cover_image_path()
            if cover_path is not None and os.path.exists(cover_path):
                # Context manager so the cover handle is closed once stored.
                with open(cover_path, "rb") as cover_fh:
                    self.cover_img.save(  # pylint: disable=no-member
                        os.path.basename(cover_path), File(cover_fh))
        finally:
            # Always release the EPUB, even if cover extraction failed.
            epub_file.close()

    super(Book, self).save(*args, **kwargs)
def test_simple_import(self):
    """Check the title and creator parsed from the sample EPUB."""
    epub = Epub("examples/The Dunwich Horror.epub")
    try:
        info = epub.get_info()
        self.assertEqual(info.title, "The Dunwich Horror")
        self.assertEqual(info.creator, "H. P. Lovecraft")
    finally:
        # Close the EPUB even when parsing or an assertion fails.
        epub.close()
def handle(self, *args, **options):
    """Import each EPUB returned by ``get_epubs(dirpath)`` as a ``Book``.

    Reads ``options['dirpath']`` and ``options['ignore_error']``. Files that
    cannot be parsed as EPUB are reported and skipped. Nothing is imported
    when ``dirpath`` exists but is not a directory.

    :raises CommandError: if ``dirpath`` is missing or does not exist, or if
        a book cannot be added and ``ignore_error`` is falsy.
    """
    dirpath = options.get('dirpath')
    if not dirpath or not os.path.exists(dirpath):
        raise CommandError("%r is not a valid path" % dirpath)

    if not os.path.isdir(dirpath):
        return

    uuid_prefix = 'urn:uuid:'

    for name in get_epubs(dirpath):
        # Parse the metadata; always close the Epub, even on failure.
        try:
            epub = Epub(name)
            try:
                info = epub.get_info()
            finally:
                epub.close()
        except Exception:
            print("%s is not a valid epub file" % name)
            continue

        lang = Language.objects.filter(code=info.language)
        if not lang:
            for data in langs:
                if data[0] == info.language:
                    lang = Language()
                    # Store the code too: it is the lookup key used above,
                    # so without it this row could never be found again.
                    lang.code = data[0]
                    lang.label = data[1]
                    lang.save()
                    break
        else:
            lang = lang[0]

        # XXX: Hacks below -- replace missing metadata with empty values.
        for field in ('title', 'summary', 'creator', 'rights', 'date'):
            if not getattr(info, field):
                setattr(info, field, '')
        if not info.identifier:
            info.identifier = {}
        if not info.identifier.get('value'):
            info.identifier['value'] = ''

        # Remove the URN prefix only. ``str.strip('urn:uuid:')`` would strip
        # any of those *characters* from both ends and mangle the UUID.
        identifier = info.identifier['value']
        if identifier.startswith(uuid_prefix):
            identifier = identifier[len(uuid_prefix):]

        with open(name, "rb") as sha_fh:
            sha = sha256_sum(sha_fh)

        pub_status = Status.objects.get(status='Published')
        author = Author.objects.get_or_create(a_author=info.creator)[0]
        book = Book(
            a_title=info.title,
            a_author=author,
            a_summary=info.summary,
            file_sha256sum=sha,
            a_rights=info.rights,
            dc_identifier=identifier,
            dc_issued=info.date,
            a_status=pub_status,
            mimetype="application/epub+zip")

        try:
            with open(name, "rb") as book_fh:
                # Not sure why this errors, book_file.save exists
                book.book_file.save(  # pylint: disable=no-member
                    os.path.basename(name), File(book_fh))
                book.validate_unique()
                book.save()
        # FIXME: Find a better way to do this.
        except Exception as err:
            if (isinstance(err, IntegrityError)
                    and str(err) == "column file_sha256sum is not unique"):
                print("The book (", book.book_file,
                      ") was not saved because the file already exsists in the database.")
                continue
            message = 'Error adding file %s: %s' % (book.book_file, err)
            if options['ignore_error']:
                print(message)
                continue
            raise CommandError(message)
def handle(self, *args, **options):
    """Import each EPUB returned by ``get_epubs(dirpath)`` as a ``Book``.

    Reads ``options['dirpath']`` and ``options['ignore_error']``. Files that
    cannot be parsed as EPUB are reported and skipped. Nothing is imported
    when ``dirpath`` exists but is not a directory.

    :raises CommandError: if ``dirpath`` is missing or does not exist, or if
        a book cannot be added and ``ignore_error`` is falsy.
    """
    dirpath = options.get('dirpath')
    if not dirpath or not os.path.exists(dirpath):
        raise CommandError("%r is not a valid path" % dirpath)

    if not os.path.isdir(dirpath):
        return

    uuid_prefix = 'urn:uuid:'

    for name in get_epubs(dirpath):
        # Parse the metadata; always close the Epub, even on failure.
        try:
            epub = Epub(name)
            try:
                info = epub.get_info()
            finally:
                epub.close()
        except Exception:
            print("%s is not a valid epub file" % name)
            continue

        lang = Language.objects.filter(code=info.language)
        if not lang:
            for data in langs:
                if data[0] == info.language:
                    lang = Language()
                    # Store the code too: it is the lookup key used above,
                    # so without it this row could never be found again.
                    lang.code = data[0]
                    lang.label = data[1]
                    lang.save()
                    break
        else:
            lang = lang[0]

        # XXX: Hacks below -- replace missing metadata with empty values.
        for field in ('title', 'summary', 'creator', 'rights', 'date'):
            if not getattr(info, field):
                setattr(info, field, '')
        if not info.identifier:
            info.identifier = {}
        if not info.identifier.get('value'):
            info.identifier['value'] = ''

        # Remove the URN prefix only. ``str.strip('urn:uuid:')`` would strip
        # any of those *characters* from both ends and mangle the UUID.
        identifier = info.identifier['value']
        if identifier.startswith(uuid_prefix):
            identifier = identifier[len(uuid_prefix):]

        with open(name, "rb") as sha_fh:
            sha = sha256_sum(sha_fh)

        pub_status = Status.objects.get(status='Published')
        author = Author.objects.get_or_create(a_author=info.creator)[0]
        book = Book(
            a_title=info.title,
            a_author=author,
            a_summary=info.summary,
            file_sha256sum=sha,
            a_rights=info.rights,
            dc_identifier=identifier,
            dc_issued=info.date,
            a_status=pub_status,
            mimetype="application/epub+zip")

        try:
            with open(name, "rb") as book_fh:
                # Not sure why this errors, book_file.save exists
                book.book_file.save(  # pylint: disable=no-member
                    os.path.basename(name), File(book_fh))
                book.validate_unique()
                book.save()
        # FIXME: Find a better way to do this.
        except Exception as err:
            if (isinstance(err, IntegrityError)
                    and str(err) == "column file_sha256sum is not unique"):
                print(
                    "The book (", book.book_file,
                    ") was not saved because the file already exsists in the database."
                )
                continue
            message = 'Error adding file %s: %s' % (book.book_file, err)
            if options['ignore_error']:
                print(message)
                continue
            raise CommandError(message)
def process_epub(self, filename, use_symlink=False):
    """Import a single EPUB from `filename`, creating a new `Book` based on
    the information parsed from the epub.

    :param filename: ePub file to process
    :param use_symlink: symlink ePub to FileField or process normally
    :return: True when the book was saved; False when the file could not be
        parsed or the book already exists in the database
    :raises Exception: any error raised while creating the book, other than
        an "already exists" ``ValidationError``, is re-raised
    """
    # Try to parse the epub file, extracting the relevant info.
    info_dict = {}
    tmp_cover_path = None
    epub = None
    try:
        epub = Epub(filename)
        epub.get_info()
        # Get the information we need for creating the Model.
        info_dict, tmp_cover_path, subjects = epub.as_model_dict()
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if not info_dict:
            raise ValueError('no metadata could be extracted')
    except Exception as e:
        self.stdout.write(self.style.ERROR(
            "Error while parsing '%s':\n%s" % (filename, unicode(e))))
        # TODO: this is not 100% reliable yet. Further modifications to
        # epub.py are needed.
        # Best-effort cleanup: each step is attempted independently so a
        # failure in one does not skip the other.
        if tmp_cover_path:
            try:
                os.remove(tmp_cover_path)
            except OSError:
                pass
        if epub is not None:
            try:
                # close() can fail itself if _zobject failed to be
                # initialized.
                epub.close()
            except Exception:
                pass
        return False

    # Prepare some model fields that require extra care.
    # Language (dc_language).
    try:
        info_dict['dc_language'] = \
            models.Language.objects.get_or_create_by_code(
                info_dict['dc_language'])
    except Exception:
        info_dict['dc_language'] = None

    # Original filename (original_path).
    info_dict['original_path'] = filename
    # Published status (a_status).
    info_dict['a_status'] = models.Status.objects.get(
        status=settings.DEFAULT_BOOK_STATUS)

    # Remove authors and publishers from dict.
    authors = info_dict.pop('authors', [])
    publishers = info_dict.pop('publishers', [])

    # Create and save the Book.
    book = None
    try:
        # Prepare the Book.
        book = models.Book(**info_dict)

        # Use a symlink or copy the file depending on options. The file is
        # opened in binary mode (an EPUB is a zip archive) and closed as
        # soon as it has been stored.
        file_wrapper = LinkableFile if use_symlink else File
        with open(filename, 'rb') as book_fh:
            book.book_file.save(os.path.basename(filename),
                                file_wrapper(book_fh),
                                save=False)
            book.file_sha256sum = models.sha256_sum(book.book_file)

        # Validate and save.
        book.full_clean()
        book.save()

        # Handle info that needs existing book instance thru book.save.
        # authors, publishers, cover, and tags

        # Add authors
        for author in authors:
            if author is None:
                continue
            candidates = author.strip().replace(
                ' and ', ';').replace('&', ';').split(';')
            for candidate in candidates:
                fixed = fix_authors(candidate)
                if not fixed:
                    continue
                # fix_authors may give back one name or several.
                if isinstance(fixed, basestring):
                    fixed = [fixed]
                for a in fixed:
                    self.stdout.write(self.style.NOTICE(
                        'Found author: "%s"' % a))
                    book.authors.add(
                        models.Author.objects.get_or_create(
                            name=a)[0].pk)

        # Add publishers
        for publisher in publishers:
            self.stdout.write(self.style.NOTICE(
                'Found publisher: "%s"' % publisher))
            book.publishers.add(
                models.Publisher.objects.get_or_create(
                    name=publisher)[0].pk)

        # Add cover image (cover_image). It is handled here as the filename
        # depends on instance.pk (which is only present after Book.save()).
        if tmp_cover_path:
            try:
                cover_filename = '%s%s' % (
                    book.pk, os.path.splitext(tmp_cover_path)[1]
                )
                with open(tmp_cover_path, 'rb') as cover_fh:
                    book.cover_img.save(cover_filename,
                                        File(cover_fh),
                                        save=True)
            except Exception as cover_err:
                # A broken cover is not fatal; the temporary file is still
                # removed in the ``finally`` clause below.
                self.stdout.write(self.style.WARNING(
                    'Error while saving cover image %s:\n%s' % (
                        tmp_cover_path, str(cover_err))))

        # Add subjects as tags
        for subject in (subjects or []):
            # workaround for ePubs with description as subject
            # NOTE(review): ``break`` also drops every later subject;
            # confirm whether ``continue`` was intended.
            if not subject or len(subject) > 80:
                break

            subject_split = subject.replace('/', ',') \
                .replace(';', ',') \
                .replace(':', '') \
                .replace('\n', ',') \
                .replace(' ,', ',') \
                .replace(' ,', ',') \
                .split(',')
            for tag in subject_split:
                # Skip blank fragments. The previous ``tag is not ' '``
                # compared identity, not content, so it never filtered.
                if not tag.strip():
                    continue
                # The specs recommend using unicode for the tags, but
                # do not enforce it. As a result, tags in exotic
                # encodings might cause taggit to crash while trying to
                # create the slug.
                self.stdout.write(self.style.NOTICE(
                    'Found subject (tag): "%s"' % tag))
                try:
                    book.tags.add(tag.lower().strip())
                except Exception:
                    try:
                        book.tags.add(
                            tag.encode('utf-8').lower().strip())
                    except Exception:
                        # No further efforts are made, and the tag is
                        # not added.
                        self.stdout.write(self.style.WARNING(
                            'Tag could not be added'))

    except Exception as e:
        # Delete .epub file in media/, if `book` is a valid object.
        # Best-effort: a failure here must not hide the original error.
        if book is not None:
            try:
                if os.path.isfile(book.book_file.path):
                    os.remove(book.book_file.path)
            except Exception:
                pass

        if isinstance(e, ValidationError) and 'already exists' in str(e):
            self.stdout.write(self.style.WARNING(
                'The book (%s) was not saved because the file already '
                'exists in the database:\n%s' % (filename, str(e))))
            return False
        else:
            # TODO: check for possible risen exceptions at a finer grain.
            # ``raise e`` (not bare ``raise``) is deliberate: on Python 2
            # the nested try/except above can replace the "current"
            # exception.
            raise e
    finally:
        # Delete the temporary files. The cover is removed even if
        # close() raises.
        try:
            epub.close()
        finally:
            if tmp_cover_path and os.path.isfile(tmp_cover_path):
                os.remove(tmp_cover_path)

    return True