def getEpubMetadata(self, path=None):
    '''
    Fill ``self.book`` with metadata parsed from the book's EPUB file.

    The EPUB is looked up as ``<bookName>.epub`` relative to
    ``book.bookPath``. Its authors and subject tag are copied onto the book,
    the cover image is extracted with ``outdir='.'`` and ``createdOn`` is
    set to the current time.

    ``path`` is accepted but not used by this method.

    NOTE: the process working directory is changed to ``book.bookPath`` and
    is not restored afterwards.
    '''
    book = self.book

    # The EPUB is opened by bare file name and the cover goes to '.', so
    # both depend on the working directory being the book's own folder.
    os.chdir(book.bookPath)

    epub = EpubBook()
    epub.open(book.bookName + '.epub')
    epub.parse_contents()

    # Wrap every author name reported by the EPUB in an Author object.
    authors = []
    for name in epub.get_authors():
        entry = Author()
        entry.authorName = name
        authors.append(entry)
    book.authors = authors

    book.tag = epub.subjectTag
    epub.extract_cover_image(outdir='.')
    book.createdOn = datetime.now()
def onDone(self, event):
    '''
    Copy the values chosen in the rows onto ``self.currentBook`` and save.

    Each entry of ``self.rowDict`` is handled according to its label:

    * ``Cover``   -- the image file of the row's book is copied over the
      image file of ``self.currentBook``.
    * ``Authors`` -- a new ``Author`` is built from the row text and the
      collected list is stored on the book.
    * any other known label -- the row text is stored in the matching book
      attribute (see ``text_fields`` below).

    Rows with an unknown label are ignored. Finally the book is written out
    through ``ReadWriteJsonInfo().writeJsonToDir``.

    ``event`` is not used.
    '''
    print('done')

    # Label shown in the row -> attribute of the book that receives the text.
    text_fields = {
        'Title': 'bookName',
        'Series': 'series',
        'Tags': 'tag',
        'Rating': 'rating',
        'Publisher': 'publisher',
        'ISBN-13': 'isbn_13',
        'ISBN-10': 'isbn_10',
        'Language': 'inLanguage',
    }

    collected_authors = []
    for row in self.rowDict.values():
        label = row.label.GetLabel()

        if label == 'Cover':
            source_book = row.leftText.currentBook
            source_image = os.path.join(
                source_book.bookPath, source_book.bookImgName)
            target_image = os.path.join(
                self.currentBook.bookPath, self.currentBook.imageFileName)
            shutil.copy(source_image, target_image)
        elif label == 'Authors':
            new_author = Author()
            new_author.authorName = row.leftText.GetValue()
            collected_authors.append(new_author)
            self.currentBook.authors = collected_authors
        elif label in text_fields:
            setattr(self.currentBook, text_fields[label],
                    row.leftText.GetValue())

    ReadWriteJsonInfo().writeJsonToDir(
        self.currentBook.bookPath, self.currentBook)
def getPdfMetadata(self, path=None):
    '''
    Populate ``self.book`` with metadata read from the PDF file at ``path``.

    Reading is best-effort: every step that touches the PDF is wrapped in
    its own ``try`` block, failures are logged and the remaining fields are
    still attempted.

    Attributes written on ``self.book``: ``numberOfPages``, ``tag``,
    ``bookName``, ``publisher``, ``createdOn``, ``publishedOn``,
    ``fileSize`` and ``authors`` (a one-element list; the author name falls
    back to ``'Unknown'``).

    :param path: location of the PDF file. Nothing is done when it is
        ``None`` or empty.
    :returns: ``None``; results are stored on ``self.book``.
    :raises OSError: propagated from ``os.path.getsize`` when the size of
        ``path`` cannot be read.
    '''
    logger.debug('getPdfMetadata path: %s', path)
    if not path:
        return

    pdf_file = None
    try:
        pdf_reader = None
        try:
            # One handle backs every read below and is closed in the
            # ``finally`` clause. It is kept open until all metadata has
            # been read because the reader may resolve values from the
            # stream on access.
            pdf_file = open(path, "rb")
            pdf_reader = PdfFileReader(pdf_file)
            encrypted = pdf_reader.isEncrypted
            logger.debug('getIsEncrypted : %s ', encrypted)
            if encrypted:
                try:
                    # Try the empty password before giving up.
                    pdf_reader.decrypt('')
                except Exception as e:
                    logger.error(e, exc_info=True)
        except Exception as e:
            logger.error(e, exc_info=True)

        pdf_info = None
        try:
            pdf_info = pdf_reader.getDocumentInfo()
            page_count = pdf_reader.getNumPages()
            logger.debug('NumPages:%s', page_count)
            self.book.numberOfPages = page_count

            # isinstance() rather than an exact type comparison, so that
            # str subclasses returned by the PDF library are accepted too.
            subject = pdf_info.subject
            if subject and isinstance(subject, str):
                if self.book.tag:
                    # NOTE(review): no separator is inserted between the
                    # existing tag and the subject -- confirm intended.
                    self.book.tag = self.book.tag + '' + subject
                else:
                    self.book.tag = subject
        except Exception as e:
            logger.error(e, exc_info=True)

        try:
            title = pdf_info.title
            if title is not None and title.strip() != '':
                self.book.bookName = str(title)
        except Exception as e:
            logger.error(e, exc_info=True)

        try:
            if pdf_info.creator:
                # NOTE(review): on Python 3 str() of a bytes object gives
                # its "b'...'" repr -- confirm the interpreter this runs on.
                self.book.publisher = str(pdf_info.creator.encode('utf-8'))
        except Exception as e:
            logger.error(e, exc_info=True)

        self.book.createdOn = datetime.now()

        try:
            # Characters 2..9 of the raw value are parsed as YYYYMMDD
            # (this skips a two-character prefix such as 'D:').
            self.book.publishedOn = datetime.strptime(
                str(pdf_info['/CreationDate'])[2:10], '%Y%m%d')
        except Exception as e:
            logger.error(e, exc_info=True)
            logger.error('CreationDate not found')

        file_size = Util().convert_bytes(os.path.getsize(path))
        logger.debug(file_size)
        self.book.fileSize = file_size

        author_name = 'Unknown'
        try:
            candidate = pdf_info.author
            if candidate is not None and candidate.strip() != '':
                author_name = candidate
        except Exception as e:
            logger.error(e, exc_info=True)

        author = Author()
        author.authorName = author_name
        self.book.authors = [author]
    finally:
        if pdf_file is not None:
            pdf_file.close()
def getPdfMetadata(self, path=None):
    '''
    Populate ``self.book`` with metadata read from the PDF file at ``path``.

    Reading is best-effort: every step that touches the PDF is wrapped in
    its own ``try`` block, the traceback of a failure is printed and the
    remaining fields are still attempted.

    Attributes written on ``self.book``: ``numberOfPages``, ``tag``,
    ``bookName``, ``publisher``, ``createdOn``, ``publishedOn``,
    ``fileSize`` and ``authors`` (a one-element list; the author name falls
    back to ``'Unknown'``).

    This block is Python 2 code: it relies on the ``unicode`` builtin.

    :param path: location of the PDF file. Nothing is done when it is
        ``None`` or empty.
    :returns: ``None``; results are stored on ``self.book``.
    :raises OSError: propagated from ``os.path.getsize`` when the size of
        ``path`` cannot be read.
    '''
    print(path)
    if not path:
        return

    pdf_file = None
    try:
        pdf_reader = None
        try:
            # One handle backs every read below and is closed in the
            # ``finally`` clause. It is kept open until all metadata has
            # been read because the reader may resolve values from the
            # stream on access.
            pdf_file = open(path, "rb")
            pdf_reader = PdfFileReader(pdf_file)
            encrypted = pdf_reader.isEncrypted
            print('getPdfMetadata %s' % (encrypted,))
            if encrypted:
                try:
                    # Try the empty password before giving up.
                    pdf_reader.decrypt('')
                except Exception:
                    traceback.print_exc()
        except Exception:
            traceback.print_exc()

        pdf_info = None
        try:
            pdf_info = pdf_reader.getDocumentInfo()
            page_count = pdf_reader.getNumPages()
            print('Pages: %s' % (page_count,))
            self.book.numberOfPages = page_count

            raw_subject = pdf_info.subject
            # Skip a missing subject: converting None with unicode() would
            # store the literal text "None" as a tag.
            if raw_subject:
                if isinstance(raw_subject, str):
                    # Byte string: decode as UTF-8 and drop undecodable
                    # bytes instead of failing.
                    value = unicode(raw_subject, "utf-8", errors="ignore")
                else:
                    value = unicode(raw_subject)
                    print('else')
                if not self.book.tag:
                    self.book.tag = value
                else:
                    # NOTE(review): no separator is inserted between the
                    # existing tag and the subject -- confirm intended.
                    self.book.tag = self.book.tag + '' + value
        except Exception:
            traceback.print_exc()

        try:
            title = pdf_info.title
            if title is not None and title.strip() != '':
                self.book.bookName = str(title)
        except Exception:
            traceback.print_exc()
            print('unable to set bookName')

        try:
            if pdf_info.creator:
                self.book.publisher = str(pdf_info.creator.encode('utf-8'))
        except Exception:
            traceback.print_exc()

        self.book.createdOn = datetime.now()

        try:
            # Characters 2..9 of the raw value are parsed as YYYYMMDD
            # (this skips a two-character prefix such as 'D:').
            created_stamp = str(pdf_info['/CreationDate'])[2:10]
            print(created_stamp)
            self.book.publishedOn = datetime.strptime(created_stamp, '%Y%m%d')
        except Exception:
            print('CreationDate not found')

        print(path)
        file_size = Util().convert_bytes(os.path.getsize(path))
        print(file_size)
        self.book.fileSize = file_size

        author_name = 'Unknown'
        try:
            candidate = pdf_info.author
            if candidate is not None and candidate.strip() != '':
                author_name = candidate.encode("utf8", "ignore")
        except Exception:
            traceback.print_exc()

        author = Author()
        author.authorName = author_name
        self.book.authors = [author]
    finally:
        if pdf_file is not None:
            pdf_file.close()