def find_links(self, url, response):
    '''Extract the photo-news page urls from a page and queue them.

    url      -- address of the page being parsed (only used in the log line)
    response -- page content; handed to ImgPageLinks together with the
                self.strStart / self.strEnd markers
    '''
    Log.i(self.TAG, 'find links in %s' % url)
    extractor = ImgPageLinks(response, self.strStart, self.strEnd)
    found = extractor.getLinks(response)
    self.spider.add_urls(found)
def cleanTmp(self):
    '''Delete every entry found inside the ./tmp directory.'''
    tmpDir = os.path.join(os.path.curdir, 'tmp')
    targets = [os.path.join(tmpDir, name) for name in os.listdir(tmpDir)]
    for target in targets:
        Log.e(self.TAG, 'deleting %s' % (target, ))
        self.deleteFileFolder(target)
def renameAll(self):
    '''Rename the entries of ./books/new from "<id><ext>" to "<title><ext>".

    The title for each id comes from self.persist.getTitle(); entries for
    which no title is returned keep their current name.
    '''
    folder = os.path.join(os.path.curdir, 'books', 'new')
    for entry in os.listdir(folder):
        bookId, suffix = os.path.splitext(entry)
        title = self.persist.getTitle(bookId)
        if not title:
            continue
        renamed = '%s%s' % (title, suffix)
        before = os.path.join(folder, entry)
        after = os.path.join(folder, renamed)
        os.rename(before, after)
        Log.i(self.TAG, '%s -> %s' % (entry, renamed))
def mergeSingle(src):
    '''Clean the PDFs inside directory src and merge them into one file.

    The merged file is written next to src as "<directory name>.pdf".

    src -- path of the directory holding the PDFs; when it is not a
           directory a warning is logged and nothing else happens.
    '''
    if not os.path.isdir(src):
        Log.w(Duokan.__name__, '[%s] is not a diractory, exit...' % (src, ))
        return
    parent, name = os.path.split(src)
    if not name:
        # 'books/123/' splits into ('books/123', ''), which used to yield
        # the destination 'books/123/.pdf' inside the source directory.
        # Splitting once more recovers the real directory name.
        parent, name = os.path.split(parent)
    destPath = os.path.join(parent, name + '.pdf')
    DuoPdf.clean(src)
    DuoPdf.merge(destPath, src)
def mergeSingle(src):
    '''Clean the PDFs inside directory src and merge them into one file.

    The merged file is written beside src and named after the directory,
    i.e. "<parent>/<directory name>.pdf".

    src -- directory holding the PDFs to merge. A path that is not a
           directory only produces a warning in the log.
    '''
    if not os.path.isdir(src):
        Log.w(Duokan.__name__, '[%s] is not a diractory, exit...' % (src, ))
        return
    parentDir, dirName = os.path.split(src)
    if dirName == '':
        # A trailing separator leaves the tail empty; without this second
        # split the output would be '<src>/.pdf', inside the very directory
        # that is about to be merged.
        parentDir, dirName = os.path.split(parentDir)
    destPath = os.path.join(parentDir, dirName + '.pdf')
    DuoPdf.clean(src)
    DuoPdf.merge(destPath, src)
def _getProxy(self):
    '''Load the proxy host, user and password from self.conf.

    self.host, self.user and self.pswd are reset to '' first, so a value
    that could not be read stays empty. A failed lookup is logged together
    with its traceback and is not propagated to the caller.
    '''
    self.host = ''
    self.user = ''
    self.pswd = ''
    try:
        self.host = self.conf.get(Config.KEY_PROXY, Config.KEY_PROXY_HOST)
        self.user = self.conf.get(Config.KEY_PROXY, Config.KEY_PROXY_USER)
        self.pswd = self.conf.get(Config.KEY_PROXY, Config.KEY_PROXY_PAWD)
    except Exception:
        # Deliberately not a bare except: KeyboardInterrupt and SystemExit
        # must still be able to stop the program.
        Log.w(self.TAG, 'read proxy failed')
        traceback.print_exc()
def cleanAllInPath(self, srcDir):
    '''Run DuoPdf.cleanPdf on every regular file directly inside srcDir.

    The cleaned copy of each file is written into srcDir under the name
    returned by self.getCleanedFileName(). Files for which
    self.isCleanedFile() or self.hasCleaned() is true are skipped, as are
    entries that are not regular files. A failure on one file is logged
    with its traceback and the remaining files are still processed.

    srcDir -- directory to scan (not recursive); a warning is logged when
              it is missing or empty.
    '''
    if not os.path.isdir(srcDir):
        Log.w(DuoPdf.__name__, 'dir [%s] not exist.' % (srcDir,))
        return
    files = os.listdir(srcDir)
    num = len(files)
    if num == 0:
        Log.w(DuoPdf.__name__, 'no file in [%s] to clean' % (srcDir,))
        return
    # Count from 1 so the progress output finishes at 'num/num' instead of
    # stopping one short. print(...) with a single argument behaves the
    # same under Python 2 and Python 3.
    for i, f in enumerate(files, 1):
        print('%d/%d: %s' % (i, num, f))
        srcPath = os.path.join(srcDir, f)
        if not os.path.isfile(srcPath):
            Log.i(DuoPdf.__name__, 'skip file [%s]' % (srcPath,))
            continue
        if self.isCleanedFile(srcPath) or self.hasCleaned(srcPath):
            print('skip [%s]' % srcPath)
            continue
        # Only work out the destination for files that are actually cleaned.
        destPath = os.path.join(srcDir, self.getCleanedFileName(f))
        try:
            DuoPdf.cleanPdf(srcPath, destPath)
        except Exception:
            # Keep going with the other files, but let Ctrl-C through.
            Log.w(DuoPdf.__name__, 'clean [%s] failed' % (srcPath,))
            traceback.print_exc()
def deleteFileFolder(self, src):
    '''Delete src; directories are emptied recursively and then removed.

    Symbolic links are removed as links and never followed, so a link that
    points at a directory outside the tree cannot cause that directory's
    contents to be deleted. Failures are logged through Log.e and do not
    raise.

    src -- path of the file, link or directory to delete; a path that does
           not exist is ignored.
    '''
    # islink() comes first: isfile()/isdir() follow links, and both are
    # False for a broken link, which would otherwise never be removed.
    if os.path.islink(src) or os.path.isfile(src):
        try:
            os.remove(src)
        except OSError:
            Log.e(self.TAG, 'delete [%s] failed...' % (src, ))
    elif os.path.isdir(src):
        for item in os.listdir(src):
            self.deleteFileFolder(os.path.join(src, item))
        try:
            os.rmdir(src)
        except OSError:
            Log.e(self.TAG, 'delete [%s] failed...' % (src, ))
def deleteFileFolder(self, src):
    '''Delete a file, a symbolic link or a whole directory tree.

    A directory is emptied entry by entry (recursively) before it is
    removed. Symbolic links are deleted themselves and never followed, so
    nothing outside src is touched. Every failure is logged through Log.e
    instead of being raised.

    src -- path to delete; nothing happens when it does not exist.
    '''
    # Test for a link before isfile()/isdir(): both of those follow links,
    # and a broken link satisfies neither of them.
    isPlainEntry = os.path.islink(src) or os.path.isfile(src)
    if isPlainEntry:
        try:
            os.remove(src)
        except OSError:
            Log.e(self.TAG, 'delete [%s] failed...' % (src, ))
        return
    if not os.path.isdir(src):
        return
    for item in os.listdir(src):
        itemsrc = os.path.join(src, item)
        self.deleteFileFolder(itemsrc)
    try:
        os.rmdir(src)
    except OSError:
        Log.e(self.TAG, 'delete [%s] failed...' % (src, ))
def crop2(dest, src, margin1, margin2):
    '''Crop the pages of PDF src with alternating margins and save to dest.

    The first page is copied unchanged. Every later page has its media box
    shrunk by margin1 when its 1-based page number is odd and by margin2
    when it is even.

    dest    -- path of the PDF to write
    src     -- path of the PDF to read
    margin1 -- 4-item sequence used for odd pages: items [0] and [1] are
               added to the lower-left x and y of the media box, items [2]
               and [3] are subtracted from its upper-right x and y
    margin2 -- same layout, used for even pages
    '''
    Log.i(DuoPdf.__name__, 'cropping file [%s]' % (src, ))
    # As before, the source stays open until write() has returned; the
    # with blocks additionally close both files when something fails
    # half-way. open() replaces the Python 2-only file() builtin.
    with open(src, 'rb') as srcFile:
        srcPdf = PdfFileReader(srcFile)
        destPdf = PdfFileWriter()
        for index, page in enumerate(srcPdf.pages, 1):
            box = page.mediaBox
            if index != 1:
                margin = margin1 if index % 2 == 1 else margin2
                box.upperRight = (box.getUpperRight_x() - margin[2],
                                  box.getUpperRight_y() - margin[3])
                box.lowerLeft = (box.getLowerLeft_x() + margin[0],
                                 box.getLowerLeft_y() + margin[1])
            destPdf.addPage(page)
        Log.i(DuoPdf.__name__, 'saving to file [%s]...' % (dest, ))
        with open(dest, 'wb') as destFile:
            destPdf.write(destFile)
    Log.i(DuoPdf.__name__, 'done')
def onStop(self, event):
    '''Post-process a finished download and announce that it stopped.

    Marks self.id as downloaded in self.persist, runs Duokan.merge and
    Duokan.crop on it, then dispatches Downloader.EVT_STOP. The event
    argument is not used.
    '''
    Log.i(self.TAG, 'phantomjs finished...')
    self.persist.setDownload(self.id)
    steps = (
        (Duokan.merge, 'merged pdf...'),
        (Duokan.crop, 'croped pdf...'),
    )
    for action, note in steps:
        action(self.id)
        Log.i(self.TAG, note)
    self.dispatch(Downloader.EVT_STOP)
def clean(srcDir):
    '''Run DuoPdf.cleanPdf in place on every regular file directly in srcDir.

    Each file is passed to cleanPdf as both source and destination. Entries
    that are not regular files are skipped. A failure on one file is logged
    with its traceback and the remaining files are still processed.

    srcDir -- directory to scan (not recursive); a warning is logged when
              it is missing or empty.
    '''
    # isdir() is already False for a missing path, so no exists() check.
    if not os.path.isdir(srcDir):
        Log.w(DuoPdf.__name__, 'dir [%s] not exist.' % (srcDir,))
        return
    files = os.listdir(srcDir)
    if not files:
        Log.w(DuoPdf.__name__, 'no file in [%s] to clean' % (srcDir,))
        return
    for f in files:
        filePath = os.path.join(srcDir, f)
        if not os.path.isfile(filePath):
            Log.i(DuoPdf.__name__, 'skip file [%s]' % (filePath,))
            continue
        try:
            DuoPdf.cleanPdf(filePath, filePath)
        except Exception:
            # Best effort per file, but Ctrl-C / SystemExit still get through.
            Log.w(DuoPdf.__name__, 'clean [%s] failed' % (filePath,))
            traceback.print_exc()
def crop(dest, src, left, top, bottom, right):
    '''Shrink the media box of every page of PDF src and save to dest.

    dest -- path of the PDF to write
    src  -- path of the PDF to read
    left, top, bottom, right -- packed in this order into a 4-tuple whose
        items [0] and [1] are added to the lower-left x and y of the media
        box and whose items [2] and [3] are subtracted from its
        upper-right x and y.

    NOTE(review): with that mapping `top` moves the lower-left y, `bottom`
    the upper-right x and `right` the upper-right y, which does not match
    the parameter names. The mapping is kept as it was because existing
    callers may have been tuned against it -- confirm before changing.
    '''
    Log.i(DuoPdf.__name__, 'cropping file [%s]' % (src, ))
    margin = (left, top, bottom, right)
    # As before, the source stays open until write() has returned; the
    # with blocks additionally close both files when something fails
    # half-way. open() replaces the Python 2-only file() builtin.
    with open(src, 'rb') as srcFile:
        srcPdf = PdfFileReader(srcFile)
        destPdf = PdfFileWriter()
        for page in srcPdf.pages:
            box = page.mediaBox
            box.upperRight = (box.getUpperRight_x() - margin[2],
                              box.getUpperRight_y() - margin[3])
            box.lowerLeft = (box.getLowerLeft_x() + margin[0],
                             box.getLowerLeft_y() + margin[1])
            destPdf.addPage(page)
        Log.i(DuoPdf.__name__, 'saving to file [%s]...' % (dest, ))
        with open(dest, 'wb') as destFile:
            destPdf.write(destFile)
    Log.i(DuoPdf.__name__, 'done')
def cropWH(dest, src, destWidth, destHeight):
    '''Crop every page of PDF src to destWidth x destHeight and save to dest.

    Vertically the page is trimmed by the same amount at both ends of its
    media box so that destHeight remains. Horizontally the new box spans
    x = 0 .. destWidth.

    dest       -- path of the PDF to write
    src        -- path of the PDF to read
    destWidth  -- x coordinate of the new upper-right corner
    destHeight -- height of the cropped media box

    NOTE(review): the horizontal range is still anchored at x = 0, i.e. it
    assumes the page's left edge is at 0 -- confirm for the PDFs in use.
    '''
    Log.i(DuoPdf.__name__, 'cropping file [%s]' % (src, ))
    # The source stays open until write() has returned; the with blocks
    # also close both files on failure. open() replaces file().
    with open(src, 'rb') as srcFile:
        srcPdf = PdfFileReader(srcFile)
        destPdf = PdfFileWriter()
        for page in srcPdf.pages:
            box = page.mediaBox
            # Read both y coordinates before the box is modified.
            lowerY = box.getLowerLeft_y()
            upperY = box.getUpperRight_y()
            trim = (upperY - lowerY - destHeight) / 2
            box.upperRight = (destWidth, upperY - trim)
            # Offset from the real lower edge; the previous code used the
            # bare trim value, which is only right when lowerY is 0.
            box.lowerLeft = (0, lowerY + trim)
            destPdf.addPage(page)
        Log.i(DuoPdf.__name__, 'saving to file [%s]...' % (dest, ))
        with open(dest, 'wb') as destFile:
            destPdf.write(destFile)
    Log.i(DuoPdf.__name__, 'done')
def openInNewTab(self, url):
    '''Open url in a new tab of the default web browser.

    url -- address to open. An empty string or None is reported through
           Log.e and nothing is opened.
    '''
    # Truthiness also covers None, for which len() raised a TypeError.
    if not url:
        Log.e(self.TAG, 'url is empty')
        return
    webbrowser.open(url, new=2, autoraise=True)
def merge(dest, srcDir):
    '''Concatenate the PDF files found directly inside srcDir into dest.

    Files are taken in natural filename order ('2.pdf' before '10.pdf').
    Entries that are not regular files are skipped, and a file that cannot
    be merged is logged with its traceback and left out.

    dest   -- path of the merged PDF to write
    srcDir -- directory holding the PDFs (not recursive); a warning is
              logged when it is missing or empty.
    '''
    import re  # function-scope import: only the ordering key needs it

    def naturalKey(name):
        # re.split with a capturing group alternates text, digits, text...
        # so odd positions are always digit runs and compare as numbers.
        parts = re.split(r'(\d+)', name)
        return [int(p) if i % 2 else p for i, p in enumerate(parts)]

    if not os.path.isdir(srcDir):
        Log.w(DuoPdf.__name__, 'dir [%s] not exist.' % (srcDir,))
        return
    # os.listdir() returns names in arbitrary order; sort them so the page
    # order of the result is deterministic.
    files = sorted(os.listdir(srcDir), key=naturalKey)
    if not files:
        Log.w(DuoPdf.__name__, 'no file in [%s] to merge' % (srcDir,))
        return

    merger = PdfFileMerger()
    opened = []
    try:
        for f in files:
            Log.i(DuoPdf.__name__, 'merge file [%s]' % (f, ))
            filePath = os.path.join(srcDir, f)
            if not os.path.isfile(filePath):
                Log.i(DuoPdf.__name__, 'skip file [%s]' % (filePath,))
                continue
            try:
                srcFileHdl = open(filePath, 'rb')
                opened.append(srcFileHdl)
                # append() adds the pages at the end. merge(position=i)
                # used the file index as a page index, which interleaves
                # pages as soon as one input has more than one page.
                merger.append(fileobj=srcFileHdl)
            except Exception:
                Log.w(DuoPdf.__name__, 'merge [%s] failed' % (filePath,))
                traceback.print_exc()
        Log.i(DuoPdf.__name__, 'save to file [%s]...' % (dest, ))
        with open(dest, 'wb') as destFileStream:
            merger.write(destFileStream)
        Log.i(DuoPdf.__name__, 'done')
    finally:
        # Inputs are kept open until the merged file is written, then
        # released even when merging or writing failed.
        for hdl in opened:
            hdl.close()