Beispiel #1
0
 def cleanAllInPath(self, srcDir):
     '''Clean every regular file found directly inside srcDir.

     Each file is passed to DuoPdf.cleanPdf and written to a sibling path
     whose name comes from self.getCleanedFileName. Entries for which
     self.isCleanedFile or self.hasCleaned returns true are skipped, as are
     entries that are not regular files. A failure on one file is logged
     and does not stop the remaining files.

     srcDir -- directory to scan; sub-directories are not descended into.
     '''
     if not os.path.isdir(srcDir):
         Log.w(DuoPdf.__name__, 'dir [%s] not exist.' % (srcDir,))
         return

     files = os.listdir(srcDir)
     num = len(files)
     if num == 0:
         Log.w(DuoPdf.__name__, 'no file in [%s] to clean' % (srcDir,))
         return

     # Count from 1 so the progress line runs "1/num" .. "num/num".
     for i, f in enumerate(files, 1):
         # A single parenthesised argument prints identically with the
         # print statement and with the print function.
         print('%d/%d: %s' % (i, num, f))
         srcPath = os.path.join(srcDir, f)

         if not os.path.isfile(srcPath):
             Log.i(DuoPdf.__name__, 'skip file [%s]' % (srcPath,))
             continue
         if self.isCleanedFile(srcPath) or self.hasCleaned(srcPath):
             print('skip [%s]' % srcPath)
             continue

         destPath = os.path.join(srcDir, self.getCleanedFileName(f))
         try:
             DuoPdf.cleanPdf(srcPath, destPath)
         except Exception:
             # Best effort per file: report and carry on. Catching
             # Exception (not everything) lets Ctrl-C and SystemExit
             # still abort the run.
             Log.w(DuoPdf.__name__, 'clean [%s] failed' % (srcPath,))
             traceback.print_exc()
Beispiel #2
0
 def find_links(self, url, response):
     '''Extract page URLs from a fetched page and queue them on the spider.

     url      -- address of the page; only used in the log message.
     response -- fetched content, handed to ImgPageLinks together with
                 self.strStart and self.strEnd.
     '''
     Log.i(self.TAG, 'find links in %s' % url)
     extractor = ImgPageLinks(response, self.strStart, self.strEnd)
     pageUrls = extractor.getLinks(response)
     # urls = links.persistToDB(self.db)
     self.spider.add_urls(pageUrls)
Beispiel #3
0
 def renameAll(self):
     '''Rename the entries of ./books/new to their stored titles.

     The part of each entry name before the extension is looked up with
     self.persist.getTitle; when a non-empty title comes back the entry is
     renamed to that title with the original extension kept. Entries
     without a title are left as they are.
     '''
     bookDir = os.path.join(os.path.curdir, 'books', 'new')
     for entry in os.listdir(bookDir):
         bookId, ext = os.path.splitext(entry)
         title = self.persist.getTitle(bookId)
         if not title:
             continue
         renamed = '%s%s' % (title, ext)
         os.rename(os.path.join(bookDir, entry), os.path.join(bookDir, renamed))
         Log.i(self.TAG, '%s -> %s' % (entry, renamed))
Beispiel #4
0
 def renameAll(self):
     '''Give every entry in ./books/new the title stored for it.

     For each directory entry, the name without its extension is passed to
     self.persist.getTitle. A truthy result becomes the new base name (the
     extension is preserved); a falsy result leaves the entry untouched.
     '''
     folder = os.path.join(os.path.curdir, 'books', 'new')
     for oldName in os.listdir(folder):
         stem, suffix = os.path.splitext(oldName)
         title = self.persist.getTitle(stem)
         if not title:
             continue
         newName = '%s%s' % (title, suffix)
         oldPath = os.path.join(folder, oldName)
         newPath = os.path.join(folder, newName)
         os.rename(oldPath, newPath)
         Log.i(self.TAG, '%s -> %s' % (oldName, newName))
Beispiel #5
0
 def merge(dest, srcDir):
     '''Merge the PDF files found directly inside srcDir into dest.

     Files are appended in the order os.listdir returns them, which is not
     guaranteed to be sorted. Entries that are not regular files are
     skipped; a file that cannot be opened or merged is logged and skipped
     as well.

     dest   -- path of the merged PDF to write.
     srcDir -- directory holding the PDF files to merge.
     '''
     if not os.path.isdir(srcDir):
         Log.w(DuoPdf.__name__, 'dir [%s] not exist.' % (srcDir,))
         return

     files = os.listdir(srcDir)
     if not files:
         Log.w(DuoPdf.__name__, 'no file in [%s] to merge' % (srcDir,))
         return

     merger = PdfFileMerger()
     # Source handles stay open until the merged file has been written,
     # then are always closed, including on error.
     openedFiles = []
     try:
         for f in files:
             Log.i(DuoPdf.__name__, 'merge file [%s]' % (f, ))
             filePath = os.path.join(srcDir, f)
             if not os.path.isfile(filePath):
                 Log.i(DuoPdf.__name__, 'skip file [%s]' % (filePath,))
                 continue
             try:
                 srcFileHdl = open(filePath, 'rb')
                 openedFiles.append(srcFileHdl)
                 # append() adds the whole document at the end. The
                 # 'position' argument of merge() is a page index, so
                 # passing the file index there would splice later files
                 # into the middle of multi-page ones.
                 merger.append(fileobj=srcFileHdl)
             except Exception:
                 Log.w(DuoPdf.__name__, 'merge [%s] failed' % (filePath,))
                 traceback.print_exc()

         Log.i(DuoPdf.__name__, 'save to file [%s]...' % (dest, ))
         with open(dest, 'wb') as destFileStream:
             merger.write(destFileStream)
         Log.i(DuoPdf.__name__, 'done')
     finally:
         for srcFileHdl in openedFiles:
             srcFileHdl.close()
Beispiel #6
0
 def clean(srcDir):
     '''Run DuoPdf.cleanPdf in place on every regular file inside srcDir.

     Each file is used as both the source and the destination of
     DuoPdf.cleanPdf. Entries that are not regular files are skipped, and a
     failure on one file is logged without stopping the others.

     srcDir -- directory to scan; sub-directories are not descended into.
     '''
     if not os.path.isdir(srcDir):
         Log.w(DuoPdf.__name__, 'dir [%s] not exist.' % (srcDir,))
         return

     files = os.listdir(srcDir)
     if not files:
         Log.w(DuoPdf.__name__, 'no file in [%s] to clean' % (srcDir,))
         return

     for f in files:
         filePath = os.path.join(srcDir, f)
         if not os.path.isfile(filePath):
             Log.i(DuoPdf.__name__, 'skip file [%s]' % (filePath,))
             continue
         try:
             DuoPdf.cleanPdf(filePath, filePath)
         except Exception:
             # Best effort per file; Ctrl-C and SystemExit still propagate.
             Log.w(DuoPdf.__name__, 'clean [%s] failed' % (filePath,))
             traceback.print_exc()
Beispiel #7
0
    def crop2(dest, src, margin1, margin2):
        '''Crop src into dest using different margins for odd and even pages.

        The first page is copied uncropped. From page 2 on, odd-numbered
        pages (1-based) use margin1 and even-numbered pages use margin2.

        dest    -- path of the cropped PDF to write.
        src     -- path of the PDF to read.
        margin1 -- 4-item sequence for odd pages: items 0 and 1 are added to
                   the lower-left x and y of the media box, items 2 and 3
                   are subtracted from its upper-right x and y.
        margin2 -- same layout as margin1, used for even pages.
        '''
        Log.i(DuoPdf.__name__, 'cropping file [%s]' % (src, ))

        # The source must stay open until the writer has produced its
        # output; the with-blocks close both files even when an error occurs.
        with open(src, 'rb') as srcFile:
            srcPdf = PdfFileReader(srcFile)
            destPdf = PdfFileWriter()

            for index, page in enumerate(srcPdf.pages, 1):
                if index != 1:
                    margin = margin1 if index % 2 == 1 else margin2
                    box = page.mediaBox
                    box.upperRight = (box.getUpperRight_x() - margin[2],
                                      box.getUpperRight_y() - margin[3])
                    box.lowerLeft = (box.getLowerLeft_x() + margin[0],
                                     box.getLowerLeft_y() + margin[1])
                destPdf.addPage(page)

            Log.i(DuoPdf.__name__, 'saving to file [%s]...' % (dest, ))
            with open(dest, 'wb') as destFile:
                destPdf.write(destFile)
        Log.i(DuoPdf.__name__, 'done')
Beispiel #8
0
 def onStop(self, event):
     '''Finish a download once phantomjs has stopped.

     Records the download through self.persist, runs Duokan.merge and then
     Duokan.crop for self.id (logging after each), and finally dispatches
     Downloader.EVT_STOP. The event argument is not used.
     '''
     Log.i(self.TAG, 'phantomjs finished...')
     self.persist.setDownload(self.id)
     # Post-processing steps run strictly in this order.
     for stepName, doneMsg in (('merge', 'merged pdf...'),
                               ('crop', 'croped pdf...')):
         getattr(Duokan, stepName)(self.id)
         Log.i(self.TAG, doneMsg)
     self.dispatch(Downloader.EVT_STOP)
Beispiel #9
0
 def onStop(self, event):
     '''React to the end of the phantomjs run.

     Marks self.id as downloaded via self.persist, then calls Duokan.merge
     followed by Duokan.crop with self.id, logging after each call, and
     dispatches Downloader.EVT_STOP at the end. The event argument is
     ignored.
     '''
     Log.i(self.TAG, 'phantomjs finished...')
     self.persist.setDownload(self.id)
     pipeline = (
         ('merge', 'merged pdf...'),
         ('crop', 'croped pdf...'),
     )
     for action, message in pipeline:
         getattr(Duokan, action)(self.id)
         Log.i(self.TAG, message)
     self.dispatch(Downloader.EVT_STOP)
Beispiel #10
0
    def cropWH(dest, src, destWidth, destHeight):
        '''Crop every page of src to destWidth x destHeight and save to dest.

        The crop keeps the left edge of each page and is centred
        vertically: the same amount is removed from the top and the bottom.
        The new box is computed relative to the page's existing lower-left
        corner, so pages whose media box does not start at (0, 0) are
        cropped to the requested size as well.

        dest       -- path of the cropped PDF to write.
        src        -- path of the PDF to read.
        destWidth  -- width of the resulting media box.
        destHeight -- height of the resulting media box.
        '''
        Log.i(DuoPdf.__name__, 'cropping file [%s]' % (src, ))

        # The source must stay open until the writer has produced its
        # output; the with-blocks close both files even when an error occurs.
        with open(src, 'rb') as srcFile:
            srcPdf = PdfFileReader(srcFile)
            destPdf = PdfFileWriter()

            for page in srcPdf.pages:
                box = page.mediaBox

                # Read all corners before changing any of them.
                left = box.getLowerLeft_x()
                bottom = box.getLowerLeft_y()
                top = box.getUpperRight_y()
                # Amount trimmed from both the top and the bottom edge.
                vMargin = ((top - bottom) - destHeight) / 2

                box.upperRight = (left + destWidth, top - vMargin)
                box.lowerLeft = (left, bottom + vMargin)

                destPdf.addPage(page)

            Log.i(DuoPdf.__name__, 'saving to file [%s]...' % (dest, ))
            with open(dest, 'wb') as destFile:
                destPdf.write(destFile)
        Log.i(DuoPdf.__name__, 'done')
Beispiel #11
0
    def crop(dest, src, left, top, bottom, right):
        '''Trim the given margins off every page of src and save to dest.

        PDF coordinates have their origin at the lower-left corner, so
        left and bottom move the lower-left corner of the media box inwards
        and right and top move the upper-right corner inwards.

        NOTE(review): each margin is applied to the edge it is named after.
        Call sites whose values were tuned against a different edge mapping
        should be re-checked.

        dest   -- path of the cropped PDF to write.
        src    -- path of the PDF to read.
        left   -- amount removed from the left edge.
        top    -- amount removed from the top edge.
        bottom -- amount removed from the bottom edge.
        right  -- amount removed from the right edge.
        '''
        Log.i(DuoPdf.__name__, 'cropping file [%s]' % (src, ))

        # The source must stay open until the writer has produced its
        # output; the with-blocks close both files even when an error occurs.
        with open(src, 'rb') as srcFile:
            srcPdf = PdfFileReader(srcFile)
            destPdf = PdfFileWriter()

            for page in srcPdf.pages:
                box = page.mediaBox
                box.upperRight = (box.getUpperRight_x() - right,
                                  box.getUpperRight_y() - top)
                box.lowerLeft = (box.getLowerLeft_x() + left,
                                 box.getLowerLeft_y() + bottom)
                destPdf.addPage(page)

            Log.i(DuoPdf.__name__, 'saving to file [%s]...' % (dest, ))
            with open(dest, 'wb') as destFile:
                destPdf.write(destFile)
        Log.i(DuoPdf.__name__, 'done')