예제 #1
0
파일: wsj.img.py 프로젝트: susemm/wsjimg
 def find_links(self, url, response):
     '''Extract the links from a fetched page and queue them on the spider.

     url is only used for the log line. response is handed to ImgPageLinks
     together with this object's strStart / strEnd attributes, and the
     links it returns are passed to self.spider.add_urls.
     '''
     Log.i(self.TAG, 'find links in %s' % url)
     page_links = ImgPageLinks(response, self.strStart, self.strEnd)
     self.spider.add_urls(page_links.getLinks(response))
예제 #2
0
파일: duoMain.py 프로젝트: susemm/books
 def cleanTmp(self):
     '''Delete every entry inside the ./tmp directory.

     The directory itself is kept. Each entry is logged and then handed to
     self.deleteFileFolder. When ./tmp does not exist there is nothing to
     clean, so the method returns quietly instead of letting os.listdir
     raise OSError.
     '''
     path = os.path.join(os.path.curdir, 'tmp')
     if not os.path.isdir(path):
         # Nothing to clean; os.listdir would raise on a missing directory.
         return
     for item in os.listdir(path):
         filename = os.path.join(path, item)
         Log.e(self.TAG, 'deleting %s' % (filename, ))
         self.deleteFileFolder(filename)
예제 #3
0
 def cleanTmp(self):
     '''Remove all files and folders found directly under ./tmp.

     ./tmp itself is left in place. Every entry is logged and passed to
     self.deleteFileFolder. A missing ./tmp is treated as "already clean":
     the method returns without raising the OSError that os.listdir would
     otherwise produce.
     '''
     path = os.path.join(os.path.curdir, 'tmp')
     if not os.path.isdir(path):
         # Missing directory means there is nothing to delete.
         return
     for item in os.listdir(path):
         filename = os.path.join(path, item)
         Log.e(self.TAG, 'deleting %s' % (filename, ))
         self.deleteFileFolder(filename)
예제 #4
0
파일: duoMain.py 프로젝트: susemm/books
 def renameAll(self):
     '''Rename the entries of ./books/new using titles from self.persist.

     The name of each entry without its extension is passed to
     self.persist.getTitle. When a truthy title comes back, the entry is
     renamed to that title plus the original extension and the rename is
     logged; otherwise the entry is left untouched.
     '''
     folder = os.path.join(os.path.curdir, 'books', 'new')
     for entry in os.listdir(folder):
         bookId, ext = os.path.splitext(entry)
         title = self.persist.getTitle(bookId)
         if not title:
             # No title for this id: keep the current file name.
             continue
         renamed = '%s%s' % (title, ext)
         oldPath = os.path.join(folder, entry)
         newPath = os.path.join(folder, renamed)
         os.rename(oldPath, newPath)
         Log.i(self.TAG, '%s -> %s' % (entry, renamed))
예제 #5
0
파일: duoMain.py 프로젝트: susemm/books
 def mergeSingle(src):
     '''Clean the PDFs in directory src and merge them into one file.

     The output is written next to src as "<parent>/<name of src>.pdf".
     When src is not a directory a warning is logged and nothing else
     happens.
     '''
     if not os.path.isdir(src):
         Log.w(Duokan.__name__, '[%s] is not a diractory, exit...' % (src, ))
         return
     # normpath drops a trailing separator. Without it os.path.split('a/b/')
     # returns an empty last component and the output would be 'a/b/.pdf',
     # i.e. inside the directory that is being merged.
     parent, bookId = os.path.split(os.path.normpath(src))
     destPath = os.path.join(parent, bookId + '.pdf')
     DuoPdf.clean(src)
     DuoPdf.merge(destPath, src)
예제 #6
0
 def mergeSingle(src):
     '''Clean and merge the PDFs of one directory into a sibling PDF file.

     src is the directory to process. The merged file is named after the
     directory and placed in its parent: "<parent>/<dirname>.pdf". A src
     that is not a directory only produces a warning in the log.
     '''
     if not os.path.isdir(src):
         Log.w(Duokan.__name__,
               '[%s] is not a diractory, exit...' % (src, ))
         return
     # Strip any trailing separator first: os.path.split('a/b/') gives
     # ('a/b', ''), which would turn the destination into 'a/b/.pdf'.
     parent, bookId = os.path.split(os.path.normpath(src))
     destPath = os.path.join(parent, bookId + '.pdf')
     DuoPdf.clean(src)
     DuoPdf.merge(destPath, src)
예제 #7
0
 def renameAll(self):
     '''Give the entries of ./books/new the titles known to self.persist.

     For every entry, the name without extension is looked up through
     self.persist.getTitle. A truthy result becomes the new base name (the
     extension is kept) and the rename is logged. Entries without a title
     are skipped.
     '''
     booksDir = os.path.join(os.path.curdir, 'books', 'new')
     for current in os.listdir(booksDir):
         bookId, suffix = os.path.splitext(current)
         title = self.persist.getTitle(bookId)
         if not title:
             continue
         target = '%s%s' % (title, suffix)
         os.rename(os.path.join(booksDir, current),
                   os.path.join(booksDir, target))
         Log.i(self.TAG, '%s -> %s' % (current, target))
예제 #8
0
 def _getProxy(self):
     '''Load the proxy host, user and password from self.conf.

     The three attributes are reset to '' first, so any value that was not
     read before a failure stays empty. A failing read is logged together
     with its traceback and is not propagated to the caller.
     '''
     self.host = ''
     self.user = ''
     self.pswd = ''
     try:
         self.host = self.conf.get(Config.KEY_PROXY, Config.KEY_PROXY_HOST)
         self.user = self.conf.get(Config.KEY_PROXY, Config.KEY_PROXY_USER)
         self.pswd = self.conf.get(Config.KEY_PROXY, Config.KEY_PROXY_PAWD)
     except Exception:
         # Exception rather than a bare except, so KeyboardInterrupt and
         # SystemExit are no longer swallowed here.
         Log.w(self.TAG, 'read proxy failed')
         traceback.print_exc()
예제 #9
0
파일: duoMain.py 프로젝트: susemm/books
 def _getProxy(self):
     '''Read the proxy settings (host, user, password) from self.conf.

     host, user and pswd start out as '' and keep that value when they
     cannot be read. Errors while reading are logged with a traceback and
     do not reach the caller.
     '''
     self.host = self.user = self.pswd = ''
     try:
         self.host = self.conf.get(Config.KEY_PROXY, Config.KEY_PROXY_HOST)
         self.user = self.conf.get(Config.KEY_PROXY, Config.KEY_PROXY_USER)
         self.pswd = self.conf.get(Config.KEY_PROXY, Config.KEY_PROXY_PAWD)
     except Exception:
         # A bare except would also trap KeyboardInterrupt / SystemExit.
         Log.w(self.TAG, 'read proxy failed')
         traceback.print_exc()
예제 #10
0
파일: duoPdf.py 프로젝트: suseme/books
 def cleanAllInPath(self, srcDir):
     '''Run DuoPdf.cleanPdf on every regular file directly inside srcDir.

     A progress line is printed for each entry. Entries that are not
     regular files are logged and skipped. Files for which
     self.isCleanedFile or self.hasCleaned returns a truthy value are
     skipped as well (presumably because a cleaned copy already exists;
     confirm against those helpers). Every other file is cleaned into a
     sibling file named by self.getCleanedFileName. A failure on one file
     is logged with its traceback and does not stop the loop.
     '''
     # isdir() is already False for a path that does not exist.
     if not os.path.isdir(srcDir):
         Log.w(DuoPdf.__name__, 'dir [%s] not exist.' % (srcDir,))
         return

     files = os.listdir(srcDir)
     num = len(files)
     if num == 0:
         Log.w(DuoPdf.__name__, 'no file in [%s] to clean' % (srcDir,))
         return

     for i, f in enumerate(files):
         # Single-argument print(...) behaves the same on Python 2 and 3.
         print('%d/%d: %s' % (i, num, f))
         srcPath = os.path.join(srcDir, f)
         if not os.path.isfile(srcPath):
             Log.i(DuoPdf.__name__, 'skip file [%s]' % (srcPath,))
             continue
         if self.isCleanedFile(srcPath) or self.hasCleaned(srcPath):
             print('skip [%s]' % srcPath)
             continue
         destPath = os.path.join(srcDir, self.getCleanedFileName(f))
         try:
             DuoPdf.cleanPdf(srcPath, destPath)
         except Exception:
             # Best effort per file; Exception (not a bare except) lets
             # KeyboardInterrupt abort a long run.
             Log.w(DuoPdf.__name__, 'clean [%s] failed' % (srcPath,))
             traceback.print_exc()
예제 #11
0
 def deleteFileFolder(self, src):
     '''Delete src, whether it is a file, a symbolic link or a directory tree.

     Directories are emptied recursively and then removed. Symbolic links
     are removed themselves and never followed, so the contents of a
     linked directory stay intact. A failing removal is logged and does
     not raise.
     '''
     if os.path.islink(src) or os.path.isfile(src):
         # islink is checked first: isdir() follows links, which would
         # otherwise wipe the link target, and dangling links match
         # neither isfile() nor isdir().
         try:
             os.remove(src)
         except OSError:
             Log.e(self.TAG, 'delete [%s] failed...' % (src, ))
     elif os.path.isdir(src):
         for item in os.listdir(src):
             self.deleteFileFolder(os.path.join(src, item))
         try:
             os.rmdir(src)
         except OSError:
             Log.e(self.TAG, 'delete [%s] failed...' % (src, ))
예제 #12
0
파일: duoMain.py 프로젝트: susemm/books
 def deleteFileFolder(self, src):
     '''Remove a file, a symbolic link or a whole directory tree.

     Directory contents are deleted recursively before the directory
     itself. A symbolic link is unlinked without being followed, so
     whatever it points to is left alone. Removal errors are logged, not
     raised.
     '''
     if os.path.islink(src) or os.path.isfile(src):
         # Links are handled here on purpose: isdir() would follow a link
         # to a directory and the recursion would delete the target's
         # contents; dangling links would not be removed at all.
         try:
             os.remove(src)
         except OSError:
             Log.e(self.TAG, 'delete [%s] failed...' % (src, ))
     elif os.path.isdir(src):
         for item in os.listdir(src):
             itemsrc = os.path.join(src, item)
             self.deleteFileFolder(itemsrc)
         try:
             os.rmdir(src)
         except OSError:
             Log.e(self.TAG, 'delete [%s] failed...' % (src, ))
예제 #13
0
파일: duoPdf.py 프로젝트: suseme/books
    def crop2(dest, src, margin1, margin2):
        '''Crop every page of src except the first and save the result to dest.

        Pages are counted from 1. The first page is copied unchanged; odd
        pages (3, 5, ...) are cropped with margin1 and even pages with
        margin2. Each margin is a 4-item sequence applied to the page's
        media box:
            [0] is added to the lower-left x,
            [1] is added to the lower-left y,
            [2] is subtracted from the upper-right x,
            [3] is subtracted from the upper-right y.
        '''
        Log.i(DuoPdf.__name__, 'cropping file [%s]' % (src, ))

        # src stays open until the output has been written, as before; the
        # with blocks additionally close both files when an error occurs.
        with open(src, 'rb') as srcFile:
            srcPdf = PdfFileReader(srcFile)
            destPdf = PdfFileWriter()

            for index, page in enumerate(srcPdf.pages, 1):
                if index != 1:
                    margin = margin1 if index % 2 == 1 else margin2
                    box = page.mediaBox
                    box.upperRight = (box.getUpperRight_x() - margin[2],
                                      box.getUpperRight_y() - margin[3])
                    box.lowerLeft = (box.getLowerLeft_x() + margin[0],
                                     box.getLowerLeft_y() + margin[1])
                destPdf.addPage(page)

            Log.i(DuoPdf.__name__, 'saving to file [%s]...' % (dest, ))
            with open(dest, 'wb') as destFile:
                destPdf.write(destFile)
        Log.i(DuoPdf.__name__, 'done')
예제 #14
0
파일: duoMain.py 프로젝트: susemm/books
 def onStop(self, event):
     '''Post-process a finished download and announce that it stopped.

     Marks self.id as downloaded in self.persist, then runs Duokan.merge
     and Duokan.crop on it (logging after each step) and finally
     dispatches Downloader.EVT_STOP. The event argument is not used.
     '''
     Log.i(self.TAG, 'phantomjs finished...')
     self.persist.setDownload(self.id)
     # Each post-processing step is followed by its own log line.
     steps = (
         (Duokan.merge, 'merged pdf...'),
         (Duokan.crop, 'croped pdf...'),
     )
     for step, message in steps:
         step(self.id)
         Log.i(self.TAG, message)
     self.dispatch(Downloader.EVT_STOP)
예제 #15
0
 def onStop(self, event):
     '''Handle the end of a download: record it, merge, crop and notify.

     self.id is marked as downloaded through self.persist, passed to
     Duokan.merge and Duokan.crop in that order with a log line after
     each, and Downloader.EVT_STOP is dispatched last. event is ignored.
     '''
     tag = self.TAG
     Log.i(tag, 'phantomjs finished...')
     self.persist.setDownload(self.id)
     for action, logLine in ((Duokan.merge, 'merged pdf...'),
                             (Duokan.crop, 'croped pdf...')):
         action(self.id)
         Log.i(tag, logLine)
     self.dispatch(Downloader.EVT_STOP)
예제 #16
0
파일: duoPdf.py 프로젝트: suseme/books
 def clean(srcDir):
     '''Run DuoPdf.cleanPdf in place on every regular file inside srcDir.

     Each file is passed to cleanPdf as both source and destination.
     Entries that are not regular files are logged and skipped. A failure
     on one file is logged with its traceback and does not stop the loop.
     A missing or empty directory only produces a warning.
     '''
     # isdir() is already False for a path that does not exist.
     if not os.path.isdir(srcDir):
         Log.w(DuoPdf.__name__, 'dir [%s] not exist.' % (srcDir,))
         return

     files = os.listdir(srcDir)
     if not files:
         Log.w(DuoPdf.__name__, 'no file in [%s] to clean' % (srcDir,))
         return

     for f in files:
         filePath = os.path.join(srcDir, f)
         if not os.path.isfile(filePath):
             Log.i(DuoPdf.__name__, 'skip file [%s]' % (filePath,))
             continue
         try:
             DuoPdf.cleanPdf(filePath, filePath)
         except Exception:
             # Best effort per file; a bare except would also trap
             # KeyboardInterrupt and SystemExit.
             Log.w(DuoPdf.__name__, 'clean [%s] failed' % (filePath,))
             traceback.print_exc()
예제 #17
0
파일: duoPdf.py 프로젝트: suseme/books
    def crop(dest, src, left, top, bottom, right):
        '''Shrink the media box of every page of src and save the result to dest.

        How each argument is applied to a page's media box:
            left   is added to the lower-left x,
            top    is added to the lower-left y,
            bottom is subtracted from the upper-right x,
            right  is subtracted from the upper-right y.

        NOTE(review): the names top / bottom / right do not line up with the
        coordinates they change. The mapping is kept exactly as it was so
        existing callers see no difference; confirm the intended mapping
        before renaming or reordering anything.
        '''
        Log.i(DuoPdf.__name__, 'cropping file [%s]' % (src, ))

        # src stays open until the output has been written, as before; the
        # with blocks additionally close both files when an error occurs.
        with open(src, 'rb') as srcFile:
            srcPdf = PdfFileReader(srcFile)
            destPdf = PdfFileWriter()

            for page in srcPdf.pages:
                box = page.mediaBox
                box.upperRight = (box.getUpperRight_x() - bottom,
                                  box.getUpperRight_y() - right)
                box.lowerLeft = (box.getLowerLeft_x() + left,
                                 box.getLowerLeft_y() + top)
                destPdf.addPage(page)

            Log.i(DuoPdf.__name__, 'saving to file [%s]...' % (dest, ))
            with open(dest, 'wb') as destFile:
                destPdf.write(destFile)
        Log.i(DuoPdf.__name__, 'done')
예제 #18
0
파일: duoPdf.py 프로젝트: suseme/books
    def cropWH(dest, src, destWidth, destHeight):
        '''Crop every page of src to destWidth x destHeight and save it to dest.

        Vertically, half of the surplus height is trimmed from the top and
        the lower edge is set to that same amount. Horizontally the box is
        set to span x = 0 .. destWidth; it is not centred.

        NOTE(review): the new lower-left corner is written as absolute
        values (0 and half the surplus height), which matches the
        description above only for pages whose media box starts at (0, 0).
        Confirm for the PDFs this is used on.
        '''
        Log.i(DuoPdf.__name__, 'cropping file [%s]' % (src, ))

        # src stays open until the output has been written, as before; the
        # with blocks additionally close both files when an error occurs.
        with open(src, 'rb') as srcFile:
            srcPdf = PdfFileReader(srcFile)
            destPdf = PdfFileWriter()

            for page in srcPdf.pages:
                box = page.mediaBox
                # The old code also computed a "width" as
                # UpperRight_x - UpperRight_x (always 0) and never used it;
                # that dead value has been removed.
                height = box.getUpperRight_y() - box.getLowerLeft_y()
                trim = (height - destHeight) / 2
                box.upperRight = (destWidth, box.getUpperRight_y() - trim)
                box.lowerLeft = (0, trim)
                destPdf.addPage(page)

            Log.i(DuoPdf.__name__, 'saving to file [%s]...' % (dest, ))
            with open(dest, 'wb') as destFile:
                destPdf.write(destFile)
        Log.i(DuoPdf.__name__, 'done')
예제 #19
0
 def openInNewTab(self, url):
     '''Open url in a new browser tab; an empty url is only logged as an error.'''
     if len(url) == 0:
         Log.e(self.TAG, 'url is empty')
         return
     webbrowser.open(url, new=2, autoraise=True)
예제 #20
0
파일: duoMain.py 프로젝트: susemm/books
 def openInNewTab(self, url):
     '''Show url in a new tab of the web browser.

     Nothing is opened for an empty url; an error is logged instead.
     '''
     isEmpty = len(url) == 0
     if isEmpty:
         Log.e(self.TAG, 'url is empty')
         return
     webbrowser.open(url, new=2, autoraise=True)
예제 #21
0
파일: duoPdf.py 프로젝트: suseme/books
 def merge(dest, srcDir):
     '''Merge the PDF files found directly inside srcDir into the file dest.

     Entries are taken in the order os.listdir returns them. Entries that
     are not regular files are logged and skipped. A file that cannot be
     opened or merged is logged with its traceback and the remaining
     files are still processed. A missing or empty directory only
     produces a warning and no output file.

     NOTE(review): each file is inserted at position=<its index in the
     listing>. PyPDF2 documents position as a page number, so the result
     follows the listing order only if every input has a single page.
     Confirm for the inputs used here.
     '''
     # isdir() is already False for a path that does not exist.
     if not os.path.isdir(srcDir):
         Log.w(DuoPdf.__name__, 'dir [%s] not exist.' % (srcDir,))
         return

     files = os.listdir(srcDir)
     if not files:
         Log.w(DuoPdf.__name__, 'no file in [%s] to merge' % (srcDir,))
         return

     merger = PdfFileMerger()
     # Inputs stay open until the merged file has been written, and are
     # then closed in the finally block; previously they were never closed.
     openedInputs = []
     try:
         for i, f in enumerate(files):
             Log.i(DuoPdf.__name__, 'merge file [%s]' % (f, ))
             filePath = os.path.join(srcDir, f)
             if not os.path.isfile(filePath):
                 Log.i(DuoPdf.__name__, 'skip file [%s]' % (filePath,))
                 continue
             try:
                 srcFileHdl = open(filePath, 'rb')
                 openedInputs.append(srcFileHdl)
                 merger.merge(position=i, fileobj=srcFileHdl)
             except Exception:
                 # Best effort per file; a bare except would also trap
                 # KeyboardInterrupt and SystemExit.
                 Log.w(DuoPdf.__name__, 'merge [%s] failed' % (filePath,))
                 traceback.print_exc()

         Log.i(DuoPdf.__name__, 'save to file [%s]...' % (dest, ))
         with open(dest, 'wb') as destFileStream:
             merger.write(destFileStream)
         Log.i(DuoPdf.__name__, 'done')
     finally:
         for handle in openedInputs:
             handle.close()