Ejemplo n.º 1
0
class CSConverter(QObject):
    """Singleton that compiles CoffeeScript to JavaScript inside a QWebPage.

    The compiler script is read from the ``:/resources/coffee-script.js``
    resource and evaluated in a private page; this object is then exposed to
    that page's JavaScript under the name ``converter``.
    """

    _instance = None
    # Becomes True once the shared instance has been set up, so that later
    # ``CSConverter()`` calls do not rebuild the page.
    _initialized = False

    def __new__(cls, *args, **kwargs):
        """Return the one shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super(CSConverter,
                                  cls).__new__(cls, *args, **kwargs)
        return cls._instance

    def __init__(self):
        """Load the compiler into a new page (first construction only)."""
        # Python runs __init__ on every ``CSConverter()`` call, even when
        # __new__ hands back the existing instance. Without this guard each
        # call would re-evaluate the whole compiler and parent one more
        # QWebPage to this object.
        if CSConverter._initialized:
            return

        super(CSConverter, self).__init__(QApplication.instance())

        self.m_webPage = QWebPage(self)

        with QPyFile(':/resources/coffee-script.js') as f:
            script = f.readAll()

        self.m_webPage.mainFrame().evaluateJavaScript(script)
        self.m_webPage.mainFrame().addToJavaScriptWindowObject(
            'converter', self)

        # Only mark as ready after everything above succeeded, so a failed
        # set-up is retried on the next construction.
        CSConverter._initialized = True

    def convert(self, script):
        """Compile *script* and return the raw result of the JS evaluation.

        The source is handed to JavaScript through the dynamic ``source``
        property of this object. The evaluated snippet produces a two-item
        array: ``[true, <compiled JavaScript>]`` on success or
        ``[false, <error message>]`` when the compiler throws.
        """
        self.setProperty('source', script)
        result = self.m_webPage.mainFrame().evaluateJavaScript('''try {
                                                                      [true, this.CoffeeScript.compile(converter.source)];
                                                                  } catch (error) {
                                                                      [false, error.message];
                                                                  }
                                                               ''')
        return result
Ejemplo n.º 2
0
class CSConverter(QObject):
    """Compiles CoffeeScript to JavaScript using a private QWebPage.

    The compiler is loaded from the ``:/resources/coffee-script.js``
    resource, and this object is published to the page as ``converter``.
    """

    def __init__(self, parent=None):
        """Create the page and evaluate the compiler in it.

        Exits the process via ``sys.exit`` when the resource cannot be opened.
        """
        QObject.__init__(self, parent)
        self.m_webPage = QWebPage(self)

        compilerFile = QFile(':/resources/coffee-script.js')
        opened = compilerFile.open(QFile.ReadOnly)
        if not opened:
            sys.exit('CoffeeScript compiler is not available!')

        compilerSource = str(compilerFile.readAll())
        compilerFile.close()

        frame = self.m_webPage.mainFrame()
        frame.evaluateJavaScript(compilerSource)
        frame.addToJavaScriptWindowObject('converter', self)

    def convert(self, script):
        """Return the JavaScript compiled from *script*.

        The source travels to JavaScript through the dynamic ``source``
        property. When the compiler throws, its message is reported with
        ``qWarning`` and an empty string is returned.
        """
        self.setProperty('source', script)
        frame = self.m_webPage.mainFrame()
        outcome = frame.evaluateJavaScript('''try {
                                                                      [true, this.CoffeeScript.compile(converter.source)];
                                                                  } catch (error) {
                                                                      [false, error.message];
                                                                  }''')
        # outcome is [ok, payload]; payload is the error text when ok is False
        if outcome[0] is not False:
            return outcome[1]
        qWarning(outcome[1])
        return ''
Ejemplo n.º 3
0
class CSConverter(QObject):
    """Singleton that compiles CoffeeScript to JavaScript inside a QWebPage.

    The compiler script is read from the ``:/resources/coffee-script.js``
    resource and evaluated in a private page; this object is then exposed to
    that page's JavaScript under the name ``converter``.
    """

    _instance = None
    # Becomes True once the shared instance has been set up, so that later
    # ``CSConverter()`` calls do not rebuild the page.
    _initialized = False

    def __new__(cls, *args, **kwargs):
        """Return the one shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super(CSConverter, cls).__new__(cls, *args, **kwargs)
        return cls._instance

    def __init__(self):
        """Load the compiler into a new page (first construction only).

        Exits the process via ``sys.exit`` when the resource cannot be opened.
        """
        # Python runs __init__ on every ``CSConverter()`` call, even when
        # __new__ hands back the existing instance. Without this guard each
        # call would re-read and re-evaluate the compiler and parent one more
        # QWebPage to this object.
        if CSConverter._initialized:
            return

        QObject.__init__(self, QApplication.instance())
        self.m_webPage = QWebPage(self)

        converter = QFile(':/resources/coffee-script.js')
        if not converter.open(QFile.ReadOnly):
            sys.exit('CoffeeScript compiler is not available!')
        script = str(converter.readAll())
        converter.close()

        self.m_webPage.mainFrame().evaluateJavaScript(script)
        self.m_webPage.mainFrame().addToJavaScriptWindowObject('converter', self)

        # Only mark as ready after everything above succeeded.
        CSConverter._initialized = True

    def convert(self, script):
        """Compile *script* and return the raw result of the JS evaluation.

        The source is handed to JavaScript through the dynamic ``source``
        property of this object. The evaluated snippet produces a two-item
        array: ``[true, <compiled JavaScript>]`` on success or
        ``[false, <error message>]`` when the compiler throws.
        """
        self.setProperty('source', script)
        result = self.m_webPage.mainFrame().evaluateJavaScript('''try {
                                                                      [true, this.CoffeeScript.compile(converter.source)];
                                                                  } catch (error) {
                                                                      [false, error.message];
                                                                  }
                                                               ''')
        return result
Ejemplo n.º 4
0
class CSConverter(QObject):
    """Compiles CoffeeScript to JavaScript using a private QWebPage.

    The compiler is loaded from the ``:/resources/coffee-script.js``
    resource, and this object is published to the page as ``converter``.
    """

    def __init__(self, parent=None):
        """Create the page and evaluate the compiler in it.

        Exits the process via ``sys.exit`` when the resource cannot be opened.
        """
        QObject.__init__(self, parent)
        self.m_webPage = QWebPage(self)

        compilerFile = QFile(":/resources/coffee-script.js")
        if not compilerFile.open(QFile.ReadOnly):
            sys.exit("CoffeeScript compiler is not available!")
        compilerSource = str(compilerFile.readAll())
        compilerFile.close()

        frame = self.m_webPage.mainFrame()
        frame.evaluateJavaScript(compilerSource)
        frame.addToJavaScriptWindowObject("converter", self)

    def convert(self, script):
        """Compile *script* and return the raw result of the JS evaluation.

        The evaluated snippet produces ``[true, <compiled JavaScript>]`` on
        success or ``[false, <error message>]`` when the compiler throws.
        """
        # The page reads the source back through the ``source`` property.
        self.setProperty("source", script)
        frame = self.m_webPage.mainFrame()
        return frame.evaluateJavaScript(
            """try {
                                                                      [true, this.CoffeeScript.compile(converter.source)];
                                                                  } catch (error) {
                                                                      [false, error.message];
                                                                  }
                                                               """
        )
Ejemplo n.º 5
0
class CSConverter(QObject):
    """Singleton that compiles CoffeeScript to JavaScript inside a QWebPage.

    The compiler script is read from the ``:/resources/coffee-script.js``
    resource and evaluated in a private page; this object is then exposed to
    that page's JavaScript under the name ``converter``.
    """

    _instance = None
    # Becomes True once the shared instance has been set up, so that later
    # ``CSConverter()`` calls do not rebuild the page.
    _initialized = False

    def __new__(cls, *args, **kwargs):
        """Return the one shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super(CSConverter, cls).__new__(cls, *args, **kwargs)
        return cls._instance

    def __init__(self):
        """Load the compiler into a new page (first construction only)."""
        # Python runs __init__ on every ``CSConverter()`` call, even when
        # __new__ hands back the existing instance. Without this guard each
        # call would re-evaluate the whole compiler and parent one more
        # QWebPage to this object.
        if CSConverter._initialized:
            return

        super(CSConverter, self).__init__(QApplication.instance())

        self.m_webPage = QWebPage(self)

        with QPyFile(':/resources/coffee-script.js') as f:
            self.m_webPage.mainFrame().evaluateJavaScript(f.readAll())
            self.m_webPage.mainFrame().addToJavaScriptWindowObject('converter', self)

        # Only mark as ready after everything above succeeded.
        CSConverter._initialized = True

    def convert(self, script):
        """Compile *script* and return the raw result of the JS evaluation.

        The source is handed to JavaScript through the dynamic ``source``
        property of this object. The evaluated snippet produces a two-item
        array: ``[true, <compiled JavaScript>]`` on success or
        ``[false, <error message>]`` when the compiler throws.
        """
        self.setProperty('source', script)
        result = self.m_webPage.mainFrame().evaluateJavaScript('''try {
                                                                      [true, this.CoffeeScript.compile(converter.source)];
                                                                  } catch (error) {
                                                                      [false, error.message];
                                                                  }
                                                               ''')
        return result
Ejemplo n.º 6
0
class CSConverter(QObject):
    """Compiles CoffeeScript to JavaScript using a private QWebPage.

    Use :meth:`instance` to obtain the shared converter, which is kept in
    the module-level ``CSConverterInstance`` global.
    """

    def __init__(self, parent):
        """Create the page and evaluate the compiler in it.

        Exits the process via ``sys.exit`` when the resource cannot be opened.
        """
        QObject.__init__(self, parent)
        self.m_webPage = QWebPage(self)

        compilerFile = QFile(":/resources/coffee-script.js")
        if not compilerFile.open(QFile.ReadOnly):
            sys.exit("CoffeeScript compiler is not available!")
        compilerSource = str(compilerFile.readAll())
        compilerFile.close()

        frame = self.m_webPage.mainFrame()
        frame.evaluateJavaScript(compilerSource)
        # Publish this object to the page's JavaScript as ``converter``.
        frame.addToJavaScriptWindowObject("converter", self)

    def convert(self, script):
        """Compile *script* and return the raw result of the JS evaluation.

        The evaluated snippet produces ``[true, <compiled JavaScript>]`` on
        success or ``[false, <error message>]`` when the compiler throws.
        """
        # The page reads the source back through the ``source`` property.
        self.setProperty("source", script)
        frame = self.m_webPage.mainFrame()
        return frame.evaluateJavaScript(
            """try {
                                                                      [true, this.CoffeeScript.compile(converter.source)];
                                                                  } catch (error) {
                                                                      [false, error.message];
                                                                  }
                                                               """
        )

    @staticmethod
    def instance():
        """Return the shared converter, building it on the first call."""
        global CSConverterInstance

        # We need only one instance of the CSConverter for our whole life
        if CSConverterInstance is not None:
            return CSConverterInstance

        CSConverterInstance = CSConverter(QCoreApplication.instance())
        return CSConverterInstance
 def importBookmarks(self):
     """
     Public method to import bookmarks.

     Asks the user for an XBEL or Netscape HTML bookmarks file. HTML files
     are loaded into an off-screen QWebPage, the ``extract_js`` script is
     evaluated on them and its result is fed to the XBEL reader; any other
     file is handed to the XBEL reader directly. The imported tree is added
     to the bookmarks menu as a folder titled "Imported <date>". A warning
     box is shown and nothing is imported when the file cannot be opened or
     the reader reports an error.
     """
     supportedFormats = QStringList() \
         << self.trUtf8("XBEL bookmarks").append(" (*.xbel *.xml)") \
         << self.trUtf8("HTML Netscape bookmarks").append(" (*.html *.htm)")

     fileName = KQFileDialog.getOpenFileName(\
         None,
         self.trUtf8("Import Bookmarks"),
         QString(),
         supportedFormats.join(";;"),
         None)
     if fileName.isEmpty():
         return

     reader = XbelReader()
     importRootNode = None
     # The dialog filter offers both *.html and *.htm, so treat both as HTML.
     if fileName.endsWith(".html") or fileName.endsWith(".htm"):
         inFile = QFile(fileName)
         # open() reports failure through its return value; comparing the
         # bound ``openMode`` method with an enum value never matches.
         if not inFile.open(QIODevice.ReadOnly):
             KQMessageBox.warning(None,
                 self.trUtf8("Import Bookmarks"),
                 self.trUtf8("""Error opening bookmarks file <b>%1</b>.""")\
                     .arg(fileName))
             return

         webpage = QWebPage()
         webpage.mainFrame().setHtml(QString(inFile.readAll()))
         # The content is in the page now; release the file handle.
         inFile.close()
         result = webpage.mainFrame().evaluateJavaScript(extract_js).toByteArray()
         buffer_ = QBuffer(result)
         buffer_.open(QIODevice.ReadOnly)
         importRootNode = reader.read(buffer_)
     else:
         importRootNode = reader.read(fileName)

     if reader.error() != QXmlStreamReader.NoError:
         KQMessageBox.warning(None,
             self.trUtf8("Import Bookmarks"),
             self.trUtf8("""Error when importing bookmarks on line %1, column %2:\n"""
                         """%3""")\
                 .arg(reader.lineNumber())\
                 .arg(reader.columnNumber())\
                 .arg(reader.errorString()))
         return

     importRootNode.setType(BookmarkNode.Folder)
     importRootNode.title = self.trUtf8("Imported %1")\
         .arg(QDate.currentDate().toString(Qt.SystemLocaleShortDate))
     self.addBookmark(self.menu(), importRootNode)
Ejemplo n.º 8
0
class html2png():
    """Render a web page into a numbered series of PNG slices.

    Construction starts loading *source*; once the page has loaded it is
    painted into one tall image, cut into slices and written next to
    *target*, after which the process exits.
    """

    # get URL and pixel width as parameters. The width is only approximate
    def __init__(self, source, target, width):
        """Start loading *source* into an off-screen page.

        source -- URL to load (passed to ``QUrl``)
        target -- output path; its last four characters are replaced by
                  ``.<index>.png`` for every slice
        width  -- viewport width in pixels (converted with ``int``)
        """
        self.width = int(width)
        self.target = target
        # NOTE(review): no QApplication is created here; presumably the
        # caller owns one -- confirm.
        # Restore the default SIGINT handler.
        signal.signal(signal.SIGINT, signal.SIG_DFL)
        self.qwPage = QWebPage()

        size = QSize()
        size.setWidth(int(width))
        self.qwPage.setViewportSize(size)

        self.qwPage.connect(self.qwPage, SIGNAL("loadFinished(bool)"),
                            self.onLoadFinished)
        self.qwPage.mainFrame().load(QUrl(source))

    # do not call this function. it is called via a signal
    def onLoadFinished(self, result):
        """Paint the loaded page, write the slices and exit the process.

        Exits with status 1 when the load failed and with status 0 after
        all slices have been saved.
        """
        if not result:
            sys.exit(1)

        # Set the size of the (virtual) browser window
        self.qwPage.setViewportSize(self.qwPage.mainFrame().contentsSize())

        # Paint this frame into an image
        image = QImage(self.qwPage.viewportSize(), QImage.Format_RGB32)
        painter = QPainter(image)
        self.qwPage.mainFrame().render(painter)
        painter.end()

        # Slice height derived from the width; 1.4142 is approximately
        # sqrt(2) (presumably an ISO paper aspect ratio -- confirm).
        targetHeight = self.width * 1.4142
        # math.ceil returns a float on Python 2 and range() needs an int.
        numSplits = int(math.ceil(image.height() / targetHeight))
        # QImage.copy takes integer geometry, so truncate explicitly
        # instead of handing it a float.
        sliceHeight = int(targetHeight - 1)
        for x in range(numSplits):
            start = x * targetHeight
            copy = image.copy(0, int(start), image.width(), sliceHeight)
            self.saveOptPNG(copy, self.target[:-4] + "." + str(x) + ".png")
        sys.exit(0)

    #optimize QImage PNG with PIL and save
    def saveOptPNG(self, img, path):
        """Re-encode the QImage *img* through PIL and write it to *path*.

        The image is first serialised to PNG in memory by Qt, then reopened
        with PIL and saved with ``compress_level=9``.
        """
        buffer = QBuffer()
        buffer.open(QIODevice.ReadWrite)
        img.save(buffer, "PNG")

        strio = BytesIO()
        strio.write(buffer.data())
        buffer.close()
        strio.seek(0)
        pil_im = Image.open(strio)
        pil_im.save(path, "PNG", optimize=False, compress_level=9)
Ejemplo n.º 9
0
def render_html(path_to_html, width=590, height=750, as_xhtml=True):
    """Load an HTML file into an off-screen QWebPage and run a renderer on it.

    path_to_html -- path of the file to load (made absolute)
    width, height -- viewport size of the page
    as_xhtml -- when true the file content is set as
                ``application/xhtml+xml``; otherwise the file is loaded
                by URL

    Returns ``None`` when ``is_ok_to_use_qt()`` says Qt cannot be used,
    otherwise the ``HTMLRenderer`` that was connected to the page's
    ``loadFinished`` signal. If the renderer recorded a ``ParserError``
    while loading as XHTML, the whole operation is retried once with
    ``as_xhtml=False``.
    """
    from PyQt4.QtWebKit import QWebPage
    from PyQt4.Qt import QEventLoop, QPalette, Qt, QUrl, QSize
    from calibre.gui2 import is_ok_to_use_qt
    if not is_ok_to_use_qt():
        return None
    path_to_html = os.path.abspath(path_to_html)
    with CurrentDir(os.path.dirname(path_to_html)):
        page = QWebPage()
        pal = page.palette()
        pal.setBrush(QPalette.Background, Qt.white)
        page.setPalette(pal)
        page.setViewportSize(QSize(width, height))
        page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
        page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
        loop = QEventLoop()
        renderer = HTMLRenderer(page, loop)
        page.loadFinished.connect(renderer, type=Qt.QueuedConnection)
        if as_xhtml:
            # Read through a context manager so the handle is closed right
            # away instead of whenever the file object is collected.
            with open(path_to_html, 'rb') as f:
                raw = f.read()
            page.mainFrame().setContent(raw,
                    'application/xhtml+xml', QUrl.fromLocalFile(path_to_html))
        else:
            page.mainFrame().load(QUrl.fromLocalFile(path_to_html))
        # Blocks until something quits the local event loop.
        loop.exec_()
    # Drop the renderer's references to the Qt objects before deleting them.
    renderer.loop = renderer.page = None
    page.loadFinished.disconnect()
    del page
    del loop
    if isinstance(renderer.exception, ParserError) and as_xhtml:
        return render_html(path_to_html, width=width, height=height,
                as_xhtml=False)
    return renderer
Ejemplo n.º 10
0
def render_html(path_to_html, width=590, height=750, as_xhtml=True):
    """Load an HTML file into an off-screen QWebPage and run a renderer on it.

    path_to_html -- path of the file to load (made absolute)
    width, height -- viewport size of the page
    as_xhtml -- when true the file content is set as
                ``application/xhtml+xml``; otherwise the file is loaded
                by URL

    Returns ``None`` when ``is_ok_to_use_qt()`` says Qt cannot be used,
    otherwise the ``HTMLRenderer`` that was connected to the page's
    ``loadFinished`` signal. If the renderer recorded a ``ParserError``
    while loading as XHTML, the whole operation is retried once with
    ``as_xhtml=False``.
    """
    from PyQt4.QtWebKit import QWebPage
    from PyQt4.Qt import QEventLoop, QPalette, Qt, QUrl, QSize
    from calibre.gui2 import is_ok_to_use_qt
    if not is_ok_to_use_qt():
        return None
    path_to_html = os.path.abspath(path_to_html)
    with CurrentDir(os.path.dirname(path_to_html)):
        page = QWebPage()
        pal = page.palette()
        pal.setBrush(QPalette.Background, Qt.white)
        page.setPalette(pal)
        page.setViewportSize(QSize(width, height))
        page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)
        page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
        loop = QEventLoop()
        renderer = HTMLRenderer(page, loop)
        page.loadFinished.connect(renderer, type=Qt.QueuedConnection)
        if as_xhtml:
            # Read through a context manager so the handle is closed right
            # away instead of whenever the file object is collected.
            with open(path_to_html, 'rb') as f:
                raw = f.read()
            page.mainFrame().setContent(raw,
                    'application/xhtml+xml', QUrl.fromLocalFile(path_to_html))
        else:
            page.mainFrame().load(QUrl.fromLocalFile(path_to_html))
        # Blocks until something quits the local event loop.
        loop.exec_()
    # Drop the renderer's references to the Qt objects before deleting them.
    renderer.loop = renderer.page = None
    page.loadFinished.disconnect()
    del page
    del loop
    if isinstance(renderer.exception, ParserError) and as_xhtml:
        return render_html(path_to_html, width=width, height=height,
                as_xhtml=False)
    return renderer
Ejemplo n.º 11
0
    def start(self):
        '''
        Starts the conversion process.

        Loads the document's main page into an off-screen QWebPage, pumps
        the event loop until it has loaded and its math has been typeset,
        then hands the resulting body HTML to an ExportToHtmlThread.
        Cancelling at any stage stops the sequence before the next step.
        '''
        def onLoadProgress(progress):
            self._loadProgress = progress

        def onLoadFinished(isLoaded):
            self._loaded = True

        page = QWebPage()
        page.loadProgress.connect(onLoadProgress)
        page.loadFinished.connect(onLoadFinished)

        baseUrl = QUrl.fromLocalFile(misc.temp_path('import'))
        frame = page.mainFrame()
        frame.setHtml(self._document.getMainPage(mathOutput='svg'), baseUrl)

        # Keep the UI alive until the page is loaded or the user cancels.
        while not (self._loaded or self._canceled):
            qApp.processEvents()

        self.ui.label.setText('Typesetting math equations...')

        if self._canceled:
            return

        # Wait for the MathJax to typeset
        while not (self._mathTypeset or self._canceled):
            qApp.processEvents()
            rawProgress = frame.evaluateJavaScript(
                misc.js_command('GetMathTypesetProgress', []))
            progress = int(rawProgress.toInt()[0])
            self.ui.progressBar.setValue(progress)
            typesetState = frame.evaluateJavaScript(
                misc.js_command('IsMathTypeset', []))
            self._mathTypeset = typesetState.toBool()

        if self._canceled:
            return

        # Not cancelled: convert the document in a separate thread
        bodyHtml = unicode(frame.evaluateJavaScript(
            misc.js_command('GetBodyHTML', [])).toString())
        self._thread = ExportToHtmlThread(
            bodyHtml, self._configuration, self._assigner, self._filePath)
        self._thread.onProgress.connect(self.ui.progressBar.setValue)
        self._thread.onProgressLabel.connect(self.ui.label.setText)
        self._thread.finished.connect(self._threadFinished)
        self.ui.cancelButton.clicked.connect(self._thread.quit)
        self._thread.start()
Ejemplo n.º 12
0
class CSConverter(QObject):
    """Compiles CoffeeScript to JavaScript using a private QWebPage.

    The compiler is loaded from the ``:/resources/coffee-script.js``
    resource, and this object is published to the page as ``converter``.
    """

    def __init__(self, parent=None):
        """Create the page and evaluate the compiler in it.

        Exits the process via ``sys.exit`` when the resource cannot be opened.
        """
        QObject.__init__(self, parent)
        self.m_webPage = QWebPage(self)

        compilerFile = QFile(':/resources/coffee-script.js')
        if not compilerFile.open(QFile.ReadOnly):
            sys.exit('CoffeeScript compiler is not available!')
        compilerSource = str(compilerFile.readAll())
        compilerFile.close()

        frame = self.m_webPage.mainFrame()
        frame.evaluateJavaScript(compilerSource)
        frame.addToJavaScriptWindowObject('converter', self)

    def convert(self, script):
        """Return the JavaScript compiled from *script*, or '' if there is none."""
        # The page reads the source back through the ``source`` property.
        self.setProperty('source', script)
        compiled = self.m_webPage.mainFrame().evaluateJavaScript('this.CoffeeScript.compile(converter.source)')
        if compiled:
            return compiled
        return ''
Ejemplo n.º 13
0
class CSConverter(QObject):
    """Compiles CoffeeScript to JavaScript using a private QWebPage.

    The compiler is loaded from the ``:/resources/coffee-script.js``
    resource, and this object is published to the page as ``converter``.
    """

    def __init__(self, parent=None):
        """Create the page and evaluate the compiler in it."""
        QObject.__init__(self, parent)
        self.m_webPage = QWebPage(self)

        converter = QFile(":/resources/coffee-script.js")
        # NOTE(review): the result of open() is not checked here, so a
        # missing resource presumably yields an empty compiler script.
        converter.open(QFile.ReadOnly)

        script = str(converter.readAll())
        converter.close()
        self.m_webPage.mainFrame().evaluateJavaScript(script)
        self.m_webPage.mainFrame().addToJavaScriptWindowObject("converter", self)

    def convert(self, script):
        """Return the JavaScript compiled from *script*, or "" if there is none."""
        # The page reads the source back through the ``source`` property.
        self.setProperty("source", script)
        result = self.m_webPage.mainFrame().evaluateJavaScript("this.CoffeeScript.compile(converter.source)")
        # Test truthiness rather than len(): the evaluation may hand back
        # None (e.g. when the compiler throws), and len(None) raises
        # TypeError instead of falling through to the empty string.
        if result:
            return result
        return ""
Ejemplo n.º 14
0
    def __init__(self, parent, jsonFile):
        """Set default settings, then apply overrides from a JSON file.

        parent   -- QObject parent
        jsonFile -- path of a UTF-8 encoded JSON file

        Each setting becomes a dynamic property initialised to its default.
        If the file content does not look like a JSON object a warning is
        issued and the defaults stay. Otherwise the ``:/configurator.js``
        script is evaluated, with ``%1`` replaced by the JSON text, in a page
        that sees this object as ``config``. Exits via ``sys.exit`` when the
        configurator script cannot be loaded.
        """
        QObject.__init__(self, parent)

        with codecs.open(jsonFile, encoding='utf-8') as fd:
            jsonText = fd.read()

        self.settings = {
            'auth': {'mapping': 'auth', 'default': None},
            'cookies': {'mapping': 'cookies', 'default': None},
            'diskCache': {'mapping': 'disk_cache', 'default': False},
            'ignoreSslErrors': {'mapping': 'ignore_ssl_errors', 'default': False},
            'loadImages': {'mapping': 'load_images', 'default': True},
            'loadPlugins': {'mapping': 'load_plugins', 'default': False},
            'localAccessRemote': {'mapping': 'local_access_remote', 'default': False},
            'outputEncoding': {'mapping': 'output_encoding', 'default': 'System'},
            'proxy': {'mapping': 'proxy', 'default': None},
            'scriptEncoding': {'mapping': 'script_encoding', 'default': 'utf-8'},
            'verbose': {'mapping': 'verbose', 'default': False},
        }

        # generate dynamic properties
        for name, spec in self.settings.items():
            self.setProperty(name, spec['default'])

        # now it's time to parse our JSON file
        looksLikeObject = (jsonText.lstrip().startswith('{')
                           and jsonText.rstrip().endswith('}'))
        if not looksLikeObject:
            qWarning('Config file MUST be in JSON format!')
            return

        scriptFile = QFile(':/configurator.js')
        if not scriptFile.open(QFile.ReadOnly):
            sys.exit('Unable to load JSON configurator!')
        configurator = str(scriptFile.readAll())
        scriptFile.close()
        if not configurator:
            sys.exit('Unable to set-up JSON configurator!')

        frame = QWebPage(self).mainFrame()

        # add config object
        frame.addToJavaScriptWindowObject('config', self)
        # apply settings
        frame.evaluateJavaScript(configurator.replace('%1', jsonText))
Ejemplo n.º 15
0
class CSConverter(QObject):
    """Compiles CoffeeScript to JavaScript using a private QWebPage.

    The compiler is loaded from the ``:/resources/coffee-script.js``
    resource, and this object is published to the page as ``converter``.
    """

    def __init__(self, parent=None):
        """Create the page and evaluate the compiler in it."""
        QObject.__init__(self, parent)
        self.m_webPage = QWebPage(self)

        compilerFile = QFile(':/resources/coffee-script.js')
        compilerFile.open(QFile.ReadOnly)
        compilerSource = QString.fromUtf8(compilerFile.readAll())
        compilerFile.close()

        frame = self.m_webPage.mainFrame()
        frame.evaluateJavaScript(compilerSource)
        frame.addToJavaScriptWindowObject('converter', self)

    def convert(self, script):
        """Return the compiled JavaScript as a QString.

        An empty QString is returned when the evaluation result is not a
        string variant.
        """
        # The page reads the source back through the ``source`` property.
        self.setProperty('source', script)
        outcome = self.m_webPage.mainFrame().evaluateJavaScript('this.CoffeeScript.compile(converter.source)')
        if outcome.type() != QVariant.String:
            return QString()
        return outcome.toString()
Ejemplo n.º 16
0
    def documentation_load_finished(self, ok):
        """Post-process the documentation page shown in the web view.

        For ``docs/appfuncs.html`` the ``<dt>`` entry matching the URL
        fragment is cut out of the page source and stored in
        ``self.doc_contents`` ("Documentation not found" when it cannot be
        located). For ``docs/source.html`` the fragment is read as
        ``<relative source file>#<anchor>``; the ``div.body`` of that file
        becomes ``self.doc_contents`` and the view scrolls to the anchor.
        In every case the current page source is finally written to
        ``<doc_path>tmp.html``.

        ok -- load result delivered by the signal; not used here.
        """
        url = self.ui.webView.url()
        url_path = str(url.path())
        url_fragment = str(url.fragment()).replace('library.zip.appfuncs.', 'appfuncs.')
        if url_path.endswith("docs/appfuncs.html"):
            doc_html = self.html_source()
            if url_fragment.endswith(".run"):
                url_fragment = url_fragment[:-4]

            start_tag = '<dt id="%s">' % url_fragment
            end_tag = '<dt id='

            try:
                # Kept inside the try: a fragment without a dot has no
                # second-to-last component and would otherwise escape as an
                # uncaught IndexError.
                scriptFunction_name = url_fragment.split(".")[-2]
                method_html = start_tag + doc_html.split(start_tag)[1].split(end_tag)[0]
                for a, b in [('<tt class="descname">run</tt>', '<tt class="descname">%s</tt>' % scriptFunction_name),
                    ('href="_modules', 'href="source.html?#_modules'),
                    ("Parameters :", "Parameters:"),
                    ("Return :", "Return:"),
                    ]:
                    method_html = method_html.replace(a, b)
            except IndexError:
                method_html = "Documentation not found"
            self.doc_contents = method_html

        elif url_path.endswith("docs/source.html"):
            # partition() tolerates a fragment without '#': the anchor is
            # then empty, where tuple-unpacking split('#') raised ValueError.
            source_html_path, _, self.anchor = str(url.fragment()).partition("#")

            with open("%s/%s" % (os.path.dirname(self.controller.function_docs), source_html_path)) as fid:
                source_html = to_str(fid.read())
            source_html = source_html.replace('<a class="viewcode-back" href="../../../ScriptFunctions.html', '<a class="viewcode-back" href="appfuncs.html')
            from PyQt4.QtWebKit import QWebPage
            # Parse the file in a throw-away page to extract its body.
            p = QWebPage()
            f = p.mainFrame()
            f.setHtml(source_html)
            self.doc_contents = f.findFirstElement('div.body').toInnerXml()

            if self.anchor:
                self.ui.webView.page().mainFrame().scrollToAnchor(self.anchor)
        with open(self.doc_path + "tmp.html", 'w') as fid:
            fid.write(self.html_source())
Ejemplo n.º 17
0
    def __init__(self, parent, jsonFile):
        """Set default settings, then apply overrides from a JSON file.

        parent   -- QObject parent
        jsonFile -- path of a UTF-8 encoded JSON file

        The settings table is passed to the ``ConfigInit`` action, then each
        setting becomes a dynamic property initialised to its default. If the
        file content does not look like a JSON object a warning is issued and
        the defaults stay. Otherwise the ``:/configurator.js`` script is
        evaluated, with ``%1`` replaced by the JSON text, in a page that sees
        this object as ``config``.
        """
        super(Config, self).__init__(parent)

        with codecs.open(jsonFile, encoding='utf-8') as fd:
            jsonText = fd.read()

        self.settings = {
            'cookiesFile': {'mapping': 'cookies_file', 'default': None},
            'debug': {'mapping': 'debug', 'default': None},
            'diskCache': {'mapping': 'disk_cache', 'default': False},
            'ignoreSslErrors': {'mapping': 'ignore_ssl_errors', 'default': False},
            'loadImages': {'mapping': 'load_images', 'default': True},
            'loadPlugins': {'mapping': 'load_plugins', 'default': False},
            'localToRemoteUrlAccessEnabled': {'mapping': 'local_to_remote_url_access', 'default': False},
            'maxDiskCacheSize': {'mapping': 'max_disk_cache_size', 'default': -1},
            'outputEncoding': {'mapping': 'output_encoding', 'default': 'System'},
            'proxy': {'mapping': 'proxy', 'default': None},
            'scriptEncoding': {'mapping': 'script_encoding', 'default': 'utf-8'},
            'verbose': {'mapping': 'verbose', 'default': False},
        }

        do_action('ConfigInit', self.settings)

        # generate dynamic properties
        for name, spec in self.settings.items():
            self.setProperty(name, spec['default'])

        # now it's time to parse our JSON file
        looksLikeObject = (jsonText.lstrip().startswith('{')
                           and jsonText.rstrip().endswith('}'))
        if not looksLikeObject:
            qWarning('Config file MUST be in JSON format!')
            return

        with QPyFile(':/configurator.js') as f:
            configurator = f.readAll()

        frame = QWebPage(self).mainFrame()

        # add config object
        frame.addToJavaScriptWindowObject('config', self)
        # apply settings
        frame.evaluateJavaScript(configurator.replace('%1', jsonText))
Ejemplo n.º 18
0
class Thumbnailer(QObject):
    """Loads a URL off-screen and saves the rendered page as an image.

    The ``finished`` signal is emitted after ``thumbnail.png`` has been
    written.
    """

    finished = pyqtSignal()

    def __init__(self, parent=None):
        """Create the page and render it whenever a load finishes."""
        QObject.__init__(self, parent)

        self.webPage = QWebPage(self)
        self.mainFrame = self.webPage.mainFrame()

        self.webPage.loadFinished.connect(self.render)

    def load(self, url):
        """Start loading *url* into the page's main frame."""
        qurl = QUrl(url)
        self.webPage.mainFrame().load(qurl)

    def render(self):
        """Paint the whole page into ``thumbnail.png`` and emit ``finished``."""
        # Grow the viewport to the full content so nothing is clipped.
        self.webPage.setViewportSize(self.webPage.mainFrame().contentsSize())
        image = QImage(self.webPage.viewportSize(), QImage.Format_ARGB32)
        # A QImage built from size and format starts with uninitialised
        # pixel data; clear it (0 is fully transparent in ARGB32) so areas
        # the page does not paint cannot show garbage.
        image.fill(0)

        painter = QPainter()
        painter.begin(image)
        self.webPage.mainFrame().render(painter)
        painter.end()

        image.save('thumbnail.png')
        self.finished.emit()
def formatQAAsImage(html, type, cid, mid, fact, tags, cm, deck):
  """Render one side of a card to an image and return an <img> tag for it.

  The card HTML is wrapped in the card's div and an alignment table, styled
  with the deck's CSS on a white background, painted through an off-screen
  QWebPage and stored with saveImage(). ``type`` names the side: its first
  character goes into the div's class and id, and ``<type>Align`` on ``cm``
  selects the alignment (0 centre, 1 left, anything else right).
  ``cid``, ``mid``, ``fact`` and ``tags`` are not used here.
  """
  # build up the html
  side = type[0]
  cardDiv = '''<div class="card%s" id="cm%s%s">%s</div>''' % (
      side, side, hexifyID(cm.id), html)

  alignCode = getattr(cm, type + 'Align')
  if alignCode == 0:
      align = "center"
  elif alignCode == 1:
      align = "left"
  else:
      align = "right"

  table = (("<center><table width=95%%><tr><td align=%s>" % align) +
           cardDiv + "</td></tr></table></center>")
  body = "<body><br><center>%s</center></body>" % (table)
  background = "body { background-color: #fff; }\n"
  document = "<style>\n" + background + deck.rebuildCSS() + "</style>\n" + body

  # create the web page object
  page = QWebPage()
  page.mainFrame().setHtml(document)

  # size everything all nice
  page = fitContentsInPage(page)

  # paint the page into an image and store it
  image = QImage(page.viewportSize(), QImage.Format_ARGB32_Premultiplied)
  painter = QPainter(image)
  page.mainFrame().render(painter)
  painter.end()
  imagePath = saveImage(image, deck)

  return u"<img src=\"%s\">" % (imagePath)
Ejemplo n.º 20
0
class HTMLTableRenderer(QObject):
    """Renders an HTML snippet to a series of PNG images of limited height.

    After rendering, ``self.images`` holds ``(path, width, height)`` tuples
    for the files written into the temporary directory ``self.tdir``.
    """

    def __init__(self, html, base_dir, width, height, dpi, factor):
        '''
        `width, height`: page width and height in pixels
        `base_dir`: The directory in which the HTML file that contains the table resides
        `dpi`: resolution recorded in the rendered image
        `factor`: text size multiplier applied to the page
        '''
        QObject.__init__(self)

        self.app = None
        self.width, self.height, self.dpi = width, height, dpi
        self.base_dir = base_dir
        self.images = []
        self.tdir = tempfile.mkdtemp(prefix='calibre_render_table')
        self.loop = QEventLoop()
        self.page = QWebPage()
        self.page.loadFinished.connect(self.render_html)
        self.page.mainFrame().setTextSizeMultiplier(factor)
        self.page.mainFrame().setHtml(
            html, QUrl('file:' + os.path.abspath(self.base_dir)))

    def render_html(self, ok):
        """Slot for ``loadFinished``: paint the page and write the slices.

        The page is painted at its full content size and cut into slices of
        at most ``cutoff_height`` rows, consecutive slices overlapping by 20
        rows; slices wider than the target width are scaled down to it.
        ``QApplication.quit()`` is always called at the end, whether or not
        rendering succeeded.
        """
        try:
            if not ok:
                return
            content_size = self.page.mainFrame().contentsSize()
            cwidth, cheight = content_size.width(), content_size.height()
            self.page.setViewportSize(QSize(cwidth, cheight))
            factor = float(self.width) / cwidth if cwidth > self.width else 1
            cutoff_height = int(self.height / factor) - 3
            # Each slice advances by 20 rows less than its height. If that
            # is not positive the loop below could never reach the bottom of
            # the image, so give up instead of spinning forever.
            step = cutoff_height - 20
            if step <= 0:
                return
            image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
            # Dots per inch -> dots per metre; the setters take an integer.
            dots_per_meter = int(self.dpi * (100 / 2.54))
            image.setDotsPerMeterX(dots_per_meter)
            image.setDotsPerMeterY(dots_per_meter)
            painter = QPainter(image)
            self.page.mainFrame().render(painter)
            painter.end()
            cheight = image.height()
            cwidth = image.width()
            pos = 0
            while pos < cheight:
                img = image.copy(0, pos, cwidth,
                                 min(cheight - pos, cutoff_height))
                pos += step
                if cwidth > self.width:
                    img = img.scaledToWidth(self.width, Qt.SmoothTransform)
                # The file is named after the already advanced position.
                f = os.path.join(self.tdir, '%d.png' % pos)
                img.save(f)
                self.images.append((f, img.width(), img.height()))
        finally:
            QApplication.quit()
Ejemplo n.º 21
0
class HTMLTableRenderer(QObject):
    '''
    Render an HTML fragment off-screen with QWebPage and slice the result
    into PNG images that each fit on one page.
    '''

    def __init__(self, html, base_dir, width, height, dpi, factor):
        '''
        `html`: markup handed to the page's main frame
        `width, height`: page width and height in pixels
        `base_dir`: The directory in which the HTML file that contains the table resides
        `dpi`: resolution stored in the generated images
        `factor`: text size multiplier applied to the main frame
        '''
        QObject.__init__(self)

        self.app = None
        self.width, self.height, self.dpi = width, height, dpi
        self.base_dir = base_dir
        # (path, width, height) of every strip written by render_html()
        self.images = []
        self.tdir = tempfile.mkdtemp(prefix='calibre_render_table')
        self.loop = QEventLoop()
        self.page = QWebPage()
        self.connect(self.page, SIGNAL('loadFinished(bool)'), self.render_html)
        self.page.mainFrame().setTextSizeMultiplier(factor)
        self.page.mainFrame().setHtml(html,
                                QUrl('file:'+os.path.abspath(self.base_dir)))

    def render_html(self, ok):
        '''
        Slot for ``loadFinished(bool)``: paint the page into an image, cut it
        into strips, save them in ``self.tdir`` and record them in
        ``self.images``. Always ends by calling ``QApplication.quit()``.

        `ok`: load status; nothing is rendered when it is false.
        '''
        try:
            if not ok:
                return
            contents = self.page.mainFrame().contentsSize()
            cwidth, cheight = contents.width(), contents.height()
            self.page.setViewportSize(QSize(cwidth, cheight))
            # Wide content is scaled down after slicing, so unscaled strips
            # may be proportionally taller than the page.
            factor = float(self.width)/cwidth if cwidth > self.width else 1
            cutoff_height = max(1, int(self.height/factor)-3)
            # Strips overlap by 20 pixels; the overlap is dropped for very
            # small pages so the step stays positive and the loop terminates.
            overlap = 20 if cutoff_height > 20 else 0
            step = cutoff_height - overlap
            image = QImage(self.page.viewportSize(), QImage.Format_ARGB32)
            # The setters take an integer number of dots per meter.
            dots_per_meter = int(self.dpi*(100/2.54))
            image.setDotsPerMeterX(dots_per_meter)
            image.setDotsPerMeterY(dots_per_meter)
            painter = QPainter(image)
            self.page.mainFrame().render(painter)
            painter.end()
            cheight = image.height()
            cwidth = image.width()
            pos = 0
            while pos < cheight:
                img = image.copy(0, pos, cwidth, min(cheight-pos, cutoff_height))
                pos += step
                if cwidth > self.width:
                    img = img.scaledToWidth(self.width, Qt.SmoothTransform)
                # The file is named after the offset of the following strip.
                f = os.path.join(self.tdir, '%d.png'%pos)
                img.save(f)
                self.images.append((f, img.width(), img.height()))
        finally:
            QApplication.quit()
Ejemplo n.º 22
0
    def reply_finished(self, datareply):
        """Parse a finished reply and store the extracted data in ``self._reply``.

        For the "definition" word class the result is a tuple made of the
        inner XML of ``div#contentbox`` (with the headword element removed)
        and the list of digit-free link titles found in ``div#vtoolbar``.
        For "synonyme" / "antonyme" it is the list of plain-text entries of
        the matching table cells.
        """
        parsed_page = QWebPage()
        parsed_page.mainFrame().setContent(datareply.readAll())
        root = parsed_page.mainFrame().documentElement()

        if self.wordclass == "definition":
            content_boxes = root.findAll("div#contentbox")
            # The headword lives in a different element depending on the
            # selected combo box entry.
            selector = ""
            if not self.combo2_index:
                selector = "div.tlf_cvedette"
            if 1 <= self.combo2_index <= 3:
                selector = "span.tlf_cvedette"
            unwanted = root.findAll(selector).first().toInnerXml()
            cleaned = content_boxes.first().toInnerXml().replace(unwanted, '')

            links = root.findFirst('div#vtoolbar').findAll("a[href]")
            self.formtype = []
            for index in range(len(links)):
                title = unicode(links.at(index).toPlainText())
                # Delete digits in definition title
                self.formtype.append(
                    ''.join(ch for ch in title if not ch.isdigit()))
            self._reply = cleaned, self.formtype

        if self.wordclass in ("synonyme", "antonyme"):
            self._reply = []
            cells = root.findAll("td." + self.wordclass[:4] + "_format")
            words = []
            for index in range(len(cells)):
                words.append(cells.at(index).firstChild().toPlainText())
            self._reply = words
Ejemplo n.º 23
0
    def __init__(self):
        """Create the web page, hook up its load signals and apply settings."""
        self.application = app

        self.headers = []
        self._load_timeout = -1
        self._load_success = False

        page = QWebPage()
        page.setForwardUnsupportedContent(True)
        page.loadFinished.connect(self._on_load_finished)
        page.loadStarted.connect(self._on_load_started)
        self.webpage = page
        self.webframe = page.mainFrame()

        self.setSettings()
Ejemplo n.º 24
0
 def run(self):
     """Create one QWebPage per input URL, configure it and start loading.

     Every page shares ``self.am`` as network access manager, reports its
     progress to the ``loadProgress`` / ``loadFinished`` / ``loadStarted``
     handlers of this object and is appended to ``self.pages``. An input
     of ``'-'`` means "read the document from standard input".
     """
     for in_url in self.in_:
         webpage = QWebPage()
         webpage.setNetworkAccessManager(self.am)
         webpage.connect(webpage, SIGNAL('loadProgress(int)'), self.loadProgress)
         webpage.connect(webpage, SIGNAL('loadFinished(bool)'), self.loadFinished)
         webpage.connect(webpage, SIGNAL('loadStarted()'), self.loadStarted)
         settings = webpage.settings()
         settings.setAttribute(QWebSettings.JavaEnabled, self.enable_plugins)
         settings.setAttribute(QWebSettings.JavascriptEnabled, not self.disable_javascript)
         settings.setAttribute(QWebSettings.JavascriptCanOpenWindows, False)
         settings.setAttribute(QWebSettings.JavascriptCanAccessClipboard, False)
         settings.setAttribute(QWebSettings.PrintElementBackgrounds, self.background)
         settings.setAttribute(QWebSettings.PluginsEnabled, self.enable_plugins)
         if self.user_style_sheet:
             settings.setUserStyleSheetUrl(self.guessUrlFromString(self.user_style_sheet))
         url = in_url
         if url == '-':
             tmp = tempfile.NamedTemporaryFile(prefix='tmp', suffix='.html')
             tmp.write(sys.stdin.read())
             # Make sure the data is on disk before the page reads the file.
             tmp.flush()
             # The file is deleted as soon as the object is collected, while
             # the load below is asynchronous: tie its lifetime to the page.
             webpage._stdin_tmpfile = tmp
             url = tmp.name
         url = self.guessUrlFromString(url)
         webpage.mainFrame().load(url)
         self.pages.append(webpage)
Ejemplo n.º 25
0
    def webpageScreenshot(self, html):
        """Take a screenshot of a given html document and return it as a QImage.

        The document is rendered off-screen in a temporary QWebPage whose
        viewport has the size reported by ``self.size()``.
        """
        # see http://www.blogs.uni-osnabrueck.de/rotapken/2008/12/03/create-screenshots-of-a-web-page-using-python-and-qtwebkit/
        size = self.size()
        # size = self.collectionView.page().viewportSize() # seems to be wrongly initialized sometimes...
        webpage = QWebPage()
        webpage.setLinkDelegationPolicy(QWebPage.DelegateAllLinks)
        webpage.setViewportSize(size)
        webpage.mainFrame().setHtml(html)

        # need to wait for the different elements to have loaded completely
        # sys.platform is "linux2" on Python 2 but plain "linux" on
        # Python >= 3.3, so match on the prefix instead of the exact value.
        if sys.platform.startswith("linux"):
            while QApplication.hasPendingEvents():
                QApplication.processEvents()
        else:
            QApplication.processEvents()

        image = QImage(size, QImage.Format_ARGB32)
        painter = QPainter(image)
        webpage.mainFrame().render(painter)
        painter.end()

        return image
Ejemplo n.º 26
0
    def __init__(self):
        """Set up logging, the web page with its load signals, and settings."""
        self.application = app
        self.logger = Logger.getLogger()

        self.headers = []
        self._load_timeout = -1
        self._load_success = False

        page = QWebPage()
        page.setForwardUnsupportedContent(True)
        page.loadFinished.connect(self._on_load_finished)
        page.loadStarted.connect(self._on_load_started)
        self.webpage = page
        self.webframe = page.mainFrame()

        self.setSettings()
Ejemplo n.º 27
0
class CSConverter(QObject):
    """Compile CoffeeScript by running the bundled compiler inside a QWebPage."""

    def __init__(self, parent=None):
        """Load the compiler from the Qt resource file and expose this
        object to the page's Javascript as ``converter``.

        Exits the process when the resource cannot be opened.
        """
        QObject.__init__(self, parent)
        self.m_webPage = QWebPage(self)

        resource = QFile(':/resources/coffee-script.js')
        if not resource.open(QFile.ReadOnly):
            sys.exit('CoffeeScript compiler is not available!')
        compiler_source = str(resource.readAll())
        resource.close()

        frame = self.m_webPage.mainFrame()
        frame.evaluateJavaScript(compiler_source)
        frame.addToJavaScriptWindowObject('converter', self)

    def convert(self, script):
        """Compile ``script`` and return the value of the Javascript
        expression: a ``[success, text]`` pair built by the snippet below.
        """
        # The source is handed over through a dynamic property that the
        # Javascript side reads as ``converter.source``.
        self.setProperty('source', script)
        return self.m_webPage.mainFrame().evaluateJavaScript('''try {
                                                                      [true, this.CoffeeScript.compile(converter.source)];
                                                                  } catch (error) {
                                                                      [false, error.message];
                                                                  }
                                                               ''')
Ejemplo n.º 28
0
class Sandbox(object):
    """Run Javascript snippets inside a WebKit frame."""

    def __init__(self, parent=None):
        """Start a QApplication and load an empty URL into a fresh page."""
        self.app = QApplication(['dummy'])
        self.webpage = QWebPage()
        self.webframe = self.webpage.mainFrame()
        self.webframe.load(QUrl(''))

    def execute(self, script):
        """Evaluate ``script``; return its result as ``str``, or ``None``
        when the result is falsy."""
        outcome = self.webframe.evaluateJavaScript(script)
        if not outcome:
            return None
        return str(outcome.toString())

    def close(self):
        """Ask the QApplication to exit."""
        self.app.exit()
Ejemplo n.º 29
0
class Sandbox(object):
    """Execute javascript in webkit browser."""

    # Indentation uses spaces only: the previous mix of tabs and spaces is
    # rejected by Python 3 (TabError).
    def __init__(self, parent=None):
        """Create the QApplication and an empty page to run scripts in."""
        app = QApplication(['dummy'])
        self.app = app
        self.webpage = QWebPage()
        self.webframe = self.webpage.mainFrame()
        self.webframe.load(QUrl(''))

    def execute(self, script):
        """Evaluate ``script`` in the frame.

        Returns the result converted to ``str``, or ``None`` when the
        evaluation yields a falsy value.
        """
        a = self.webframe.evaluateJavaScript(script)
        if a:
            return str(a.toString())

    def close(self):
        """Ask the QApplication to exit."""
        self.app.exit()
Ejemplo n.º 30
0
class Crawler:
    """Pipeline driver: collects species names, downloads ortholog data for
    every species pair and runs the later stages invoked from main().

    All statements are written so that they behave the same on Python 2 and
    parse on Python 3 (single-argument ``print(...)``, no tuple parameters).
    """

    # NOTE: these containers are class attributes, so they are shared by all
    # instances; fetch_uncached_orthologs() extends speciesPairs in place.
    geneToOrthologs = {}
    geneToSpecies = {}
    geneSequences = {}
    geneFamilies = None  # A list of sets containing the proteins in that family
    allSpecies = None
    species1Names = None
    species2Names = None
    speciesPairs = []
    malformedXMLFiles = []

    def main(self):
        """Create the run directories, execute every stage in order, exit."""
        for directory in (run_name,
                          run_name + '/clustalin',
                          run_name + '/clustalout',
                          run_name + '/roundup',
                          run_name + '/mktest_out'):
            if not os.path.isdir(directory):
                os.mkdir(directory)
        self.load_species_names_list()
        self.fetch_uncached_orthologs()
        self.load_gene_list()
        self.find_gene_families()
        # self.output_gene_families()
        self.fetch_gene_sequences()
        self.align_families()
        self.mktest_families()
        exit(0)

############################################# load_species_name_list #############################################

    def load_species_names_list(self):
        """Fill allSpecies / species1Names / species2Names.

        Uses the JSON cache in the run directory when it exists; otherwise
        loads the retrieval page and pumps the Qt event loop until
        process_organism_list() has stored the species list.
        """
        cache_path = '%s/species_names.json' % run_name
        if os.path.isfile(cache_path):
            print("Loading cached species names...")
            with open(cache_path) as cache_file:
                sn = cjson.decode(cache_file.read())
            self.allSpecies = sn['allSpecies']
            self.species1Names = sn['species1Names']
            self.species2Names = sn['species2Names']
        else:
            print("Fetching species names...")
            self.webpage = QWebPage()
            self.webpage.loadFinished.connect(self.process_organism_list)
            self.webpage.mainFrame().load(
                QUrl('http://roundup.hms.harvard.edu/retrieve/'))
            while self.allSpecies is None:
                time.sleep(.05)
                appInstance.processEvents()

    def process_organism_list(self, bool):
        """Slot for ``loadFinished``: read the species from the genome
        ``<select>`` element, classify them and write the JSON cache.

        `bool`: load status delivered by the signal (not inspected).
        """
        organisms_query = 'select#id_genome_choices'
        organisms_element = self.webpage.mainFrame().findAllElements(
            organisms_query).at(0)
        last_child = organisms_element.lastChild()
        elmt = organisms_element.firstChild()
        self.allSpecies = []
        # NOTE(review): the walk stops *before* the last child, so the final
        # entry of the list is never recorded - confirm this is intended.
        while True:
            if elmt == last_child:
                break
            self.allSpecies.append(str(elmt.attribute('value')))
            elmt = elmt.nextSibling()
        # List comprehensions keep these real lists (len() is used below).
        self.species1Names = [s for s in self.allSpecies if is_species_1(s)]
        self.species2Names = [s for s in self.allSpecies if is_species_2(s)]
        s_cnt, s1_cnt, s2_cnt = len(self.allSpecies), len(
            self.species1Names), len(self.species2Names)
        print("Found %i species, %i of type 1 and %i of type 2." % (
            s_cnt, s1_cnt, s2_cnt))
        savedict = {
            'allSpecies': self.allSpecies,
            'species1Names': self.species1Names,
            'species2Names': self.species2Names
        }
        with open('%s/species_names.json' % run_name, 'w') as cache_file:
            cache_file.write(cjson.encode(savedict))

############################################# fetch_uncached_orthologs #############################################

    def fetch_uncached_orthologs(self):
        """Build the list of species pairs and download every pair whose
        XML file is not yet present in the ``roundup`` cache directory."""
        self.downloader_pool = eventlet.greenpool.GreenPool(size=5)
        self.pairs_to_download = []
        bridge_pairs = bridges(self.species1Names, self.species2Names)
        print("Bridges:\n\t%s" % ('\n\t'.join(
            itertools.starmap(self.cache_name, bridge_pairs))))
        count1, count2 = len(self.species1Names), len(self.species2Names)
        # Number of unordered pairs within each group: n * (n - 1) / 2.
        combs1 = count1 * (count1 - 1) // 2
        combs2 = count2 * (count2 - 1) // 2
        self.speciesPairs.extend(bridge_pairs)
        self.speciesPairs.extend(itertools.combinations(self.species1Names, 2))
        self.speciesPairs.extend(itertools.combinations(self.species2Names, 2))
        print("That's %i combinations of species1, %i of species2, %i bridges." % (
            combs1, combs2, len(bridge_pairs)))
        numPairs = len(self.speciesPairs)
        for i, (l, r) in enumerate(self.speciesPairs):
            if i % 20 == 0:
                # "\x1B[1F" moves the cursor up so the progress line is
                # overwritten by the next print.
                print("%i%% (%i/%i)\x1B[1F" % (int(
                    i * 100.0 / numPairs), i, numPairs))
            if not os.path.isfile('%s/roundup/%s.xml' %
                                  (run_name, self.cache_name(l, r))):
                self.pairs_to_download.append((l, r))
        num_to_dl = len(self.pairs_to_download)
        print("Fetching %i uncached combinations of species..." % num_to_dl)
        pdp = self.downloader_pool.imap(self.fetch_pair,
                                        self.pairs_to_download)
        for i, response in enumerate(pdp, 1):
            cachename = self.cache_name(*response)
            print("%i%% (%i/%i): %s\x1B[1F" % (int(
                i * 100.0 / num_to_dl), i, num_to_dl, cachename))

    def cache_name(self, lSpecies, rSpecies):
        """Return a file-system friendly name for a species pair: the two
        names joined by ``---`` with every character outside letters, digits
        and ``-_.() `` removed."""
        name = lSpecies + '---' + rSpecies
        valid_chrs = '-_.() %s%s' % (string.ascii_letters, string.digits)
        filename = ''.join(c for c in name if c in valid_chrs)
        return filename

    def fetch_pair(self, pair):
        """Download one ``(lSpecies, rSpecies)`` pair, retrying for as long
        as the attempt fails with ``urllib2.URLError``; return the pair."""
        lSpecies, rSpecies = pair
        while True:
            try:
                self.attempt_fetch_pair((lSpecies, rSpecies))
                break
            except urllib2.URLError as e:
                print("Error fetching (%s,%s): %s" % (lSpecies, rSpecies, e))
        return (lSpecies, rSpecies)
Ejemplo n.º 31
0
class Browser:
    """
    Stateful programmatic web browser class based upon QtWebKit.

    >>> browser = Browser()
    >>> browser.load("http://www.wordreference.com")
    >>> browser.runjs("console.log('I can run Javascript!')")
    >>> browser.runjs("_jQuery('div').css('border', 'solid red')") # and jQuery!
    >>> browser.select("#esen")
    >>> browser.fill("input[name=enit]", "hola")
    >>> browser.click("input[name=b]", wait_load=True)
    >>> print browser.url, len(browser.html)
    >>> browser.close()
    """
    ignore_ssl_errors = True
    """@ivar: If True, ignore SSL certificate errors."""
    user_agent = None
    """@ivar: User agent for requests (see QWebPage::userAgentForUrl for details)"""
    jslib = "_jQuery"
    """@ivar: Library name for jQuery library injected by default to pages."""
    download_directory = "."
    """@ivar: Directory where downloaded files will be stored."""
    debug_stream = sys.stderr
    """@ivar: File-like stream where debug output will be written."""
    debug_level = ERROR
    """@ivar: Debug verbose level (L{ERROR}, L{WARNING}, L{INFO} or L{DEBUG})."""
    event_looptime = 0.01
    """@ivar: Event loop dispatcher loop delay (seconds)."""

    # File names of the Javascript helpers; __init__ reads them from the
    # chosen directory and concatenates them into self.javascript.
    _javascript_files = ["jquery.min.js", "jquery.simulate.js"]

    # Candidate locations of the Javascript helpers; __init__ picks one with
    # _first(..., os.path.isdir) - presumably the first existing directory.
    _javascript_directories = [
        os.path.join(os.path.dirname(__file__), "../javascript"),
        os.path.join(sys.prefix, "share/spynner/javascript"),
    ]

    def __init__(self, qappargs=None, debug_level=None):
        """
        Init a Browser instance.

        Creates the QApplication and the QWebPage with its main frame, reads
        the bundled Javascript helpers from disk, and wires the network
        access manager, the cookie jar and the page signals to the private
        handlers of this class.

        @param qappargs: Arguments for QApplication constructor.
        @param debug_level: Debug level logging (L{ERROR} by default)
        @raise SpynnerError: If no Javascript directory can be found.
        """
        self.application = QApplication(qappargs or [])
        """PyQt4.QtGui.Qapplication object."""
        if debug_level is not None:
            self.debug_level = debug_level
        self.webpage = QWebPage()
        """PyQt4.QtWebKit.QWebPage object."""
        self.webpage.userAgentForUrl = self._user_agent_for_url
        self.webframe = self.webpage.mainFrame()
        """PyQt4.QtWebKit.QWebFrame main webframe object."""
        self.webview = None
        """PyQt4.QtWebKit.QWebView object."""
        self._url_filter = None
        self._html_parser = None

        # Javascript
        directory = _first(self._javascript_directories, os.path.isdir)
        if not directory:
            raise SpynnerError("Cannot find javascript directory: %s" %
                               self._javascript_directories)
        # Read every helper through a context manager so that no file handle
        # is left open.
        sources = []
        for fn in self._javascript_files:
            with open(os.path.join(directory, fn)) as jsfile:
                sources.append(jsfile.read())
        self.javascript = "".join(sources)

        self.webpage.javaScriptAlert = self._javascript_alert
        self.webpage.javaScriptConsoleMessage = self._javascript_console_message
        self.webpage.javaScriptConfirm = self._javascript_confirm
        self.webpage.javaScriptPrompt = self._javascript_prompt
        self._javascript_confirm_callback = None
        self._javascript_confirm_prompt = None
        # _javascript_prompt() and _on_authentication_required() read these
        # attributes; they must exist even when no callback was registered.
        self._javascript_prompt_callback = None
        self._http_authentication_callback = None

        # Network Access Manager and cookies
        self.manager = QNetworkAccessManager()
        """PyQt4.QtNetwork.QTNetworkAccessManager object."""
        self.manager.createRequest = self._manager_create_request
        self.webpage.setNetworkAccessManager(self.manager)
        self.cookiesjar = _ExtendedNetworkCookieJar()
        """PyQt4.QtNetwork.QNetworkCookieJar object."""
        self.manager.setCookieJar(self.cookiesjar)
        self.manager.connect(
            self.manager,
            SIGNAL("sslErrors(QNetworkReply *, const QList<QSslError> &)"),
            self._on_manager_ssl_errors)
        self.manager.connect(self.manager, SIGNAL('finished(QNetworkReply *)'),
                             self._on_reply)
        self.manager.connect(
            self.manager,
            SIGNAL(
                'authenticationRequired(QNetworkReply *, QAuthenticator *)'),
            self._on_authentication_required)
        # Maps QNetworkAccessManager operation constants to lower-case verbs.
        self._operation_names = dict(
            (getattr(QNetworkAccessManager, s + "Operation"), s.lower())
            for s in ("Get", "Head", "Post", "Put"))

        # Webpage slots
        self._load_status = None
        self._replies = 0
        self.webpage.setForwardUnsupportedContent(True)
        self.webpage.connect(self.webpage,
                             SIGNAL('unsupportedContent(QNetworkReply *)'),
                             self._on_unsupported_content)
        self.webpage.connect(self.webpage, SIGNAL('loadFinished(bool)'),
                             self._on_load_finished)
        self.webpage.connect(self.webpage, SIGNAL("loadStarted()"),
                             self._on_load_started)

    def _events_loop(self, wait=None):
        """Process pending application events, then sleep.

        @param wait: Seconds to sleep; L{event_looptime} when omitted.
        """
        delay = self.event_looptime if wait is None else wait
        self.application.processEvents()
        time.sleep(delay)

    def _on_load_started(self):
        """Slot for C{loadStarted()}: clear the load status and log it."""
        # None means "load in progress"; _wait_load() polls this attribute.
        self._load_status = None
        self._debug(INFO, "Page load started")

    def _on_manager_ssl_errors(self, reply, errors):
        """Slot for the manager's C{sslErrors} signal.

        Logs the URL and, when L{ignore_ssl_errors} is set, tells the reply
        to ignore the errors.
        """
        url = unicode(reply.url().toString())
        if not self.ignore_ssl_errors:
            self._debug(WARNING, "SSL certificate error: %s" % url)
            return
        self._debug(WARNING, "SSL certificate error ignored: %s" % url)
        reply.ignoreSslErrors()

    def _on_authentication_required(self, reply, authenticator):
        """Slot for the manager's C{authenticationRequired} signal.

        Asks the registered HTTP authentication callback for credentials,
        calling it with the URL and the realm, and stores the returned
        C{(user, password)} pair in the authenticator.
        """
        url = unicode(reply.url().toString())
        realm = unicode(authenticator.realm())
        # The level argument is mandatory: without it the message itself is
        # taken as the level and nothing is logged.
        self._debug(INFO, "HTTP auth required: %s (realm: %s)" % (url, realm))
        # The attribute may never have been set; treat that as "no callback".
        callback = getattr(self, "_http_authentication_callback", None)
        if not callback:
            self._debug(WARNING, "HTTP auth required, but no callback defined")
            return
        credentials = callback(url, realm)
        if credentials:
            user, password = credentials
            # Never write the password itself to the debug stream.
            self._debug(
                INFO, "callback returned HTTP credentials: %s/%s" %
                (user, "*" * len(password)))
            authenticator.setUser(user)
            authenticator.setPassword(password)
        else:
            self._debug(WARNING, "HTTP auth callback returned no credentials")

    def _manager_create_request(self, operation, request, data):
        """Replacement for the manager's C{createRequest}.

        Logs the request and its raw headers, points the request to
        C{about:blank} when the URL filter returns exactly C{False}, and
        then delegates to C{QNetworkAccessManager.createRequest}.
        """
        url = unicode(request.url().toString())
        verb = self._operation_names[operation]
        self._debug(INFO, "Request: %s %s" % (verb.upper(), url))
        for header in request.rawHeaderList():
            self._debug(DEBUG,
                        "  %s: %s" % (header, request.rawHeader(header)))
        if self._url_filter:
            allowed = self._url_filter(verb, url)
            if allowed is False:
                self._debug(INFO, "URL filtered: %s" % url)
                request.setUrl(QUrl("about:blank"))
            else:
                self._debug(DEBUG, "URL not filtered: %s" % url)
        return QNetworkAccessManager.createRequest(self.manager, operation,
                                                   request, data)

    def _on_reply(self, reply):
        """Slot for the manager's C{finished} signal: count the reply and
        log its outcome and raw headers."""
        self._replies += 1
        url = unicode(reply.url().toString())
        error_code = reply.error()
        if not error_code:
            self._debug(INFO, "Reply successful: %s" % url)
        else:
            self._debug(
                WARNING, "Reply error: %s - %d (%s)" %
                (url, error_code, reply.errorString()))
        for name in reply.rawHeaderList():
            self._debug(DEBUG, "  %s: %s" % (name, reply.rawHeader(name)))

    def _on_unsupported_content(self, reply, outfd=None):
        """Slot for C{unsupportedContent}: download the reply unless it
        reports an error, in which case the error is logged.

        @param outfd: Optional file-like object passed on to the download.
        """
        if reply.error():
            self._debug(
                ERROR,
                "Error on unsupported content: %s" % reply.errorString())
            return
        self._start_download(reply, outfd)

    def _javascript_alert(self, webframe, message):
        """Log a Javascript alert; forward it to Qt only when a webview
        exists."""
        self._debug(INFO, "Javascript alert: %s" % message)
        if not self.webview:
            return
        QWebPage.javaScriptAlert(self.webpage, webframe, message)

    def _javascript_console_message(self, message, line, sourceid):
        """Log a Javascript console message, with its source position when
        a line number is given."""
        if line:
            text = "Javascript console (%s:%d): %s" % (sourceid, line, message)
        else:
            text = "Javascript console: %s" % message
        self._debug(INFO, text)

    def _javascript_confirm(self, webframe, message):
        """Answer a Javascript confirm dialog.

        The registered confirm callback, if any, is called with the frame
        URL and the message and its return value is used; otherwise the
        default C{QWebPage.javaScriptConfirm} is invoked.
        """
        smessage = unicode(message)
        url = webframe.url()
        self._debug(
            INFO,
            "Javascript confirm (webframe url = %s): %s" % (url, smessage))
        callback = self._javascript_confirm_callback
        if not callback:
            return QWebPage.javaScriptConfirm(self.webpage, webframe, message)
        answer = callback(url, smessage)
        self._debug(INFO,
                    "Javascript confirm callback returned %s" % answer)
        return answer

    def _javascript_prompt(self, webframe, message, defaultvalue, result):
        """Answer a Javascript prompt dialog.

        The registered prompt callback, if any, is called with the frame
        URL, the message and the default value. A return value of C{False}
        or C{None} cancels the prompt; any other value replaces the content
        of C{result}. Without a callback the default
        C{QWebPage.javaScriptPrompt} is invoked.

        @return: C{True} when a value was stored in C{result}, C{False} when
                 the callback cancelled, else the default implementation's
                 return value.
        """
        url = webframe.url()
        smessage = unicode(message)
        self._debug(
            INFO,
            "Javascript prompt (webframe url = %s): %s" % (url, smessage))
        # The attribute may never have been set; fall back to the default
        # dialog instead of failing with AttributeError.
        callback = getattr(self, "_javascript_prompt_callback", None)
        if callback:
            value = callback(url, smessage, defaultvalue)
            self._debug(INFO,
                        "Javascript prompt callback returned: %s" % value)
            if value in (False, None):
                return False
            result.clear()
            result.append(value)
            return True
        return QWebPage.javaScriptPrompt(self.webpage, webframe, message,
                                         defaultvalue, result)

    def _on_webview_destroyed(self, window):
        """Slot for the webview window's C{destroyed(QObject *)} signal:
        forget the webview."""
        self.webview = None

    def _on_load_finished(self, successful):
        """Slot for C{loadFinished(bool)}: store the load status and log the
        page size, URL and outcome."""
        labels = {True: "successful", False: "error"}
        self._load_status = successful
        outcome = labels[successful]
        message = "Page load finished (%d bytes): %s (%s)" % (
            len(self.html), self.url, outcome)
        self._debug(INFO, message)

    def _get_filepath_for_url(self, url):
        """Return the local path for a downloaded URL.

        The path is L{download_directory} joined with the URL's host and
        path; its parent directory is created when missing.
        """
        parts = urlparse.urlsplit(url)
        target = os.path.join(self.download_directory,
                              parts.netloc + parts.path)
        parent = os.path.dirname(target)
        if not os.path.isdir(parent):
            os.makedirs(parent)
        return target

    def _start_download(self, reply, outfd):
        """Stream the content of a network reply into a file.

        @param reply: Reply whose data is written as it arrives.
        @param outfd: File-like object to write to. When C{None}, a file is
                      opened at the path derived from the reply URL and is
                      closed again once the reply has finished.
        """
        url = unicode(reply.url().toString())
        opened_here = outfd is None
        if opened_here:
            path = self._get_filepath_for_url(url)
            outfd = open(path, "wb")

        def _on_ready_read():
            data = reply.readAll()
            # Keep a running byte count on the reply object itself.
            if not hasattr(reply, "downloaded_nbytes"):
                reply.downloaded_nbytes = 0
            reply.downloaded_nbytes += len(data)
            outfd.write(data)
            self._debug(
                DEBUG,
                "Read from download stream (%d bytes): %s" % (len(data), url))

        def _on_network_error(*_args):
            # The signal carries the error code; it is not needed here.
            self._debug(ERROR, "Network error on download: %s" % url)

        def _on_finished():
            # Only close what this method opened; a caller-supplied stream
            # stays under the caller's control.
            if opened_here:
                outfd.close()
            self._debug(INFO, "Download finished: %s" % url)

        reply.connect(reply, SIGNAL("readyRead()"), _on_ready_read)
        # QNetworkReply reports failures through error(NetworkError); a
        # "NetworkError()" signal does not exist, so it would never fire.
        reply.connect(reply, SIGNAL("error(QNetworkReply::NetworkError)"),
                      _on_network_error)
        reply.connect(reply, SIGNAL("finished()"), _on_finished)
        self._debug(INFO, "Start download: %s" % url)

    def _wait_load(self, timeout=None):
        """Run the events loop until the pending page load has finished.

        When the load completes while waiting and was successful, the
        Javascript helpers are injected and the viewport is resized to the
        page contents. The stored load status is reset before returning.

        @param timeout: Seconds to wait before giving up (no limit if unset).
        @return: The load status that was recorded.
        @raise SpynnerTimeout: If the timeout is reached while waiting.
        """
        self._events_loop(0.0)
        if self._load_status is None:
            started = time.time()
            while self._load_status is None:
                if timeout and time.time() - started > timeout:
                    raise SpynnerTimeout(
                        "Timeout reached: %d seconds" % timeout)
                self._events_loop()
            self._events_loop(0.0)
            if self._load_status:
                inject = "var %s = jQuery.noConflict();" % self.jslib
                self.runjs(self.javascript + inject, debug=False)
                contents = self.webpage.mainFrame().contentsSize()
                self.webpage.setViewportSize(contents)
        outcome = self._load_status
        self._load_status = None
        return outcome

    def _debug(self, level, *args):
        """Forward C{args} to the module-level C{_debug} helper, writing to
        L{debug_stream}, when C{level} does not exceed L{debug_level}."""
        if level > self.debug_level:
            return
        _debug(*args, outfd=self.debug_stream)

    def _user_agent_for_url(self, url):
        """Return L{user_agent} when set, else Qt's default user agent for
        the given URL."""
        custom_agent = self.user_agent
        if not custom_agent:
            return QWebPage.userAgentForUrl(self.webpage, url)
        return custom_agent

    def _runjs_on_jquery(self, name, code):
        """Run a jQuery expression and fail when it matched no element.

        @param name: Name of the calling operation (used in the error).
        @param code: Javascript expression that evaluates to a jQuery object.
        @raise SpynnerJavascriptError: If the resulting object is empty.
        """
        code2 = "result = %s; result.length" % code
        length = self.runjs(code2).toInt()
        # PyQt4's QVariant.toInt() returns an (int, ok) tuple. Comparing the
        # tuple itself with 1 never signals an empty match on Python 2, so
        # unpack it first; a failed conversion counts as "nothing matched".
        if isinstance(length, tuple):
            value, ok = length
            matched = value if ok else 0
        else:
            matched = length
        if matched < 1:
            raise SpynnerJavascriptError("error on %s: %s" % (name, code))

    def _get_html(self):
        """Return the main frame's current HTML as a unicode string
        (getter of the C{html} property)."""
        return unicode(self.webframe.toHtml())
        # Earlier ASCII-only variant, kept for reference:
        #return str(self.webframe.toHtml().toAscii())

    def _get_soup(self):
        """Return the current HTML run through the configured HTML parser
        (getter of the C{soup} property).

        @raise SpynnerError: If no HTML parser has been defined.
        """
        if self._html_parser:
            return self._html_parser(self.html)
        raise SpynnerError("Cannot get soup with no HTML parser defined")

    def _get_url(self):
        """Return the main frame's current URL as a unicode string
        (getter of the C{url} property)."""
        return unicode(self.webframe.url().toString())

    # Properties
    #
    # Read-only views built on the private getters defined above; every
    # access calls the getter again, so values always reflect the live page.

    url = property(_get_url)
    """Current URL."""

    html = property(_get_html)
    """Rendered HTML in current page."""

    soup = property(_get_soup)
    """HTML soup (see L{set_html_parser})."""

    #{ Basic interaction with browser

    def load(self, url):
        """Load a web page, wait for it and return the load status."""
        target = QUrl(url)
        self.webframe.load(target)
        status = self._wait_load()
        return status

    def click(self,
              selector,
              wait_load=False,
              wait_requests=None,
              timeout=None):
        """
        Click any clickable element in page.

        @param selector: jQuery selector.
        @param wait_load: If True, it will wait until a new page is loaded.
        @param timeout: Seconds to wait for the requests and for the page to
                        load before raising an exception.
        @param wait_requests: How many requests to wait before returning. Useful
                              for AJAX requests.
        @return: The load status when C{wait_load} is true, else C{None}.
        @raise SpynnerTimeout: If the timeout is reached while waiting.

        By default this method will not wait for a page to load.
        If you are clicking a link or submit button, you must call this
        method with C{wait_load=True} or, alternatively, call
        L{wait_load} afterwards. However, the recommended way it to use
        L{click_link}.

        When a non-HTML file is clicked this method will download it. The
        file is automatically saved keeping the original structure (as
        wget --recursive does). For example, a file with URL
        I{http://server.org/dir1/dir2/file.ext} will be saved to
        L{download_directory}/I{server.org/dir1/dir2/file.ext}.
        """
        jscode = "%s('%s').simulate('click')" % (self.jslib, selector)
        # Replies are counted from the moment of the click.
        self._replies = 0
        self._runjs_on_jquery("click", jscode)
        if wait_requests:
            itime = time.time()
            while self._replies < wait_requests:
                # Honour the timeout here as well; otherwise a request that
                # never completes would keep this loop spinning forever.
                if timeout and time.time() - itime > timeout:
                    raise SpynnerTimeout(
                        "Timeout reached: %d seconds" % timeout)
                self._events_loop()
            self._events_loop(0.0)
        if wait_load:
            return self._wait_load(timeout)

    def click_link(self, selector, timeout=None):
        """Click a link, wait for the new page and return the load status."""
        status = self.click(selector, wait_load=True, timeout=timeout)
        return status

    def click_ajax(self, selector, wait_requests=1, timeout=None):
        """Click an AJAX link and wait for C{wait_requests} replies."""
        options = dict(wait_requests=wait_requests, timeout=timeout)
        return self.click(selector, **options)

    def wait_load(self, timeout=None):
        """
        Block until the current page load has completed.

        @param timeout: Time to wait (seconds) for the page load to complete.
        @return: Boolean state
        @raise SpynnerTimeout: If timeout is reached.
        """
        status = self._wait_load(timeout)
        return status

    def wait(self, waittime):
        """
        Keep the events loop running for a given amount of time.

        @param waittime: Time to wait (seconds).

        This is an active wait, the events loop will be run, so it
        may be useful to wait for synchronous Javascript events that
        change the DOM.
        """
        started = time.time()
        while True:
            if time.time() - started >= waittime:
                break
            self._events_loop()

    def close(self):
        """Close Browser instance and release resources.

        Safe to call more than once.
        """
        if getattr(self, "webview", None):
            self.destroy_webview()
        # Drop the reference instead of deleting the attribute, so that a
        # repeated close() finds the attribute and does not fail with
        # AttributeError.
        self.webpage = None

    @classmethod
    def configure_proxy(cls,
                        hostname,
                        port,
                        user=None,
                        password=None,
                        proxy_type=QNetworkProxy.HttpProxy):
        """
        Install an application-wide network proxy.

        @param hostname: Proxy hostname.
        @param port: Proxy port.
        @param user: Proxy username (optional).
        @param password: Proxy password (optional).
        @param proxy_type: see QNetworkProxy.ProxyType. Default: HttpProxy.

        Credentials are only applied when a user is given and the password
        is not C{None}.
        """
        app_proxy = QNetworkProxy()
        app_proxy.setType(proxy_type)
        app_proxy.setHostName(hostname)
        app_proxy.setPort(port)
        has_credentials = user and password is not None
        if has_credentials:
            app_proxy.setUser(user)
            app_proxy.setPassword(password)
        QNetworkProxy.setApplicationProxy(app_proxy)

    #}

    #{ Webview

    def create_webview(self, show=False):
        """
        Build a QWebView around the current QWebPage.

        @param show: If True, display the webview right after creating it.
        @raise SpynnerError: If a webview already exists.
        """
        if self.webview:
            raise SpynnerError("Cannot create webview (already initialized)")
        view = QWebView()
        self.webview = view
        view.setPage(self.webpage)
        toplevel = view.window()
        toplevel.setAttribute(Qt.WA_DeleteOnClose)
        # Get notified when the window goes away (see _on_webview_destroyed).
        toplevel.connect(toplevel, SIGNAL('destroyed(QObject *)'),
                         self._on_webview_destroyed)
        if not show:
            return
        self.show()

    def destroy_webview(self):
        """
        Destroy current QWebView.

        @raise SpynnerError: If there is no webview to destroy.
        """
        if not self.webview:
            raise SpynnerError("Cannot destroy webview (not initialized)")
        # Rebind to None instead of ``del``: the reference is dropped either
        # way, but the attribute stays defined, so later ``if self.webview``
        # checks do not raise AttributeError.
        self.webview = None

    def show(self):
        """
        Make the webview visible.

        @raise SpynnerError: If no webview has been created.
        """
        view = self.webview
        if not view:
            raise SpynnerError("Webview is not initialized")
        view.show()

    def hide(self):
        """
        Make the webview invisible.

        @raise SpynnerError: If no webview has been created.
        """
        view = self.webview
        if not view:
            raise SpynnerError("Webview is not initialized")
        view.hide()

    def browse(self):
        """
        Show the webview and run the events loop while it exists.

        The loop only ends once C{self.webview} becomes false.

        @raise SpynnerError: If no webview has been created.
        """
        if not self.webview:
            raise SpynnerError("Webview is not initialized")
        self.show()
        while True:
            if not self.webview:
                break
            self._events_loop()

    #}

    #{ Form manipulation

    def fill(self, selector, value):
        """
        Fill an input text with a string value using a jQuery selector.

        @param selector: jQuery selector of the input element(s).
        @param value: String to set as the element value.

        The value is embedded in a single-quoted Javascript string literal,
        so backslashes, single quotes and line breaks are escaped.
        """
        # Backslashes must be escaped first, otherwise the escapes added by
        # the following replacements would themselves be doubled.
        escaped_value = (value.replace("\\", "\\\\")
                              .replace("'", "\\'")
                              .replace("\n", "\\n")
                              .replace("\r", "\\r"))
        jscode = "%s('%s').val('%s')" % (self.jslib, selector, escaped_value)
        self._runjs_on_jquery("fill", jscode)

    def check(self, selector):
        """
        Mark a checkbox input as checked.

        @param selector: jQuery selector of the checkbox.
        """
        jquery_args = (self.jslib, selector)
        self._runjs_on_jquery(
            "check", "%s('%s').attr('checked', true)" % jquery_args)

    def uncheck(self, selector):
        """
        Mark a checkbox input as not checked.

        @param selector: jQuery selector of the checkbox.
        """
        jquery_args = (self.jslib, selector)
        self._runjs_on_jquery(
            "uncheck", "%s('%s').attr('checked', false)" % jquery_args)

    def choose(self, selector):
        """
        Pick a radio input by simulating a click on it.

        @param selector: jQuery selector of the radio input.
        """
        jquery_args = (self.jslib, selector)
        self._runjs_on_jquery(
            "choose", "%s('%s').simulate('click')" % jquery_args)

    def select(self, selector):
        """
        Mark an option of a select element as selected.

        @param selector: jQuery selector of the option.
        """
        jquery_args = (self.jslib, selector)
        self._runjs_on_jquery(
            "select", "%s('%s').attr('selected', 'selected')" % jquery_args)

    submit = click_link

    #}

    #{ Javascript

    def runjs(self, jscode, debug=True):
        """
        Evaluate Javascript code in the main frame of the current page.

        @param jscode: Javascript code to inject.
        @param debug: Set to False to disable debug output for this injection.
        @return: The value returned by C{evaluateJavaScript}.

        You can call jQuery even if the original page does not include it
        as Spynner injects the library for every loaded page. You must
        use C{_jQuery(...)} instead of C{jQuery} or the common C{$(...)}
        shortcut.

        @note: You can change the _jQuery alias (see L{jslib}).
        @note: When the first evaluation yields an invalid result the code
               is evaluated a second time, so it may run twice.
        """
        def _evaluate():
            return self.webpage.mainFrame().evaluateJavaScript(jscode)

        if debug:
            self._debug(DEBUG, "Run Javascript code: %s" % jscode)
        result = _evaluate()
        if result.isValid():
            return result
        return _evaluate()

    def set_javascript_confirm_callback(self, callback):
        """
        Register the callback that answers Javascript confirm pop-ups.

        By default Javascript confirmations are not answered. If the webpage
        you are working with pops Javascript confirmations, be sure to set a
        callback for them.

        Callback signature: C{javascript_confirm_callback(url, message)}

            - url: Url where the popup was launched.
            - message: String message.

        The callback should return a boolean (True meaning 'yes', False
        meaning 'no').

        @param callback: Callable stored for later use.
        """
        self._javascript_confirm_callback = callback

    def set_javascript_prompt_callback(self, callback):
        """
        Register the callback that answers Javascript prompts.

        By default Javascript prompts are not answered. If the webpage
        you are working with pops Javascript prompts, be sure to set a
        callback for them.

        Callback signature:
        C{javascript_prompt_callback(url, message, defaultvalue)}

            - url: Url where the popup prompt was launched.
            - message: String message.
            - defaultvalue: Default value for prompt answer

        The callback should return a string with the answer or None to
        cancel the prompt.

        @param callback: Callable stored for later use.
        """
        self._javascript_prompt_callback = callback

    #}

    #{ Cookies

    def get_cookies(self):
        """Return the cookie jar contents as a Mozilla-format string."""
        mozilla_text = self.cookiesjar.mozillaCookies()
        return mozilla_text

    def set_cookies(self, string_cookies):
        """
        Load cookies into the cookie jar from a Mozilla-format string.

        @param string_cookies: String with Mozilla-format cookies.
        @return: Whatever the jar's C{setMozillaCookies} returns.
        """
        outcome = self.cookiesjar.setMozillaCookies(string_cookies)
        return outcome

    #}

    #{ Download files

    def download(self, url, outfd=None):
        """
        Download a given URL using current cookies.

        @param url: URL or path to download
        @param outfd: Output file-like stream. If None, return data string.
        @return: When C{outfd} is given, the number of bytes downloaded
                 (None if the reply reported an error). Otherwise the
                 downloaded data as a string.
        @raise SpynnerError: If the reply reports an error right away.
        @note: If url is a path, it is joined to the current URL.
        """
        def _on_reply(reply):
            self._download_reply_status = not bool(reply.error())

        self._download_reply_status = None
        if not urlparse.urlsplit(url).scheme:
            url = urlparse.urljoin(self.url, url)
        request = QNetworkRequest(QUrl(url))
        # Create a new manager to process this download
        manager = QNetworkAccessManager()
        # The cookie jar and the finished handler must be in place before
        # the request is issued, otherwise the request goes out without the
        # current cookies.
        cookiejar = self.manager.cookieJar()
        manager.setCookieJar(cookiejar)
        # NOTE(review): Qt documents that setCookieJar() takes ownership of
        # the jar and recommends clearing its parent when sharing it between
        # managers; without this the shared jar would presumably be deleted
        # together with the temporary manager -- confirm with PyQt.
        cookiejar.setParent(None)
        manager.connect(manager, SIGNAL('finished(QNetworkReply *)'),
                        _on_reply)
        reply = manager.get(request)
        if reply.error():
            raise SpynnerError("Download error: %s" % reply.errorString())
        reply.downloaded_nbytes = 0
        # Test identity, not truthiness: an empty file-like object that
        # defines __len__ would otherwise be mistaken for "no stream given".
        outfd_set = outfd is not None
        if not outfd_set:
            outfd = StringIO()
        self._start_download(reply, outfd)
        while self._download_reply_status is None:
            self._events_loop()
        if outfd_set:
            return (reply.downloaded_nbytes if not reply.error() else None)
        return outfd.getvalue()

    #}

    #{ HTML and tag soup parsing

    def set_html_parser(self, parser):
        """
        Register the HTML parser used to build the HTML L{soup}.

        @param parser: Callback called to generate the soup.

        When a HTML parser is set for a Browser, the property L{soup}
        returns the parsed HTML.
        """
        self._html_parser = parser

    def html_contains(self, regexp):
        """
        Tell whether the current HTML matches a regular expression.

        @param regexp: Pattern searched for (C{re.search}) in L{html}.
        @return: True if the pattern is found anywhere, False otherwise.
        """
        found = re.search(regexp, self.html)
        return found is not None

    #}

    #{ HTTP Authentication

    def set_http_authentication_callback(self, callback):
        """
        Register the callback used to answer HTTP authentication requests.

        The callback must have this signature:

        C{http_authentication_callback(url, realm)}:

            - C{url}: URL where the request was made.
            - C{realm}: Realm requiring authentication.

        The callback should return a pair of strings (user, password)
        or None if you don't want to answer.

        @param callback: Callable stored for later use.
        """
        self._http_authentication_callback = callback

    #}

    #{ Miscellaneous

    def snapshot(self, box=None, format=QImage.Format_ARGB32):
        """
        Render the main frame into an image.

        @param box: 4-element tuple containing box to capture (x1, y1, x2, y2).
                    If None, capture the viewport-sized image.
        @param format: QImage format (see QImage::Format_*).
        @return: A QImage image.

        Typical usage:

        >>> browser.load(url)
        >>> browser.snapshot().save("webpage.png")
        """
        if box:
            x1, y1, x2, y2 = box
            # Render up to the far corner of the box, crop afterwards.
            canvas_size = QSize(x2, y2)
        else:
            canvas_size = self.webpage.viewportSize()
        canvas = QImage(canvas_size, format)
        painter = QPainter(canvas)
        self.webpage.mainFrame().render(painter)
        painter.end()
        if box:
            return canvas.copy(x1, y1, x2 - x1, y2 - y1)
        return canvas

    def get_url_from_path(self, path):
        """
        Resolve a path against the current URL.

        @param path: Path (or URL) joined to L{url} with C{urlparse.urljoin}.
        @return: The resulting URL.
        """
        base_url = self.url
        return urlparse.urljoin(base_url, path)

    def set_url_filter(self, url_filter):
        """
        Register the callback used to filter requested URLs.

        By default all requested elements of a page are loaded. That
        includes stylesheets, images and many other elements that you may
        not need at all. Use this method to define the callback that will
        be called every time a new request is made. The callback must have
        this signature:

        C{my_url_filter(operation, url)}:

            - C{operation}: string with HTTP operation: C{get}, C{head},
                            C{post} or C{put}.
            - C{url}: requested item URL.

        It should return C{True} (proceed) or C{False} (reject).

        @param url_filter: Callable stored for later use.
        """
        self._url_filter = url_filter
Ejemplo n.º 32
0
def save_webpage_screenshot(url, width, height, file_name=None):
    """Take a PNG screenshot of the webpage at url.

    The page is loaded into an off-screen QWebPage and the screenshot is
    taken when the page emits a print request (printRequested signal).

    url: address of the page to load.
    width, height: viewport size in pixels (converted with int()).
    file_name: if given, the image is saved to file_name + ".png";
        if None, the PNG data is returned as a string instead.

    Returns the PNG data when file_name is None (None if no print request
    was handled before the event loop ended); otherwise returns None.

    Example:

    save_webpage_screenshot(
        "http://www.example.com",
        width=1024,
        height=768,
        file_name="example"
    )
    """
    app = QApplication(sys.argv)
    # Restore default SIGINT handling so Ctrl-C stops the Qt event loop.
    signal.signal(signal.SIGINT, signal.SIG_DFL)

    webpage = QWebPage()

    # set page dimensions
    webpage.setViewportSize(QSize(int(width), int(height)))

    # display errors otherwise debugging is very difficult
    def print_error(message, lineNumber, sourceID):
        # Parenthesised single argument: valid both as a Python 2 print
        # statement and as a Python 3 function call.
        print("\n%(sourceID)s line %(lineNumber)i: \n  %(message)s" % locals())

    webpage.javaScriptConsoleMessage = print_error

    # Filled by the print request handler when no file name was given.
    result = []

    # register print request handler
    def onPrintRequested(virtual_browser_window):
        # Paint this frame into an image
        image = QImage(webpage.viewportSize(), QImage.Format_ARGB32)
        painter = QPainter(image)
        virtual_browser_window.render(painter)
        painter.end()

        if file_name is not None:
            image.save(file_name + ".png")
        else:
            byte_array = QByteArray()
            png_buffer = QBuffer(byte_array)
            png_buffer.open(QIODevice.WriteOnly)
            image.save(png_buffer, format="PNG")
            result.append(str(byte_array))

        if __name__ == "__main__":
            # Run as a script: emit the image on stdout and terminate.
            if file_name is None:
                sys.stdout.write(result[0])
            sys.exit(0)
        else:
            app.quit()

    webpage.printRequested.connect(onPrintRequested)

    # load the page and wait for a print request
    webpage.mainFrame().load(QUrl(url))

    app.exec_()
    if file_name is None:
        # The event loop may end without a print request having been seen.
        return result[0] if result else None
Ejemplo n.º 33
0
class Evaluator(object):
    """Evaluate page Javascript in a QWebPage to recover an obfuscated form.

    The page script defining the deobfuscation function is located with
    BeautifulSoup, executed inside an empty document, and the resulting
    markup is searched for the captcha hash and captcha image URL.
    """

    # Characters stripped from the deobfuscated markup by _filter_markup().
    _replacechars = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЪЭЮЯабвгдеёжзийклмнопрстуфхцчшщьъэюя'
    # Translation table mapping each of those characters to None (deletion).
    _replace_table = dict.fromkeys(map(ord, _replacechars))

    def __init__(self):
        self.app = QApplication([])
        self.page = QWebPage()
        self.frame = self.page.mainFrame()

    def __del__(self):
        # Drop the references in the same order as before (frame, page,
        # app), tolerating instances whose __init__ did not complete.
        for name in ('frame', 'page', 'app'):
            self.__dict__.pop(name, None)

    def set_html(self, html):
        """Replace the frame content with the given HTML."""
        self.frame.setHtml(html)

    def reset_html(self):
        """Load an empty HTML document into the frame."""
        self.set_html('<html><head></head><body></body></html>')

    def eval_js(self, script):
        """Evaluate script in the frame; return the result if it is a str, else None."""
        res = self.frame.evaluateJavaScript(str(script))
        if isinstance(res, str):
            return res
        return None

    def get_html(self):
        """Return the current frame content as a str."""
        return str(self.frame.toHtml())

    def find_script(self, page):
        """Return the text node of page mentioning the deobfuscation call.

        Returns an empty string when the page has no body or no such node,
        so callers can test the result for truth.
        """
        soup = BeautifulSoup(page)
        body = soup.body
        if body is None:
            return ''
        script = body.find(text=re.compile(r'dеobfuscate_html\(\)'))
        if script is None:
            # str(None) would be the truthy string 'None'.
            return ''
        return str(script)

    def deobfuscate_form(self, script):
        """Run script in an empty document and return the resulting HTML."""
        self.reset_html()
        self.eval_js(script)
        self.eval_js('captcha_div2.innerHTML=dеobfuscate_html();')

        return self.get_html()

    def _filter_markup(self, form):
        """Return form with every character of _replacechars removed."""
        return form.translate(self._replace_table)

    def find_capair(self, domain, form):
        '''Finds cahash on deobfuscated page.

        Returns [cahash value, captcha image src] as strings.
        Raises exc.PermanentError when either element is missing.
        '''
        soup = BeautifulSoup(form)
        body = soup.body
        cahash = None
        if body is not None:
            cahash = body.find(attrs={'name': re.compile('cahash')})
        if not cahash:
            raise exc.PermanentError('cahash not found in form')
        # NOTE(review): domain is inserted into the pattern unescaped, so
        # its dots match any character -- confirm whether that is intended.
        src_pattern = ''.join((r'\/a\d\.', domain, r'\/i\/captcha\/'))
        caimg = body.find(attrs={'src': re.compile(src_pattern)})
        if not caimg:
            raise exc.PermanentError('caimg not found in form')
        return [str(cahash.get('value')), str(caimg.get('src'))]

    def solve_capage(self, domain, page):
        """Return the [cahash, captcha image src] pair found in page.

        domain may be bytes (decoded as UTF-8) or str.
        Raises exc.PermanentError when the obfuscated script or one of the
        form elements cannot be found.
        """
        s = self.find_script(page)
        if not s:
            raise exc.PermanentError('Obfuscated html not found in page')
        if isinstance(domain, bytes):
            domain = domain.decode('utf-8')
        form = self._filter_markup(self.deobfuscate_form(s))
        return self.find_capair(domain, form)
Ejemplo n.º 34
0
class Browser:
    """
    Stateful programmatic web browser class based upon QtWebKit.   
    
    >>> browser = Browser()
    >>> browser.load("http://www.wordreference.com")
    >>> browser.runjs("console.log('I can run Javascript!')")
    >>> browser.runjs("jq('div').css('border', 'solid red')") # and jQuery!
    >>> browser.select("#esen")
    >>> browser.fill("input[name=enit]", "hola")
    >>> browser.click("input[name=b]", wait_load=True)
    >>> print browser.url, len(browser.html)
    >>> browser.close()
    """
    ignore_ssl_errors = True
    """@ivar: If True, ignore SSL certificate errors."""
    user_agent = None
    """@ivar: User agent for requests (see QWebPage::userAgentForUrl for details)"""
    jslib = "jq"
    """@ivar: Library name for jQuery library injected by default to pages."""
    download_directory = "."
    """@ivar: Directory where downloaded files will be stored."""    
    debug_stream = sys.stderr
    """@ivar: File-like stream where debug output will be written."""
    debug_level = ERROR
    """@ivar: Debug verbose level (L{ERROR}, L{WARNING}, L{INFO} or L{DEBUG})."""    
    event_looptime = 0.01
    """@ivar: Event loop dispatcher loop delay (seconds)."""
    
    # Error code and message of the last failed network reply
    # (set by _on_reply); None until a reply reports an error.
    errorCode = None
    errorMessage = None

    # Javascript files concatenated into the code injected after a page load.
    _javascript_files = ["jquery.min.js", "jquery.simulate.js"]

    # Candidate directories holding the files above; the first existing
    # one is used by __init__.
    _javascript_directories = [
        os.path.join(os.path.dirname(__file__), "../javascript"),
        os.path.join(sys.prefix, "share/spynner/javascript"),
    ]
    
    def __init__(self, qappargs=None, debug_level=None):
        """
        Init a Browser instance.

        Creates the QApplication, the web page with its main frame, the
        network access manager with an extended cookie jar, loads the
        Javascript libraries from disk and connects the signal handlers.

        @param qappargs: Arguments for QApplication constructor.
        @param debug_level: Debug level logging (L{ERROR} by default)
        @raise SpynnerError: If no javascript directory can be found.
        """
        self.application = QApplication(qappargs or [])
        """PyQt4.QtGui.Qapplication object."""
        if debug_level is not None:
            self.debug_level = debug_level
        self.webpage = QWebPage()
        """PyQt4.QtWebKit.QWebPage object."""
        self.webpage.userAgentForUrl = self._user_agent_for_url
        self.webframe = self.webpage.mainFrame()
        """PyQt4.QtWebKit.QWebFrame main webframe object."""
        self.webview = None
        """PyQt4.QtWebKit.QWebView object."""
        self._url_filter = None
        self._html_parser = None

        # Javascript
        directory = _first(self._javascript_directories, os.path.isdir)
        if not directory:
            raise SpynnerError("Cannot find javascript directory: %s" %
                self._javascript_directories)
        # Read each library inside a ``with`` block so the file handles are
        # closed deterministically.
        sources = []
        for fn in self._javascript_files:
            with open(os.path.join(directory, fn)) as jsfile:
                sources.append(jsfile.read())
        self.javascript = "".join(sources)

        self.webpage.javaScriptAlert = self._javascript_alert
        self.webpage.javaScriptConsoleMessage = self._javascript_console_message
        self.webpage.javaScriptConfirm = self._javascript_confirm
        self.webpage.javaScriptPrompt = self._javascript_prompt
        # These are the attribute names read by _javascript_confirm,
        # _javascript_prompt and _on_authentication_required.
        self._javascript_confirm_callback = None
        self._javascript_prompt_callback = None
        self._http_authentication_callback = None

        # Network Access Manager and cookies
        self.manager = QNetworkAccessManager()
        """PyQt4.QtNetwork.QTNetworkAccessManager object."""
        self.manager.createRequest = self._manager_create_request
        self.webpage.setNetworkAccessManager(self.manager)
        self.cookiesjar = _ExtendedNetworkCookieJar()
        """PyQt4.QtNetwork.QNetworkCookieJar object."""
        self.manager.setCookieJar(self.cookiesjar)
        self.manager.connect(self.manager,
            SIGNAL("sslErrors(QNetworkReply *, const QList<QSslError> &)"),
            self._on_manager_ssl_errors)
        self.manager.connect(self.manager,
            SIGNAL('finished(QNetworkReply *)'),
            self._on_reply)
        self.manager.connect(self.manager,
            SIGNAL('authenticationRequired(QNetworkReply *, QAuthenticator *)'),
            self._on_authentication_required)
        # Maps QNetworkAccessManager operation constants to lowercase names.
        self._operation_names = dict(
            (getattr(QNetworkAccessManager, s + "Operation"), s.lower())
            for s in ("Get", "Head", "Post", "Put"))

        # Webpage slots
        self._load_status = None
        self._replies = 0
        self.webpage.setForwardUnsupportedContent(True)
        self.webpage.connect(self.webpage,
            SIGNAL('unsupportedContent(QNetworkReply *)'),
            self._on_unsupported_content)
        self.webpage.connect(self.webpage,
            SIGNAL('loadFinished(bool)'),
            self._on_load_finished)
        self.webpage.connect(self.webpage,
            SIGNAL("loadStarted()"),
            self._on_load_started)

    def _events_loop(self, wait=None):
        if wait is None:
            wait = self.event_looptime
        self.application.processEvents()
        time.sleep(wait)        
                        
    def _on_load_started(self):
        """Slot for C{loadStarted()}: clear the load status and log it."""
        self._load_status = None
        message = "Page load started"
        self._debug(INFO, message)
    
    def _on_manager_ssl_errors(self, reply, errors):
        """
        Slot for the manager's C{sslErrors} signal.

        Logs a warning with the reply URL and, when L{ignore_ssl_errors}
        is set, tells the reply to ignore the errors.
        """
        url = unicode(reply.url().toString())
        if not self.ignore_ssl_errors:
            self._debug(WARNING, "SSL certificate error: %s" % url)
            return
        self._debug(WARNING, "SSL certificate error ignored: %s" % url)
        reply.ignoreSslErrors()

    def _on_authentication_required(self, reply, authenticator):
        """
        Slot for the manager's C{authenticationRequired} signal.

        Asks the HTTP authentication callback (if any) for credentials and
        sets them on the authenticator.

        @param reply: Reply that requires authentication.
        @param authenticator: Object receiving the user and password.
        """
        url = unicode(reply.url().toString())
        realm = unicode(authenticator.realm())
        # _debug() takes the level as first argument; it was missing here.
        self._debug(INFO, "HTTP auth required: %s (realm: %s)" % (url, realm))
        # getattr: the attribute only exists once a callback has been set.
        callback = getattr(self, "_http_authentication_callback", None)
        if not callback:
            self._debug(WARNING, "HTTP auth required, but no callback defined")
            return
        credentials = callback(url, realm)
        if credentials:
            user, password = credentials
            # Never log the password itself, only its length as asterisks.
            self._debug(INFO, "callback returned HTTP credentials: %s/%s" %
                (user, "*"*len(password)))
            authenticator.setUser(user)
            authenticator.setPassword(password)
        else:
            self._debug(WARNING, "HTTP auth callback returned no credentials")
        
    def _manager_create_request(self, operation, request, data):
        """
        Replacement for the manager's C{createRequest}.

        Logs the request and its headers, applies the URL filter (rejected
        requests are redirected to C{about:blank}) and delegates to
        C{QNetworkAccessManager.createRequest}.

        @return: The reply created by the network access manager.
        """
        url = unicode(request.url().toString())
        # Only Get/Head/Post/Put are registered in _operation_names; fall
        # back to a placeholder instead of raising KeyError for any other
        # operation.
        operation_name = self._operation_names.get(operation, "unknown")
        self._debug(INFO, "Request: %s %s" % (operation_name.upper(), url))
        for h in request.rawHeaderList():
            self._debug(DEBUG, "  %s: %s" % (h, request.rawHeader(h)))
        if self._url_filter:
            # Only an explicit False rejects the URL.
            if self._url_filter(operation_name, url) is False:
                self._debug(INFO, "URL filtered: %s" % url)
                request.setUrl(QUrl("about:blank"))
            else:
                self._debug(DEBUG, "URL not filtered: %s" % url)
        reply = QNetworkAccessManager.createRequest(self.manager,
            operation, request, data)
        return reply

    def _on_reply(self, reply):
        """
        Slot for the manager's C{finished} signal.

        Counts the reply, records its URL and status, stores the error code
        and message of failed replies and logs the reply headers.
        """
        self._replies += 1
        self._reply_url = unicode(reply.url().toString())
        self._reply_status = not bool(reply.error())

        if not reply.error():
            self._debug(INFO, "Reply successful: %s" % self._reply_url)
        else:
            details = (self._reply_url, reply.error(), reply.errorString())
            self._debug(WARNING, "Reply error: %s - %d (%s)" % details)
            self.errorCode = reply.error()
            self.errorMessage = reply.errorString()
        for name in reply.rawHeaderList():
            self._debug(DEBUG, "  %s: %s" % (name, reply.rawHeader(name)))

    def _on_unsupported_content(self, reply, outfd=None):
        if not reply.error():
            self._start_download(reply, outfd)
        else:            
            self._debug(ERROR, "Error on unsupported content: %s" % reply.errorString())
                             
    def _javascript_alert(self, webframe, message):
        """
        Replacement for C{javaScriptAlert}.

        Logs the alert; the default Qt handler is only called when a
        webview exists.
        """
        self._debug(INFO, "Javascript alert: %s" % message)
        if not self.webview:
            return
        QWebPage.javaScriptAlert(self.webpage, webframe, message)
        
    def _javascript_console_message(self, message, line, sourceid):
        """
        Replacement for C{javaScriptConsoleMessage}: log the message.

        The source id and line are included only when C{line} is true.
        """
        if line:
            text = "Javascript console (%s:%d): %s" % (sourceid, line, message)
        else:
            text = "Javascript console: %s" % message
        self._debug(INFO, text)

    def _javascript_confirm(self, webframe, message):
        """
        Replacement for C{javaScriptConfirm}.

        @return: The confirm callback's answer when a callback is set,
                 otherwise the result of the default Qt handler.
        """
        smessage = unicode(message)
        url = webframe.url()
        self._debug(INFO, "Javascript confirm (webframe url = %s): %s" %
            (url, smessage))
        if not self._javascript_confirm_callback:
            return QWebPage.javaScriptConfirm(self.webpage, webframe, message)
        answer = self._javascript_confirm_callback(url, smessage)
        self._debug(INFO, "Javascript confirm callback returned %s" % answer)
        return answer

    def _javascript_prompt(self, webframe, message, defaultvalue, result):
        """
        Replacement for C{javaScriptPrompt}.

        When a prompt callback is set, its answer is stored in C{result}
        and True is returned; an answer of False or None cancels the
        prompt (returns False). Without a callback the default Qt handler
        is used.
        """
        url = webframe.url()
        smessage = unicode(message)
        self._debug(INFO, "Javascript prompt (webframe url = %s): %s" %
            (url, smessage))
        # getattr: the attribute only exists once
        # set_javascript_prompt_callback() has been called, because
        # __init__ does not define it.
        callback = getattr(self, "_javascript_prompt_callback", None)
        if callback:
            value = callback(url, smessage, defaultvalue)
            self._debug(INFO, "Javascript prompt callback returned: %s" % value)
            if value in (False, None):
                return False
            result.clear()
            result.append(value)
            return True
        return QWebPage.javaScriptPrompt(self.webpage, webframe, message,
            defaultvalue, result)
        
    def _on_webview_destroyed(self, window):
        self.webview = None
                                             
    def _on_load_finished(self, successful):
        """
        Slot for C{loadFinished(bool)}: record the load status and log it.

        @param successful: Boolean result reported by the signal.
        """
        self._load_status = successful
        status = "successful" if successful else "error"
        summary = (len(self.html), self.url, status)
        self._debug(INFO, "Page load finished (%d bytes): %s (%s)" % summary)

    def _get_filepath_for_url(self, url):
        urlinfo = urlparse.urlsplit(url)
        path = os.path.join(self.download_directory,
            urlinfo.netloc + urlinfo.path)
        if not os.path.isdir(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path))
        return path

    def _start_download(self, reply, outfd):
        """
        Start streaming the contents of a reply into an output stream.

        @param reply: Network reply to read from.
        @param outfd: File-like output stream. If None, a file is opened at
                      the path given by L{_get_filepath_for_url} and closed
                      when the download finishes.

        The number of bytes read so far is kept in
        C{reply.downloaded_nbytes}.
        """
        url = unicode(reply.url().toString())
        close_on_finish = outfd is None
        if close_on_finish:
            path = self._get_filepath_for_url(url)
            outfd = open(path, "wb")
        # Only download() initialises the counter; replies arriving through
        # _on_unsupported_content do not have it yet.
        if not hasattr(reply, "downloaded_nbytes"):
            reply.downloaded_nbytes = 0

        def _on_ready_read():
            data = reply.readAll()
            reply.downloaded_nbytes += len(data)
            outfd.write(data)
            self._debug(DEBUG, "Read from download stream (%d bytes): %s"
                % (len(data), url))

        def _on_network_error(code=None):
            self._debug(ERROR, "Network error on download: %s" % url)

        def _on_finished():
            if close_on_finish:
                # Flush anything still buffered before closing our file.
                if reply.bytesAvailable():
                    _on_ready_read()
                outfd.close()
            self._debug(INFO, "Download finished: %s" % url)

        reply.connect(reply, SIGNAL("readyRead()"), _on_ready_read)
        # QNetworkReply reports failures through error(NetworkError); it has
        # no signal called NetworkError().
        reply.connect(reply, SIGNAL("error(QNetworkReply::NetworkError)"),
            _on_network_error)
        reply.connect(reply, SIGNAL("finished()"), _on_finished)
        self._debug(INFO, "Start download: %s" % url)

    def _wait_load(self, timeout=None):
        """
        Run the events loop until a page load status is available.

        @param timeout: Seconds to wait before giving up; a false value
                        (None or 0) means no time limit.
        @return: The recorded load status (set by L{_on_load_finished}).
        @raise SpynnerTimeout: If the timeout is reached.
        """
        # Process pending events once, without sleeping, so that a load
        # which has already finished is noticed.
        self._events_loop(0.0)
        if self._load_status is not None:
            # A status is already recorded: consume it and return.
            # Note that this path does not inject the Javascript libraries.
            load_status = self._load_status
            self._load_status = None
            return load_status        
        itime = time.time()
        while self._load_status is None:
            if timeout and time.time() - itime > timeout:
                raise SpynnerTimeout("Timeout reached: %d seconds" % timeout)
            self._events_loop()
        self._events_loop(0.0)
        if self._load_status:
            # Successful load: inject the bundled Javascript, bind jQuery
            # to the configured alias and resize the viewport to the
            # contents size of the main frame.
            jscode = "var %s = jQuery.noConflict();" % self.jslib
            self.runjs(self.javascript + jscode, debug=False)
            self.webpage.setViewportSize(self.webpage.mainFrame().contentsSize())            
        # Consume the status so the next wait starts from scratch.
        load_status = self._load_status
        self._load_status = None
        return load_status        

    def _debug(self, level, *args):
        if level <= self.debug_level:
            kwargs = dict(outfd=self.debug_stream)
            _debug(*args, **kwargs)

    def _user_agent_for_url(self, url):
        if self.user_agent:
            return self.user_agent
        return QWebPage.userAgentForUrl(self.webpage, url)

    def get_js_obj_length(self, res):
        """
        Return the C{length} field of a Javascript result.

        @param res: Result object providing C{type()}, C{Map} and C{toMap()}.
        @return: The integer value of the C{length} entry, or False when
                 the result is not a map or has no such entry.
        """
        if res.type() == res.Map:
            fields = res.toMap()
            length_key = QString(u'length')
            if length_key in fields:
                return fields[length_key].toInt()[0]
        return False
    
    def jslen(self, selector):
        """
        Return the C{length} of the jQuery result for a selector.

        @param selector: jQuery selector.
        @return: See L{get_js_obj_length}.
        """
        jscode = "%s('%s')" % (self.jslib, selector)
        return self.get_js_obj_length(self.runjs(jscode))
    
    def _runjs_on_jquery(self, name, code):
        res = self.runjs(code)
        if self.get_js_obj_length(res) < 1:
            raise SpynnerJavascriptError("error on %s: %s" % (name, code))

    def _get_html(self):
        return unicode(self.webframe.toHtml())

    def _get_soup(self):
        if not self._html_parser:
            raise SpynnerError("Cannot get soup with no HTML parser defined")
        return self._html_parser(self.html)

    def _get_url(self):
        return unicode(self.webframe.url().toString())

    # Properties
                 
    url = property(_get_url)
    """Current URL."""        
                 
    html = property(_get_html)
    """Rendered HTML in current page."""
                 
    # The soup used to be a read-only property computed by _get_soup:
    #soup = property(_get_soup)
    soup = None # changed to None so that changes are retained through multiple calls
    """HTML soup (see L{set_html_parser})."""
               
    #{ Basic interaction with browser

    def load(self, url):
        """
        Load a web page in the current webframe and wait for it.

        @param url: Address of the page.
        @return: Load status (a boolean), as returned by L{_wait_load}.
        """
        target = QUrl(url)
        self.webframe.load(target)
        return self._wait_load()

    def load_request(self, req):
        """
        Load a network request in the current webframe and wait for it.

        @param req: Request object passed to the webframe's C{load}.
        @return: Load status (a boolean), as returned by L{_wait_load}.
        """
        frame = self.webframe
        frame.load(req)
        return self._wait_load()

    def wait_requests(self, wait_requests = None, url = None, url_regex = None):
        """
        Run the events loop until the expected replies have arrived.

        @param wait_requests: If set, wait until at least this many replies
                              have been counted.
        @param url: If set (and C{url_regex} is not), wait until the URL of
                    the latest reply equals this one.
        @param url_regex: If set, wait until the URL of the latest reply
                          matches this regular expression (C{re.search}).

        There is no time limit on either wait.
        """
        if wait_requests:
            while self._replies < wait_requests:
                self._events_loop()
            self._events_loop(0.0)
        if not (url_regex or url):
            return
        baseline = self._replies
        while True:
            # The reply URL is only examined once a new reply has arrived.
            if baseline != self._replies:
                if url_regex:
                    matched = re.search(url_regex, self._reply_url)
                else:
                    matched = url == self._reply_url
                if matched:
                    break
            self._events_loop()
        self._events_loop(0.0)
    
    def click(self, selector, wait_load=False, wait_requests=None, timeout=None):
        """
        Simulate a click on an element of the page.

        @param selector: jQuery selector.
        @param wait_load: If True, it will wait until a new page is loaded.
        @param timeout: Seconds to wait for the page to load before
                                       raising an exception.
        @param wait_requests: How many requests to wait before returning. Useful
                              for AJAX requests.
        @return: The load status when C{wait_load} is true, otherwise None.

        By default this method will not wait for a page to load.
        If you are clicking a link or submit button, you must call this
        method with C{wait_load=True} or, alternatively, call
        L{wait_load} afterwards. However, the recommended way is to use
        L{click_link}.

        When a non-HTML file is clicked this method will download it. The
        file is automatically saved keeping the original structure (as
        wget --recursive does). For example, a file with URL
        I{http://server.org/dir1/dir2/file.ext} will be saved to
        L{download_directory}/I{server.org/dir1/dir2/file.ext}.
        """
        # Restart the reply counter so wait_requests() only counts the
        # replies caused by this click.
        self._replies = 0
        self._runjs_on_jquery(
            "click", "%s('%s').simulate('click')" % (self.jslib, selector))
        self.wait_requests(wait_requests)
        if not wait_load:
            return None
        return self._wait_load(timeout)

    def click_link(self, selector, timeout=None):
        """
        Click a link and wait for the new page to load.

        @param selector: jQuery selector of the link.
        @param timeout: Passed to L{click}.
        @return: Whatever L{click} returns.
        """
        page_loaded = self.click(selector, wait_load=True, timeout=timeout)
        return page_loaded

    def click_ajax(self, selector, wait_requests=1, timeout=None):
        """
        Click an element that triggers AJAX and wait for its requests.

        @param selector: jQuery selector of the element to click.
        @param wait_requests: Number of requests to wait for (passed to L{click}).
        @param timeout: Passed unchanged to L{click}.
        @return: Whatever L{click} returns.
        """
        click_options = dict(wait_requests=wait_requests, timeout=timeout)
        return self.click(selector, **click_options)
    
    def wait_load(self, timeout=None):
        """
        Block until the current page load completes.

        Public wrapper around L{_wait_load}.

        @param timeout: Time to wait (seconds) for the page load to complete.
        @return: Boolean state
        @raise SpynnerTimeout: If timeout is reached.
        """
        load_status = self._wait_load(timeout)
        return load_status

    def wait(self, waittime):
        """
        Keep the events loop running for a given amount of time.

        @param waittime: Time to wait (seconds).

        This is an active wait: L{_events_loop} is called repeatedly until
        the elapsed wall-clock time reaches C{waittime}, so it may be useful
        to wait for Javascript events that change the DOM.
        """
        started = time.time()
        while True:
            elapsed = time.time() - started
            if not elapsed < waittime:
                break
            self._events_loop()

    def close(self):
        """
        Close Browser instance and release resources.

        Destroys the webview when one exists, then deletes the C{webpage}
        attribute when it is set to a truthy value.
        """
        if self.webview:
            self.destroy_webview()
        if not self.webpage:
            return
        del self.webpage

    #}
                      
    #{ Webview
    
    def create_webview(self, show=False):
        """
        Create a QWebView object and insert the current QWebPage into it.

        @param show: If True, show the webview right after creating it.
        @raise SpynnerError: If a webview has already been created.
        """
        if self.webview:
            raise SpynnerError("Cannot create webview (already initialized)")
        self.webview = QWebView()
        self.webview.setPage(self.webpage)
        # The enclosing window deletes itself when closed; its destruction is
        # reported to _on_webview_destroyed.
        toplevel = self.webview.window()
        toplevel.setAttribute(Qt.WA_DeleteOnClose)
        toplevel.connect(toplevel, SIGNAL('destroyed(QObject *)'),
            self._on_webview_destroyed)
        if not show:
            return
        self.show()

    def destroy_webview(self):
        """
        Destroy the current QWebView by deleting the C{webview} attribute.

        @raise SpynnerError: If no webview is currently set.
        """
        if self.webview:
            del self.webview
            return
        raise SpynnerError("Cannot destroy webview (not initialized)")

    def show(self):
        """
        Make the webview browser visible.

        @raise SpynnerError: If the webview has not been created.
        """
        view = self.webview
        if not view:
            raise SpynnerError("Webview is not initialized")
        view.show()

    def hide(self):
        """
        Hide the webview browser.

        @raise SpynnerError: If the webview has not been created.
        """
        view = self.webview
        if not view:
            raise SpynnerError("Webview is not initialized")
        view.hide()

    def browse(self):
        """
        Let the user browse the current page.

        Shows the webview and keeps running the events loop for as long as
        C{self.webview} stays truthy.

        @raise SpynnerError: If the webview has not been created.
        """
        if not self.webview:
            raise SpynnerError("Webview is not initialized")
        self.show()
        while True:
            if not self.webview:
                break
            self._events_loop()

    #}

    #{ Webframe

    def set_webframe_to_default(self):
        """Make the main frame of the current webpage the active webframe."""
        main_frame = self.webpage.mainFrame()
        self.webframe = main_frame

    def set_webframe(self, framenumber):
        """
        Make a child frame of the current webframe the active webframe.

        @param framenumber: Index into the current webframe's child frames
                            (an int, or anything C{int()} accepts).
        @raise SpynnerError: If the index cannot be converted to an integer
                             or no child frame exists at that position.

        After switching, C{self.javascript} followed by a C{noConflict}
        alias definition for L{jslib} is run in the newly selected frame.
        """
        child_frames = self.webframe.childFrames()

        try:
            self.webframe = child_frames[int(framenumber)]
        except (IndexError, TypeError, ValueError):
            # Only lookup/conversion failures mean "no such frame"; anything
            # else is a real error and must not be hidden.
            raise SpynnerError("childframe does not exist")

        # Inject jquery into frame
        jscode = "var %s = jQuery.noConflict();" % self.jslib
        self.runjs(self.javascript + jscode, debug=False)

    #}
                        
    #{ Form manipulation
    
    def fill(self, selector, value):
        """
        Fill an input text with a string value using a jQuery selector.

        @param selector: jQuery selector of the input element(s).
        @param value: String to set as the element value.

        The value is embedded in a single-quoted Javascript string literal,
        so backslashes, single quotes and line breaks are escaped first.
        """
        # Backslashes must be escaped before quotes, otherwise the backslash
        # added for a quote would itself be doubled (and a trailing backslash
        # in the value would swallow the closing quote).
        escaped_value = (value.replace("\\", "\\\\")
                              .replace("'", "\\'")
                              .replace("\n", "\\n")
                              .replace("\r", "\\r"))
        jscode = "%s('%s').val('%s')" % (self.jslib, selector, escaped_value)
        self._runjs_on_jquery("fill", jscode)

    def check(self, selector):
        """
        Check an input checkbox using a jQuery selector.

        @param selector: jQuery selector of the checkbox element(s).
        """
        template = "%s('%s').attr('checked', true)"
        self._runjs_on_jquery("check", template % (self.jslib, selector))

    def uncheck(self, selector):
        """
        Uncheck an input checkbox using a jQuery selector.

        @param selector: jQuery selector of the checkbox element(s).
        """
        template = "%s('%s').attr('checked', false)"
        self._runjs_on_jquery("uncheck", template % (self.jslib, selector))

    def choose(self, selector, value):
        """
        Choose a radio input using a jQuery selector.

        @param selector: jQuery selector matching the radio inputs.
        @param value: Value of the radio input to click; single quotes in it
                      are backslash-escaped before it is embedded in the
                      C{[value=...]} filter.
        """
        quoted = "\\'".join(value.split("'"))
        template = "%s('%s').filter('[value=%s]').simulate('click')"
        self._runjs_on_jquery("choose", template % (self.jslib, selector, quoted))


    def select(self, selector):
        """
        Choose an option in a select using a jQuery selector.

        @param selector: jQuery selector of the option element(s) to mark
                         as selected.
        """
        template = "%s('%s').attr('selected', 'selected')"
        self._runjs_on_jquery("select", template % (self.jslib, selector))
    
    # Alias: "submit" is the same callable as click_link, i.e. it clicks the
    # selected element and waits for the page to load.
    submit = click_link
      
    #}
    
    #{ Javascript 
    
    def runjs(self, jscode, debug=True):
        """
        Evaluate Javascript code in the current webframe.

        @param jscode: Javascript code to inject.
        @param debug: Set to False to disable debug output for this injection.
        @return: The result of evaluating the code in the webframe.

        jQuery can be called even if the original page does not include it,
        as Spynner injects the library for every loaded page. Use C{jq(...)}
        instead of C{jQuery} or the common C{$(...)} shortcut.

        @note: You can change the jq alias (see L{jslib}).
        """
        if debug:
            self._debug(DEBUG, "Run Javascript code: %s" % jscode)

        # XXX Evaluating the JS twice must be wrong, but the underlying bug
        # is proving tricky to find. JavaScriptCore/runtime/Completion.cpp
        # (sometimes) catches an exception and returns
        # "TypeError: Type error", yet the JS appears to complete after the
        # function has already returned. So an invalid result is retried once.
        evaluate = self.webframe.evaluateJavaScript
        result = evaluate(jscode)
        if not result.isValid():
            result = evaluate(jscode)
        return result

    def set_javascript_confirm_callback(self, callback):
        """
        Register the function that answers Javascript confirm pop-ups.

        Javascript confirmations are not answered by default, so set a
        callback if the page you are working with pops them up.

        @param callback: Function with signature
                         C{javascript_confirm_callback(url, message)}:

            - url: Url where the popup was launched.
            - message: String message.

        The callback should return a boolean (True meaning 'yes', False
        meaning 'no').
        """
        handler = callback
        self._javascript_confirm_callback = handler

    def set_javascript_prompt_callback(self, callback):
        """
        Register the function that answers Javascript prompts.

        Javascript prompts are not answered by default, so set a callback
        if the page you are working with pops them up.

        @param callback: Function with signature
                         C{javascript_prompt_callback(url, message, defaultvalue)}:

            - url: Url where the popup prompt was launched.
            - message: String message.
            - defaultvalue: Default value for prompt answer

        The callback should return a string with the answer or None to
        cancel the prompt.
        """
        handler = callback
        self._javascript_prompt_callback = handler

    #}
    
    #{ Cookies
    
    def get_cookies(self):
        """
        Return the current cookies as a string in Mozilla format.

        @return: Result of C{mozillaCookies()} on the browser's cookie jar.
        """
        jar = self.cookiesjar
        return jar.mozillaCookies()

    def set_cookies(self, string_cookies):
        """
        Set cookies from a string with Mozilla-format cookies.

        @param string_cookies: Cookies in Mozilla format.
        @return: Result of C{setMozillaCookies()} on the browser's cookie jar.
        """
        jar = self.cookiesjar
        return jar.setMozillaCookies(string_cookies)

    #}

    #{ Proxies

    def get_proxy(self):
        """
        Return the current proxy.

        @return: Result of C{proxy()} on the browser's network manager.
        """
        network_manager = self.manager
        return network_manager.proxy()

    def set_proxy(self, string_proxy):
        """
        Set proxy [http|socks5]://username:password@hostname:port

        @param string_proxy: Proxy URL. The scheme selects the proxy type;
                             any scheme other than C{socks5} or C{http}
                             installs a proxy of type 2 with no host set.
        @return: The proxy reported by the network manager after the change.
        """
        urlinfo = urlparse.urlparse(string_proxy)

        # Numeric proxy type codes passed to QNetworkProxy.setType(); in Qt's
        # QNetworkProxy::ProxyType enum 1 is Socks5Proxy, 2 is NoProxy and
        # 3 is HttpProxy.
        type_by_scheme = {'socks5': 1, 'http': 3}

        proxy = QNetworkProxy()
        if urlinfo.scheme not in type_by_scheme:
            proxy.setType(2)
            self.manager.setProxy(proxy)
            return self.manager.proxy()
        proxy.setType(type_by_scheme[urlinfo.scheme])

        proxy.setHostName(urlinfo.hostname)
        proxy.setPort(urlinfo.port)
        # Missing credentials are passed on as empty strings.
        proxy.setUser(urlinfo.username if urlinfo.username is not None else '')
        proxy.setPassword(urlinfo.password if urlinfo.password is not None else '')

        self.manager.setProxy(proxy)
        return self.manager.proxy()
      
    #}
    
    #{ Download files
                
    def download(self, url, outfd=None):
        """
        Download a given URL using current cookies.

        @param url: URL or path to download
        @param outfd: Output file-like stream. If None, return data string.
        @return: When C{outfd} is given: bytes downloaded (None if something
                 went wrong). When C{outfd} is None: the downloaded data.
        @raise SpynnerError: If the reply reports an error right after the
                             request is issued.
        @note: If url is a path, it is joined to the current URL.
        """
        def _on_reply(reply):
            # Record success/failure; this also ends the wait loop below.
            self._download_reply_status = not bool(reply.error())

        self._download_reply_status = None
        if not urlparse.urlsplit(url).scheme:
            url = urlparse.urljoin(self.url, url)
        request = QNetworkRequest(QUrl(url))
        # Create a new manager to process this download
        manager = QNetworkAccessManager()
        # The cookie jar has to be installed before the request is created,
        # otherwise the request goes out without the current cookies.
        cookiejar = self.manager.cookieJar()
        manager.setCookieJar(cookiejar)
        # setCookieJar() makes the temporary manager the jar's parent; give
        # the jar back to the main manager so it is not deleted together
        # with the temporary one.
        cookiejar.setParent(self.manager)
        manager.connect(manager, SIGNAL('finished(QNetworkReply *)'), _on_reply)
        reply = manager.get(request)
        if reply.error():
            raise SpynnerError("Download error: %s" % reply.errorString())
        reply.downloaded_nbytes = 0
        # Test identity, not truthiness: a caller-supplied stream must be
        # used even if it happens to evaluate as false.
        outfd_set = outfd is not None
        if not outfd_set:
            outfd = StringIO()
        self._start_download(reply, outfd)
        # Run the events loop until _on_reply has stored the outcome.
        while self._download_reply_status is None:
            self._events_loop()
        if outfd_set:
            return (reply.downloaded_nbytes if not reply.error() else None)
        else:
            return outfd.getvalue()
    
    #}
            
    #{ HTML and tag soup parsing
    
    def set_html_parser(self, parser):
        """
        Register the HTML parser used to generate the HTML L{soup}.

        @param parser: Callback called to generate the soup.

        Once a parser is set for a Browser, the property L{soup} returns
        the parsed HTML.
        """
        html_parser = parser
        self._html_parser = html_parser

    def html_contains(self, regexp):
        """
        Tell whether the current HTML matches a regular expression.

        @param regexp: Pattern searched for (with C{re.search}) in C{self.html}.
        @return: True if the pattern is found anywhere, False otherwise.
        """
        found = re.search(regexp, self.html)
        return found is not None

    #}

    #{ HTTP Authentication
     
    def set_http_authentication_callback(self, callback):
        """
        Register the callback that answers HTTP authentication requests.

        @param callback: Function with signature
                         C{http_authentication_callback(url, realm)}:

            - C{url}: URL where the request was made.
            - C{realm}: Realm requiring authentication.

        The callback should return a pair of strings (user, password), or
        None if you don't want to answer.
        """
        handler = callback
        self._http_authentication_callback = handler
    
    #}
             
    #{ Miscellaneous
    
    def snapshot(self, box=None, format=QImage.Format_ARGB32):
        """
        Render the page's main frame into an image.

        @param box: 4-element tuple containing box to capture (x1, y1, x2, y2).
                    If None, capture an image the size of the viewport.
        @param format: QImage format (see QImage::Format_*).
        @return: A QImage image.

        Typical usage:

        >>> browser.load(url)
        >>> browser.snapshot().save("webpage.png")
        """
        if box:
            left, top, right, bottom = box
            # Paint everything up to the far corner of the box; the box
            # itself is cut out after rendering.
            canvas = QImage(QSize(right, bottom), format)
        else:
            canvas = QImage(self.webpage.viewportSize(), format)

        painter = QPainter(canvas)
        self.webpage.mainFrame().render(painter)
        painter.end()

        if not box:
            return canvas
        return canvas.copy(left, top, right - left, bottom - top)
            
    def get_url_from_path(self, path):
        """
        Return the URL for a given path using the current URL as base.

        @param path: Path (or URL) joined to C{self.url}.
        @return: Result of C{urlparse.urljoin} on the current URL and C{path}.
        """
        base_url = self.url
        return urlparse.urljoin(base_url, path)

    def set_url_filter(self, url_filter):
        """
        Register the callback used to filter requested URLs.

        By default all requested elements of a page are loaded, including
        stylesheets, images and many other elements that you may not need.
        The callback set here is called every time a new request is made.

        @param url_filter: Function with signature
                           C{my_url_filter(operation, url)}:

            - C{operation}: string with HTTP operation: C{get}, C{head},
                            C{post} or C{put}.
            - C{url}: requested item URL.

        It should return C{True} (proceed) or C{False} (reject).
        """
        filter_callback = url_filter
        self._url_filter = filter_callback
Ejemplo n.º 35
0
class _WebkitRendererHelper(QObject):
    """This helper class is doing the real work. It is required to
    allow WebkitRenderer.render() to be called "asynchronously"
    (but always from Qt's GUI thread).

    All instance attributes of the parent object are copied onto the
    helper, so the settings read below (width, height, timeout, wait,
    logger, qWebSettings, scaleToWidth, ...) come from the parent.
    """

    def __init__(self, parent):
        """Copies the properties from the parent (WebkitRenderer) object,
        creates the required instances of QWebPage, QWebView and QMainWindow
        and registers some Slots.
        """
        QObject.__init__(self)

        # Copy properties from parent
        for key, value in parent.__dict__.items():
            setattr(self, key, value)

        # Create and connect required PyQt4 objects
        self._page = QWebPage()
        self._view = QWebView()
        self._view.setPage(self._page)
        self._window = QMainWindow()
        self._window.setCentralWidget(self._view)

        # Import QWebSettings
        for key, value in self.qWebSettings.items():
            self._page.settings().setAttribute(key, value)

        # Connect required event listeners
        self.connect(
            self._page, SIGNAL("loadFinished(bool)"),
            self._on_load_finished
        )
        self.connect(
            self._page, SIGNAL("loadStarted()"),
            self._on_load_started
        )
        self.connect(
            self._page.networkAccessManager(),
            SIGNAL("sslErrors(QNetworkReply *,const QList<QSslError>&)"),
            self._on_ssl_errors
        )
        self.connect(
            self._page.networkAccessManager(),
            SIGNAL("finished(QNetworkReply *)"), self._on_each_reply
        )

        # The way we will use this, it seems to be unnecessary to have
        # scrollbars enabled.
        self._page.mainFrame().setScrollBarPolicy(
            Qt.Horizontal, Qt.ScrollBarAlwaysOff
        )
        self._page.mainFrame().setScrollBarPolicy(
            Qt.Vertical, Qt.ScrollBarAlwaysOff
        )
        self._page.settings().setUserStyleSheetUrl(
            QUrl("data:text/css,html,body{overflow-y:hidden !important;}")
        )

        # Show this widget
        self._window.show()

    def __del__(self):
        """Clean up Qt4 objects. """
        self._window.close()
        del self._window
        del self._view
        del self._page

    def render(self, url):
        """The real worker. Loads the page (_load_page) and awaits
        the end of the given 'wait' delay. While it is waiting, outstanding
        QApplication events are processed.
        After the delay, the page is painted into a QImage (when
        'renderTransparentBackground' is set) or the window/widget
        (depending on 'grabWholeWindow') is grabbed into a QPixmap.
        The result is returned after postprocessing (_post_process_image).
        """
        self._load_page(url, self.width, self.height, self.timeout)
        # Wait for end of timer. In this time, process
        # other outstanding Qt events.
        if self.wait > 0:
            if self.logger:
                self.logger.debug("Waiting %d seconds " % self.wait)

            waitToTime = time.time() + self.wait
            # Keep waiting for the full delay even while the event queue is
            # momentarily empty; only the deadline ends the loop.
            while time.time() < waitToTime:
                if QApplication.hasPendingEvents():
                    QApplication.processEvents()

        if self.renderTransparentBackground:
            # Another possible drawing solution
            image = QImage(self._page.viewportSize(), QImage.Format_ARGB32)
            image.fill(QColor(255, 0, 0, 0).rgba())

            # http://ariya.blogspot.com/2009/04/transparent-qwebview-and-qwebpage.html
            palette = self._view.palette()
            palette.setBrush(QPalette.Base, Qt.transparent)
            self._page.setPalette(palette)
            self._view.setAttribute(Qt.WA_OpaquePaintEvent, False)

            painter = QPainter(image)
            painter.setBackgroundMode(Qt.TransparentMode)
            self._page.mainFrame().render(painter)
            painter.end()
        else:
            if self.grabWholeWindow:
                # Note that this does not fully ensure that the
                # window still has the focus when the screen is
                # grabbed. This might result in a race condition.
                self._view.activateWindow()
                image = QPixmap.grabWindow(self._window.winId())
            else:
                image = QPixmap.grabWidget(self._window)

        return self._post_process_image(image)

    def _load_page(self, url, width, height, timeout):
        """
        This method implements the logic for retrieving and displaying
        the requested page.

        Raises RuntimeError when 'timeout' (if > 0) expires before the load
        finishes, and BadURLException when the load finishes unsuccessfully.
        Afterwards the window is resized to the page's content size, with
        'width'/'height' overriding the respective dimension when > 0.
        """

        # This is an event-based application. So we have to wait until
        # "loadFinished(bool)" raised.
        cancelAt = time.time() + timeout
        self.__loading = True
        self.__loading_result = False  # Default
        # TODO: fromEncoded() needs to be used in some situations.  Some
        # sort of flag should be passed in to WebkitRenderer maybe?
        #self._page.mainFrame().load(QUrl.fromEncoded(url))
        self._page.mainFrame().load(QUrl(url))
        while self.__loading:
            if timeout > 0 and time.time() >= cancelAt:
                raise RuntimeError("Request timed out on %s" % url)
            while QApplication.hasPendingEvents() and self.__loading:
                QCoreApplication.processEvents()

        if self.logger:
            self.logger.debug("Processing result")

        if not self.__loading_result:
            if self.logger:
                self.logger.warning("Failed to load %s" % url)
            # A failed load is an error whether or not a logger is set.
            raise BadURLException("Failed to load %s" % url)

        # Set initial viewport (the size of the "window")
        size = self._page.mainFrame().contentsSize()
        if self.logger:
            self.logger.debug("contentsSize: %s", size)

        if width > 0:
            size.setWidth(width)
        if height > 0:
            size.setHeight(height)

        self._window.resize(size)

    def _post_process_image(self, qImage):
        """If 'scaleToWidth' or 'scaleToHeight' are set to a value
        greater than zero this method will scale the image
        using the method defined in 'scaleRatio'. With 'crop' the scaled
        image is additionally cut to scaleToWidth x scaleToHeight.
        """
        if self.scaleToWidth > 0 or self.scaleToHeight > 0:
            # Scale this image
            if self.scaleRatio == 'keep':
                ratio = Qt.KeepAspectRatio
            elif self.scaleRatio in ['expand', 'crop']:
                ratio = Qt.KeepAspectRatioByExpanding
            else:  # 'ignore'
                ratio = Qt.IgnoreAspectRatio
            qImage = qImage.scaled(
                self.scaleToWidth, self.scaleToHeight, ratio
            )
            if self.scaleRatio == 'crop':
                qImage = qImage.copy(
                    0, 0, self.scaleToWidth, self.scaleToHeight
                )
        return qImage

    def _on_each_reply(self, reply):
        """Logs each requested uri (when a logger is configured)."""
        if self.logger:
            self.logger.debug("Received %s" % (reply.url().toString()))

    # Eventhandler for "loadStarted()" signal
    def _on_load_started(self):
        """Slot that sets the '__loading' property to true."""
        if self.logger:
            self.logger.debug("loading started")

        self.__loading = True

    # Eventhandler for "loadFinished(bool)" signal
    def _on_load_finished(self, result):
        """Slot that sets the '__loading' property to false and stores
        the result code in '__loading_result'.
        """
        if self.logger:
            self.logger.debug("loading finished with result %s", result)

        self.__loading = False
        self.__loading_result = result

    # Eventhandler for "sslErrors(QNetworkReply *,const QList<QSslError>&)"
    # signal.
    def _on_ssl_errors(self, reply, errors):
        """Slot that writes SSL warnings into the log but ignores them."""
        for e in errors:
            if self.logger:
                self.logger.warning("SSL: " + e.errorString())

        reply.ignoreSslErrors()
Ejemplo n.º 36
0
class WebkitRenderer(QObject):
    """Loads a URL into an off-screen QWebPage and paints it into a QImage."""

    # Initializes the QWebPage object and registers some slots
    def __init__(self):
        # The QObject base must be initialised before connect() can be used.
        QObject.__init__(self)
        logging.debug("Initializing class %s", self.__class__.__name__)
        self._page = QWebPage()
        self.connect(self._page, SIGNAL("loadFinished(bool)"), self.__on_load_finished)
        self.connect(self._page, SIGNAL("loadStarted()"), self.__on_load_started)

        # The way we will use this, it seems to be unnecessary to have scrollbars enabled
        self._page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
        self._page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)

        # State updated by the loadStarted/loadFinished slots
        self.__loading = False
        self.__loading_result = False

    def render(self, url, width=0, height=0, timeout=10):
        """Load "url" and render it.

        url     -- address to load.
        width   -- viewport width override; the content width is used if 0.
        height  -- viewport height override; the content height is used if 0.
        timeout -- seconds to wait for the load; values <= 0 disable the limit.

        Returns a QImage on success. Raises RuntimeError if the request
        times out or the page fails to load.
        """
        logging.debug("render(%s, timeout=%d)", url, timeout)

        # This is an event-based application. So we have to wait until
        # "loadFinished(bool)" raised.
        cancelAt = time.time() + timeout
        # Reset the state before starting, so that the wait loop below runs
        # and a result left over from a previous render() is never reused.
        self.__loading = True
        self.__loading_result = False
        self._page.mainFrame().load(QUrl(url))
        while self.__loading:
            if timeout > 0 and time.time() >= cancelAt:
                raise RuntimeError("Request timed out")
            QCoreApplication.processEvents()

        logging.debug("Processing result")

        if not self.__loading_result:
            raise RuntimeError("Failed to load %s" % url)

        # Set initial viewport (the size of the "window")
        size = self._page.mainFrame().contentsSize()
        if width > 0:
            size.setWidth(width)
        if height > 0:
            size.setHeight(height)
        self._page.setViewportSize(size)

        # Paint this frame into an image
        image = QImage(self._page.viewportSize(), QImage.Format_ARGB32)
        painter = QPainter(image)
        self._page.mainFrame().render(painter)
        painter.end()

        return image

    # Eventhandler for "loadStarted()" signal
    def __on_load_started(self):
        """Slot that marks a page load as in progress."""
        logging.debug("loading started")
        self.__loading = True

    # Eventhandler for "loadFinished(bool)" signal
    def __on_load_finished(self, result):
        """Slot that marks the load as finished and stores its result."""
        logging.debug("loading finished with result %s", result)
        self.__loading = False
        self.__loading_result = result
Ejemplo n.º 37
0
class Crawler:
    """Fetches species names and ortholog data and drives the analysis steps.

    Results are cached in files below the directory named by the
    module-level ``run_name``.
    """
    # NOTE: these are class-level attributes; the mutable ones (dicts and
    # lists) are shared by every Crawler instance.
    geneToOrthologs = {}
    geneToSpecies = {}
    geneSequences = {}
    geneFamilies = None  # A list of sets containing the proteins in that family
    allSpecies = None
    species1Names = None
    species2Names = None
    speciesPairs = []
    malformedXMLFiles = []

    def main(self):
        """Create the working directories, run every step, then exit."""
        directories = (
            run_name,
            run_name+'/clustalin',
            run_name+'/clustalout',
            run_name+'/roundup',
            run_name+'/mktest_out',
        )
        for directory in directories:
            if not os.path.isdir(directory):
                os.mkdir(directory)
        self.load_species_names_list()
        self.fetch_uncached_orthologs()
        self.load_gene_list()
        self.find_gene_families()
        # self.output_gene_families()
        self.fetch_gene_sequences()
        self.align_families()
        self.mktest_families()
        exit(0)

############################################# load_species_name_list #############################################
    def load_species_names_list(self):
        """Load the species names from the cache file, or fetch them.

        Without a cache file, a QWebPage load is started and events are
        processed until process_organism_list has filled in allSpecies.
        """
        cache_path = '%s/species_names.json'%run_name
        if os.path.isfile(cache_path):
            print("Loading cached species names...")
            with open(cache_path) as cache_file:
                sn = cjson.decode(cache_file.read())
            self.allSpecies = sn['allSpecies']
            self.species1Names = sn['species1Names']
            self.species2Names = sn['species2Names']
        else:
            print("Fetching species names...")
            self.webpage = QWebPage()
            self.webpage.loadFinished.connect(self.process_organism_list)
            self.webpage.mainFrame().load(QUrl('http://roundup.hms.harvard.edu/retrieve/'))
            while self.allSpecies is None:
                time.sleep(.05)
                appInstance.processEvents()

    def process_organism_list(self, bool):
        """Slot for loadFinished: read the species list and cache it.

        ``bool`` is the value delivered by the loadFinished signal; it is
        not used.
        """
        organisms_query = 'select#id_genome_choices'
        organisms_element = self.webpage.mainFrame().findAllElements(organisms_query).at(0)
        elmt = organisms_element.firstChild()
        self.allSpecies = []
        # NOTE(review): the loop stops when it reaches the last child without
        # appending it -- confirm that skipping the last entry is intended.
        while True:
            if elmt == organisms_element.lastChild():
                break
            self.allSpecies.append(str(elmt.attribute('value')))
            elmt = elmt.nextSibling()
        self.species1Names = filter(is_species_1, self.allSpecies)
        self.species2Names = filter(is_species_2, self.allSpecies)
        s_cnt, s1_cnt, s2_cnt = len(self.allSpecies), len(self.species1Names), len(self.species2Names)
        print("Found %i species, %i of type 1 and %i of type 2."%(s_cnt, s1_cnt, s2_cnt))
        savedict = {'allSpecies':self.allSpecies, 'species1Names':self.species1Names, 'species2Names':self.species2Names}
        with open('%s/species_names.json'%run_name,'w') as cache_file:
            cache_file.write(cjson.encode(savedict))

############################################# fetch_uncached_orthologs #############################################
    def fetch_uncached_orthologs(self):
        """Work out all species pairs and fetch those without a cached file.

        The pairs are the bridge pairs plus every 2-combination within
        species1Names and within species2Names. A pair counts as cached when
        '<run_name>/roundup/<cache_name>.xml' exists.
        """
        self.downloader_pool = eventlet.greenpool.GreenPool(size=5)
        self.pairs_to_download = []
        bridge_pairs = bridges(self.species1Names, self.species2Names)
        print("Bridges:\n\t%s"%('\n\t'.join(itertools.starmap(self.cache_name, bridge_pairs))))
        combs1 = len(self.species1Names)*(len(self.species1Names)-1)/2
        combs2 = len(self.species2Names)*(len(self.species2Names)-1)/2
        self.speciesPairs.extend(bridge_pairs)
        self.speciesPairs.extend(itertools.combinations(self.species1Names,2))
        self.speciesPairs.extend(itertools.combinations(self.species2Names,2))
        print("That's %i combinations of species1, %i of species2, %i bridges."%(combs1,combs2,len(bridge_pairs)))
        numPairs = len(self.speciesPairs)
        for i, (l, r) in enumerate(self.speciesPairs):
            if i%20 == 0:
                # Progress line; the escape sequence moves the cursor back up.
                print("%i%% (%i/%i)\x1B[1F"%(int(i*100.0/numPairs),i,numPairs))
            if not os.path.isfile('%s/roundup/%s.xml'%(run_name,self.cache_name(l,r))):
                self.pairs_to_download.append((l,r))
        num_to_dl = len(self.pairs_to_download)
        print("Fetching %i uncached combinations of species..."%num_to_dl)
        pdp = self.downloader_pool.imap(self.fetch_pair, self.pairs_to_download)
        for i, response in enumerate(pdp, 1):
            cachename = self.cache_name(*response)
            print("%i%% (%i/%i): %s\x1B[1F"%(int(i*100.0/num_to_dl), i, num_to_dl, cachename))

    def cache_name(self, lSpecies, rSpecies):
        """Return a file-name-safe identifier for a pair of species.

        The two names are joined with '---' and every character other than
        ASCII letters, digits, space and '-_.()' is dropped.
        """
        name = lSpecies+'---'+rSpecies
        valid_chrs = '-_.() %s%s'%(string.ascii_letters, string.digits)
        filename = ''.join(c for c in name if c in valid_chrs)
        return filename

    def fetch_pair(self, pair):
        """Fetch one (lSpecies, rSpecies) pair, retrying on URL errors.

        The attempt is repeated without limit for as long as it raises
        urllib2.URLError. Returns the pair as a tuple.
        """
        lSpecies, rSpecies = pair
        while True:
            try:
                self.attempt_fetch_pair((lSpecies,rSpecies))
                break
            except urllib2.URLError as e:
                print("Error fetching (%s,%s): %s"%(lSpecies,rSpecies,e))
        return (lSpecies,rSpecies)
Ejemplo n.º 38
0
Archivo: ss.py Proyecto: chemila/bin
    #screen = QtGui.QDesktopWidget().screenGeometry()
    size = webpage.mainFrame().contentsSize()
    # Set the size of the (virtual) browser window
    webpage.setViewportSize(webpage.mainFrame().contentsSize())

    # Paint this frame into an image
    image = QImage(webpage.viewportSize(), QImage.Format_ARGB32)
    painter = QPainter(image)
    webpage.mainFrame().render(painter)
    painter.end()

    image.save("/tmp/output.png")
    sys.exit(0)

# Arguments handed to Qt: the program name plus an explicit display.
qtargs = [sys.argv[0], "-display", ":0"]

app = QApplication(qtargs, True)
signal.signal(signal.SIGINT, signal.SIG_DFL)

# Page without scrollbars; onLoadFinished runs once loading has finished.
webpage = QWebPage()
for orientation in (Qt.Horizontal, Qt.Vertical):
    webpage.mainFrame().setScrollBarPolicy(orientation, Qt.ScrollBarAlwaysOff)
webpage.connect(webpage, SIGNAL("loadFinished(bool)"), onLoadFinished)
# The URL to load is the first command-line argument.
webpage.mainFrame().load(QUrl(sys.argv[1]))

exit_code = app.exec_()
sys.exit(exit_code)
Ejemplo n.º 39
0
import sys
import signal
import os
from PyQt4.QtCore import *
from PyQt4.QtGui import *
from PyQt4.QtWebKit import QWebPage

app = QApplication(sys.argv)
# Use the default SIGINT action instead of Python's handler.
signal.signal(signal.SIGINT, signal.SIG_DFL)
webpage = QWebPage()


def onLoadFinished(result):
    """Slot for loadFinished(bool): save the page as output.png and exit.

    result -- value delivered by the signal; a false value is treated as a
              failed request and ends the program with exit status 1.
    """
    if not result:
        print("Request failed")
        sys.exit(1)

    # Size the viewport to the whole document so the full page is painted.
    webpage.setViewportSize(webpage.mainFrame().contentsSize())
    image = QImage(webpage.viewportSize(), QImage.Format_ARGB32)
    painter = QPainter(image)
    webpage.mainFrame().render(painter)
    painter.end()
    if os.path.exists("output.png"):
        os.remove("output.png")
    image.save("output.png")
    sys.exit(0)  # quit this application


# Connect the handler before starting the load so that the finished signal
# cannot be emitted while nothing is listening.
webpage.connect(webpage, SIGNAL("loadFinished(bool)"), onLoadFinished)
webpage.mainFrame().load(QUrl("http://google.pl"))

sys.exit(app.exec_())