Beispiel #1
0
def highlights(files, options):
    """Print the notes attached to highlighted text in PDF files.

    Each entry of ``files`` is handled in order.  A directory is never
    opened as a document: it is descended into when ``options.recursive``
    is set and skipped otherwise.

    All is printed to output or error stream (usually stdout and stderr).

    Options:
    * options.recursive     Handle directories
    * options.use_title     Print filename/document title instead of full path
    * options.valid_types   PDF annotation types to process
    * options.filter_keys   Only print stated keys.
    * options.remove_key    Don't print key tags
    * options.with_path     Print the file path with each line
    * options.with_page     Print the page number with each line
    * options.buffered      Buffer output
    * options.list_keys     Print key only
    * options.stdout        Output stream
    * options.stderr        Error stream

    """
    for entry in files:
        if not os.path.isdir(entry):
            document = Pdf(entry, options, pgm=sys.argv[0])

            if options.list_keys:
                # Listing keys forces key tags to be kept; note that this
                # modifies the caller's options object.
                options.remove_key = False
                for annotation in document.annotations(options):
                    list_keys(annotation.note, options)
            else:
                for annotation in document.annotations(options):
                    print_note(annotation.note, annotation.page, options)
            continue

        # Directory: recurse over its direct children only on request.
        if options.recursive:
            children = [os.path.join(entry, name)
                        for name in os.listdir(entry)]
            highlights(children, options)
Beispiel #2
0
 def test_to_pdf(self):
     """Converting one HTML snippet must match the stored PDF fixture."""
     first_html = "<h1>Hola Mundo</h1>"
     pdf = Pdf(first_html)
     # The context manager closes the fixture even if reading fails.
     with open('tests/fixtures/hola_mundo.pdf', 'rb') as fh:
         data = fh.read()
     output = pdf.to_pdf(first_html)
     self.assertPdfEquals(output, data)
Beispiel #3
0
 def test_to_pdf(self):
     """Check ``Pdf.to_pdf`` output against the ``hola_mundo.pdf`` fixture."""
     first_html = "<h1>Hola Mundo</h1>"
     pdf = Pdf(first_html)
     # ``with`` guarantees the fixture handle is released on any error.
     with open('tests/fixtures/hola_mundo.pdf', 'rb') as fixture:
         data = fixture.read()
     output = pdf.to_pdf(first_html)
     self.assertPdfEquals(output, data)
Beispiel #4
0
 def test_get_stream(self):
     """The stream of a two-snippet Pdf must match the stored fixture."""
     first_html = "<h1>Hola Mundo</h1>"
     second_html = "<h2>This is sparta</h2>"
     pdf = Pdf(first_html)
     pdf.append(second_html)
     # The context manager closes the fixture even if reading fails.
     with open('tests/fixtures/stream.pdf', 'rb') as fh:
         data = fh.read()
     output = pdf.get_stream()
     self.assertPdfEquals(output, data)
Beispiel #5
0
 def test_get_stream(self):
     """Check ``Pdf.get_stream`` output against the ``stream.pdf`` fixture."""
     first_html = "<h1>Hola Mundo</h1>"
     second_html = "<h2>This is sparta</h2>"
     pdf = Pdf(first_html)
     pdf.append(second_html)
     # ``with`` guarantees the fixture handle is released on any error.
     with open('tests/fixtures/stream.pdf', 'rb') as fixture:
         data = fixture.read()
     output = pdf.get_stream()
     self.assertPdfEquals(output, data)
Beispiel #6
0
def main():
    """Crop the cover PDF and convert it once per configured output width.

    ``cover.pdf``, ``out`` and ``config.json`` are resolved against the
    current working directory.  The single positional command line
    argument ``cover_type`` is handed to the configuration.
    """
    source_pdf = path.abspath('cover.pdf')
    output_dir = path.abspath('out')
    settings_file = path.abspath('config.json')

    cli = argparse.ArgumentParser()
    cli.add_argument('cover_type', help='Define the type of cover')
    cover_type = cli.parse_args().cover_type

    settings = Config(settings_file, cover_type=cover_type)

    document = Pdf(source_pdf)
    geometry = settings.get_cover_geometry()
    document.set_cropbox(geometry)

    image = Img(document.cover.name, output_folder=output_dir)
    for target_width in settings.get_output_width():
        image.convert(target_width)
Beispiel #7
0
    def execute(self):
        """Load the input PDF, run it through every effector and save it.

        Each effector's ``apply`` result replaces the current document.
        Whatever document is current when the method leaves, normally or
        through an exception, is closed.
        """
        document = Pdf()

        try:
            source_path = self.config.getInputFilePath()
            document.load(source_path)

            for stage in self.effectorChain:
                document = stage.apply(document)

            target_path = self.config.getOutputFilePath()
            document.save(target_path)
        finally:
            document.close()
Beispiel #8
0
 def confirmEmailSend(self):
     """Ask for confirmation and, when given, e-mail today's records.

     On "yes" the address is read from ``self.email_entry``, today's rows
     are fetched from the database, passed to ``Pdf.get_pdf`` and the
     e-mail is sent.  On any other answer nothing is sent.  The e-mail
     window is closed in both cases.
     """
     msg = messagebox.askquestion('Send Email', 'Are you sure to send email', icon='warning')
     if msg != 'yes':
         self.emailWin.quit()
         self.emailWin.destroy()
         messagebox.showerror('Cancelled','Your email is not forwarded')
         return

     re_email = self.email_entry.get().strip()
     print(re_email)

     db = Database()
     try:
         querys = db.fetch_today()
     finally:
         # Release the database even when the query itself fails.
         db.db_close()

     pdf = Pdf()
     pdf.get_pdf(querys)
     send_email = EmailSend(re_email)
     send_email.send_email()
     messagebox.showinfo('Done','Your email has been sent')
     self.emailWin.quit()
     self.emailWin.destroy()
Beispiel #9
0
def generar_pdf_concatenado():
    """Build a single PDF with one receipt per selected withdrawal.

    The ``recibo`` ids are read from the submitted form.  ``recibo.html``
    is rendered for every matching ``Retiro`` and appended to one ``Pdf``,
    which is written to the configured ``UPLOAD_FOLDER``.  The file name
    uses ``fecha_como_string()`` of the last withdrawal processed.

    Returns a JSON response carrying the generated file name as ``name``.
    Aborts with 404 when no ids were submitted or none of them match.
    """
    id_retiros = request.form.to_dict(False).get('recibo')
    if not id_retiros:
        return abort(404)

    # Materialise the query: it is iterated once and its last row is
    # needed afterwards for the file name.
    retiros = list(models.Retiro.select().where(id__in=id_retiros))
    if not retiros:
        # An empty result used to crash with NameError on ``retiro`` below.
        return abort(404)

    to_text = Traductor().to_text
    pdf = Pdf()

    for retiro in retiros:
        html = render_template("recibo.html",
                               cooperativa=retiro.socio.cooperativa,
                               retiro=retiro,
                               monto_como_cadena=to_text(retiro.monto))
        pdf.append(html)

    titulo = "recibos_agrupados_%s.pdf" % (retiros[-1].fecha_como_string())

    nombre_archivo = os.path.join(app.config['UPLOAD_FOLDER'], titulo)
    # The context manager closes the file even if writing fails.
    with open(nombre_archivo, 'wb') as archivo_temporal:
        archivo_temporal.write(pdf.get_stream())

    return jsonify(name=titulo)
Beispiel #10
0
def generar_zip_contenedor():
    """Build a zip archive holding one receipt PDF per selected withdrawal.

    The ``recibo`` ids are read from the submitted form.  For every
    matching ``Retiro`` a separate PDF is rendered from ``recibo.html``
    and written to the configured ``UPLOAD_FOLDER``; all of them are then
    added to one zip file in the same folder.  The zip name uses
    ``fecha_como_string()`` of the last withdrawal processed.

    Returns a JSON response carrying the zip file name as ``name``.
    Aborts with 404 when no ids were submitted or none of them match.
    """
    import zipfile

    id_retiros = request.form.to_dict(False).get('recibo')
    if not id_retiros:
        return abort(404)

    # Materialise the query: its last row is needed after the loop.
    retiros = list(models.Retiro.select().where(id__in=id_retiros))
    if not retiros:
        # An empty result used to crash with NameError on ``retiro`` below.
        return abort(404)

    to_text = Traductor().to_text
    archivos_pdf_generados = []

    for retiro in retiros:
        pdf = Pdf()
        html = render_template("recibo.html",
                               cooperativa=retiro.socio.cooperativa,
                               retiro=retiro,
                               monto_como_cadena=to_text(retiro.monto))
        pdf.append(html)

        titulo = models.Retiro.obtener_nombre_por_id(retiro.id)
        nombre_archivo = os.path.join(app.config['UPLOAD_FOLDER'],
                                      titulo + ".pdf")

        archivos_pdf_generados.append(nombre_archivo)
        # The context manager closes the file even if writing fails.
        with open(nombre_archivo, 'wb') as archivo_temporal:
            archivo_temporal.write(pdf.get_stream())

    nombre = "recibos_agrupados_%s.zip" % (retiros[-1].fecha_como_string())
    nombre_zip = os.path.join(app.config['UPLOAD_FOLDER'], nombre)

    # ``archivo_zip`` instead of ``zip`` so the builtin is not shadowed.
    with zipfile.ZipFile(nombre_zip, mode='w') as archivo_zip:
        for nombre_pdf in archivos_pdf_generados:
            # Same text as the old print statement, valid on Python 2 and 3.
            print("zipeando %s" % ([nombre_pdf],))
            archivo_zip.write(nombre_pdf)

    return jsonify(name=nombre)
Beispiel #11
0
def generar_pdf_concatenado():
    """Concatenate the receipts of the selected withdrawals into one PDF.

    Reads the ``recibo`` ids from the submitted form, renders
    ``recibo.html`` for each matching ``Retiro``, appends every page to a
    single ``Pdf`` and writes it into the configured ``UPLOAD_FOLDER``.
    The file name is derived from ``fecha_como_string()`` of the last
    withdrawal processed.

    Returns a JSON response with the file name under ``name``; aborts
    with 404 when the form has no ids or the query matches nothing.
    """
    id_retiros = request.form.to_dict(False).get('recibo')
    if not id_retiros:
        return abort(404)

    # The rows are needed twice (loop + last row), so fetch them once.
    retiros = list(models.Retiro.select().where(id__in=id_retiros))
    if not retiros:
        # Without this guard ``retiro`` was undefined after the loop.
        return abort(404)

    to_text = Traductor().to_text
    pdf = Pdf()

    for retiro in retiros:
        html = render_template("recibo.html",
                               cooperativa=retiro.socio.cooperativa,
                               retiro=retiro,
                               monto_como_cadena=to_text(retiro.monto))
        pdf.append(html)

    titulo = "recibos_agrupados_%s.pdf" % (retiros[-1].fecha_como_string())
    nombre_archivo = os.path.join(app.config['UPLOAD_FOLDER'], titulo)

    # ``with`` releases the handle even when the write raises.
    with open(nombre_archivo, 'wb') as archivo_temporal:
        archivo_temporal.write(pdf.get_stream())

    return jsonify(name=titulo)
Beispiel #12
0
def generar_zip_contenedor():
    """Zip together one receipt PDF per selected withdrawal.

    Reads the ``recibo`` ids from the submitted form.  Each matching
    ``Retiro`` is rendered through ``recibo.html`` into its own PDF inside
    the configured ``UPLOAD_FOLDER``; afterwards all generated PDFs are
    written into a single zip archive in that folder.  The archive name is
    derived from ``fecha_como_string()`` of the last withdrawal processed.

    Returns a JSON response with the archive name under ``name``; aborts
    with 404 when the form has no ids or the query matches nothing.
    """
    import zipfile

    id_retiros = request.form.to_dict(False).get('recibo')
    if not id_retiros:
        return abort(404)

    # The rows are needed twice (loop + last row), so fetch them once.
    retiros = list(models.Retiro.select().where(id__in=id_retiros))
    if not retiros:
        # Without this guard ``retiro`` was undefined after the loop.
        return abort(404)

    to_text = Traductor().to_text
    archivos_pdf_generados = []

    for retiro in retiros:
        pdf = Pdf()
        html = render_template("recibo.html",
                               cooperativa=retiro.socio.cooperativa,
                               retiro=retiro,
                               monto_como_cadena=to_text(retiro.monto))
        pdf.append(html)

        titulo = models.Retiro.obtener_nombre_por_id(retiro.id)
        nombre_archivo = os.path.join(app.config['UPLOAD_FOLDER'],
                                      titulo + ".pdf")

        archivos_pdf_generados.append(nombre_archivo)
        # ``with`` releases the handle even when the write raises.
        with open(nombre_archivo, 'wb') as archivo_temporal:
            archivo_temporal.write(pdf.get_stream())

    nombre = "recibos_agrupados_%s.zip" % (retiros[-1].fecha_como_string())
    nombre_zip = os.path.join(app.config['UPLOAD_FOLDER'], nombre)

    # Named ``archivo_zip`` so the ``zip`` builtin stays usable.
    with zipfile.ZipFile(nombre_zip, mode='w') as archivo_zip:
        for nombre_pdf in archivos_pdf_generados:
            # Same text as the old print statement, valid on Python 2 and 3.
            print("zipeando %s" % ([nombre_pdf],))
            archivo_zip.write(nombre_pdf)

    return jsonify(name=nombre)
Beispiel #13
0
    def __init__(self, app, network_manager, wait_timeout=20,
                 wait_callback=None, viewport_size=(800, 600),
                 user_agent='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2',
                 log_level=30, download_images=True, plugins_enabled=False,
                 java_enabled=False, create_page_callback=None,
                 is_popup=False, max_resource_queued=None,
                 *args, **kargs):
        """Initialise page state, WebKit settings, signals and user agent.

        ``app`` becomes the Qt parent of the page.  ``network_manager`` is
        installed as the page's network access manager.  The remaining
        keyword arguments are stored on the instance or applied to the
        page settings; extra positional/keyword arguments are accepted
        and ignored.
        """
        super(GhostWebPage, self).__init__(parent=app)

        # Instance state
        self._app = app
        self.pdf_engine = Pdf()
        self.http_resources = []
        self.http_resource_page = None
        self.max_resource_queued = max_resource_queued
        self.wait_timeout = wait_timeout
        self.wait_callback = wait_callback
        self.loaded = True
        self.create_page_callback = create_page_callback
        self.is_popup = is_popup
        # Internal library object
        self.ghostInit = GhostInit()

        # WebKit settings
        self.setForwardUnsupportedContent(True)
        web_attributes = (
            (QtWebKit.QWebSettings.AutoLoadImages, download_images),
            (QtWebKit.QWebSettings.JavascriptEnabled, True),
            (QtWebKit.QWebSettings.JavascriptCanOpenWindows, True),
            (QtWebKit.QWebSettings.PluginsEnabled, plugins_enabled),
            (QtWebKit.QWebSettings.JavaEnabled, java_enabled),
        )
        for attribute, enabled in web_attributes:
            self.settings().setAttribute(attribute, enabled)

        self.set_viewport_size(*viewport_size)

        # Page signals
        page_signals = (
            (self.loadFinished, self._page_loaded),
            (self.loadStarted, self._page_load_started),
            (self.loadProgress, self._page_load_progress),
            (self.unsupportedContent, self._unsupported_content),
        )
        for signal, handler in page_signals:
            signal.connect(handler)

        # Network layer
        self.network_manager = network_manager
        self.setNetworkAccessManager(self.network_manager)
        self.network_manager.finished.connect(self._request_ended)

        # User Agent
        self.setUserAgent(user_agent)

        self.main_frame = self.mainFrame()
        self._unsupported_files = {}
        self.windowCloseRequested.connect(self._closeWindow)

        logger.setLevel(log_level)
Beispiel #14
0
def pdf_split():
    """Create a ``Pdf`` in ``split`` mode and build its GUI."""
    Pdf(title='PDF 文档分割', mode='split').construct_gui()
Beispiel #15
0
 def test_append(self):
     """``append`` must add the new snippet after the initial one."""
     first_html = "<h1>Hola Mundo</h1>"
     second_html = "<h2>This is sparta</h2>"
     pdf = Pdf(first_html)
     pdf.append(second_html)
     # assertEquals is a deprecated alias (removed in Python 3.12).
     self.assertEqual(pdf.htmls, [first_html, second_html])
Beispiel #16
0
 def test_append(self):
     """``Pdf.htmls`` must list the initial and the appended snippet in order."""
     first_html = "<h1>Hola Mundo</h1>"
     second_html = "<h2>This is sparta</h2>"
     pdf = Pdf(first_html)
     pdf.append(second_html)
     # Use assertEqual: the assertEquals alias is deprecated and was
     # removed in Python 3.12.
     self.assertEqual(pdf.htmls, [first_html, second_html])
Beispiel #17
0
def anedit(path, target, options):
    """Interactively edit the notes of highlights in a PDF file.

    Every annotation note is shown together with a suggested replacement
    and the user is asked what to do with it.  Accepted suggestions are
    written back to the annotation; if at least one was accepted the
    document is saved to ``target`` (unless the user quits).

    Prompts are printed to standard output, answers are read from
    standard input.

    Options:
    * options.valid_types   PDF annotation types to process
    * options.use_title     Print filename/document title instead of filename
    * options.filter_keys   Only print stated keys.
    * options.remove_key    Don't print key tags
    * options.verbose       Print warnings
    * options.diffs         Skip notes whose suggestion equals the original

    """
    # FIXME: Who guarantees this method is only executed on valid files?
    # Also check for pusher, hillieo, hilliep, ...

    # wordlist for normalization
    wordlist = Dictionary()

    # open document
    document = Pdf(path, options, pgm=sys.argv[0])

    # fetch notes, each paired with its suggested replacement
    notes = []
    for item in document.annotations(options):
        sugg = annotation_fixes(item.note, wordlist, options.verbose)
        notes.append((item, sugg))

    valid_answers = 'nyecisq?'
    prompt = '[{}]: '.format('/'.join(valid_answers.title()))
    # Long answers are matched as whole words.  The former chain of
    # str.replace() calls mangled them (e.g. 'ignore' contains 'no' and
    # never reached 'i') and did not know 'change' from the usage text.
    aliases = {
        'yes': 'y',
        'no': 'n',
        'quit': 'q',
        'ignore': 'i',
        'ign': 'i',
        'edit': 'e',
        'correct': 'c',
        'change': 'c',
        'skip': 's',
    }

    # Walk through notes
    has_changes = False
    while notes:
        item, sugg = notes.pop(0)

        # print(...) with one argument behaves the same on Python 2 and 3.
        print("")
        print("\033[94m> {}: page {}, ETA {}\033[0m".format(
            item.page[0], item.page[1], len(notes)))
        print("Original:  %s" % (item.note,))
        if item.note != sugg:
            print("Suggested: %s" % (sugg,))
        elif options.diffs:
            continue

        ans = None
        while ans is None:
            reply = raw_input(prompt).strip().lower()
            reply = aliases.get(reply, reply)

            if reply == '':
                reply = valid_answers[0]  # default is 'n'

            if reply == '?':
                print('''Usage:

    n   no      Stick with the original text (the default)
    y   yes     Accept the suggested text
    e   edit    Edit the original text
    c   change  Edit the suggested text
    i   ignore  Ignore for now (again prompted later)
    s   skip    Save and exit
    q   quit    Abort and exit (changes are lost)

                ''')
            elif len(reply) == 1 and reply in valid_answers:
                # Exactly one known letter; a plain substring test would
                # also accept e.g. 'ny' and then silently drop the note.
                ans = reply

        if ans == 'y':  # Use suggestion
            has_changes = True
            if item.key is None:
                item.set_content(sugg)
            else:
                item.set_content('<{}>{}</{}>'.format(item.key, sugg,
                                                      item.key))
        elif ans == 'n':  # Use original
            pass
        elif ans in ('e', 'c'):  # Edit manually
            # Pre-fill the input line with the text to edit.  Newlines are
            # shown escaped so the note stays on a single line.
            prefill = item.note if ans == 'e' else sugg
            prefill = prefill.replace('\n', '\\n')

            def hook():
                readline.insert_text(prefill)
                readline.redisplay()

            readline.set_pre_input_hook(hook)
            try:
                sugg = raw_input().strip().replace('\\n', '\n')
            finally:
                # Never leave the hook installed for later prompts.
                readline.set_pre_input_hook(None)
            # Ask again right away, now with the edited text as suggestion.
            notes.insert(0, (item, sugg))
        elif ans == 'i':  # Ignore note for now
            notes.append((item, sugg))
        elif ans == 'q':  # Quit immediately, don't save
            return
        elif ans == 's':  # Skip the rest
            break

    # save changes
    if has_changes:
        document.save(target, options)
Beispiel #18
0
def get_json_from_file(file_path):
    """Return the decoded JSON content of the file at ``file_path``."""
    with open(file_path) as handle:
        parsed = json.loads(handle.read())
    return parsed


if __name__ == '__main__':
    # Expected arguments: <current PDF> <old generated JSON> <new generated JSON>
    if len(sys.argv) != 4:
        sys.stderr.write('Argument count is invalid!\n')
        # sys.exit instead of the site-provided ``exit`` helper, which is
        # meant for the interactive shell and is absent under ``python -S``.
        sys.exit(1)

    current_pdf_file_path = sys.argv[1]
    old_generated_JSON_file_path = sys.argv[2]
    new_generated_JSON_file_path = sys.argv[3]

    new_pdf = Pdf(current_pdf_file_path)
    old_json = get_json_from_file(old_generated_JSON_file_path)

    # Exit if PDF file is the same as the last time.
    new_pdf_hash = new_pdf.get_pdf_hash()
    old_pdf_hash = old_json["referenced_pdf_hash"]

    if new_pdf_hash == old_pdf_hash:
        print("PDF file hasn't changed since the last time. Exiting.")
        # Exit status 1 is kept as-is; callers may rely on it.
        sys.exit(1)

    new_addresses = new_pdf.get_addresses()
    old_addresses = {Address(**address) for address in old_json["addresses"]}

    # Google Geocoding calls are pretty expensive, so we don't really want to query every address every time.
    addresses_to_be_removed = old_addresses - new_addresses
Beispiel #19
0
def pdf_watermark():
    """Create a ``Pdf`` in ``watermark`` mode and build its GUI."""
    Pdf(title='PDF 文档加水印', mode='watermark').construct_gui()
Beispiel #20
0
def pdf_encrypt():
    """Create a ``Pdf`` in ``encrypt`` mode and build its GUI."""
    Pdf(title='PDF 文档加密', mode='encrypt').construct_gui()
Beispiel #21
0
parser.add_argument('--templatify-forms-whitelist', '--tfw', default='')
parser.add_argument('--templatify-forms-uniquifier', '--tfu')
parser.add_argument('--templatify-forms-padding',
                    '--tfp',
                    type=int,
                    default=80)
parser.add_argument('--templatify-forms-custom-padding',
                    '--tfcp',
                    default='{}')
parser.add_argument('--templatify-forms-remove-dv',
                    '--tfrd',
                    action='store_true')
parser.add_argument('--save', '-s')
args = parser.parse_args()

pdf = Pdf().load(args.pdf)

if args.compare:
    # Report every object that is missing on one side or differs.
    other = Pdf().load(args.compare)
    for k, v in pdf.objects.items():
        if k not in other.objects:
            print('===== {} ===== missing from other'.format(k))
        # ``elif``: looking up a key just reported as missing raised
        # KeyError and aborted the comparison.
        elif other.objects[k] != v:
            print(('===== {} =====\n'
                   '{}\n'
                   '\n'
                   '===== other =====\n'
                   '{}').format(k, v, other.objects[k]))
    for k in other.objects.keys():
        if k not in pdf.objects:
            print('===== {} ===== missing'.format(k))
Beispiel #22
0
class GhostWebPage(QWebPage):
    """Overrides QtWebKit.QWebPage in order to intercept some graphical
    behaviours like alert(), confirm().
    Also intercepts client side console.log().

    :param app: A QApplication that is running Ghost.
    :param network_manager: A NetworkManager instance in charge of managing
        all the network requests.
    :param wait_timeout: Maximum step duration in seconds.
    :param wait_callback: An optional callable that is periodically
        executed until Ghost stops waiting.
    :param viewport_size: A tuple that sets the initial viewport size.
    :param user_agent: The default User-Agent header.
    :param log_level: The optional logging level.
    :param download_images: Indicates whether the browser downloads images.
    :param plugins_enabled: Enable plugins (like Flash).
    :param java_enabled: Enable Java JRE.
    :param create_page_callback: A method called when a popup is opened.
    :param is_popup: Boolean that indicates whether the page is a popup.
    :param max_resource_queued: The maximum number of resources that can
        be kept in memory. If None, no limit is applied. If 0, no
        resources are kept. If the number is > 0, the number of resources
        won't exceed max_resource_queued.
    """
    user_agent = ""
    # Emitted with the page itself when its window asks to be closed.
    removeWindowFromList = pyqtSignal(object)

    # Class-level defaults; the confirm/prompt expectations are written
    # on the class itself, so they are shared by all pages.
    _alert = None
    _confirm_expected = None
    _prompt_expected = None
    _upload_file = None
    _app = None

    def __init__(
            self,
            app,
            network_manager,
            wait_timeout=20,
            wait_callback=None,
            viewport_size=(800, 600),
            user_agent='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2',
            log_level=30,
            download_images=True,
            plugins_enabled=False,
            java_enabled=False,
            create_page_callback=None,
            is_popup=False,
            max_resource_queued=None,
            *args,
            **kargs):
        """Create the page: store state, apply settings, connect signals.

        ``app`` is used as the Qt parent and ``network_manager`` becomes
        the page's network access manager.  Extra positional and keyword
        arguments are accepted and ignored.
        """
        super(GhostWebPage, self).__init__(parent=app)

        # Instance state
        self._app = app
        self.pdf_engine = Pdf()
        self.http_resources = []
        self.http_resource_page = None
        self.max_resource_queued = max_resource_queued
        self.wait_timeout = wait_timeout
        self.wait_callback = wait_callback
        self.loaded = True
        self.create_page_callback = create_page_callback
        self.is_popup = is_popup
        # Internal library object
        self.ghostInit = GhostInit()

        # WebKit settings
        self.setForwardUnsupportedContent(True)
        for attribute, enabled in (
                (QtWebKit.QWebSettings.AutoLoadImages, download_images),
                (QtWebKit.QWebSettings.JavascriptEnabled, True),
                (QtWebKit.QWebSettings.JavascriptCanOpenWindows, True),
                (QtWebKit.QWebSettings.PluginsEnabled, plugins_enabled),
                (QtWebKit.QWebSettings.JavaEnabled, java_enabled)):
            self.settings().setAttribute(attribute, enabled)

        self.set_viewport_size(*viewport_size)

        # Page signals
        for signal, handler in (
                (self.loadFinished, self._page_loaded),
                (self.loadStarted, self._page_load_started),
                (self.loadProgress, self._page_load_progress),
                (self.unsupportedContent, self._unsupported_content)):
            signal.connect(handler)

        # Network layer
        self.network_manager = network_manager
        self.setNetworkAccessManager(self.network_manager)
        self.network_manager.finished.connect(self._request_ended)

        # User Agent
        self.setUserAgent(user_agent)

        self.main_frame = self.mainFrame()
        self._unsupported_files = {}
        self.windowCloseRequested.connect(self._closeWindow)

        logger.setLevel(log_level)

    class confirm:
        """Statement that tells Ghost how to deal with javascript confirm().

        Works as a context manager: entering stores the expected answer on
        ``GhostWebPage._confirm_expected`` (a class attribute, so it is
        shared by every page); leaving resets it to None.

        :param confirm: A boolean that is the answer to confirm().
        :param callback: A callable that returns a boolean for confirmation.
        """
        def __init__(self, confirm=True, callback=None):
            self.confirm = confirm
            self.callback = callback

        def __enter__(self):
            GhostWebPage._confirm_expected = (self.confirm, self.callback)

        def __exit__(self, type, value, traceback):
            GhostWebPage._confirm_expected = None

    class prompt:
        """Statement that tells Ghost how to deal with javascript prompt().

        Works as a context manager: entering stores the expected value on
        ``GhostWebPage._prompt_expected`` (a class attribute, so it is
        shared by every page); leaving resets it to None.

        :param value: A string value to fill in prompt.
        :param callback: A callable that returns the value to fill in.
        """
        def __init__(self, value='', callback=None):
            self.value = value
            self.callback = callback

        def __enter__(self):
            GhostWebPage._prompt_expected = (self.value, self.callback)

        def __exit__(self, type, value, traceback):
            GhostWebPage._prompt_expected = None

    def chooseFile(self, frame, suggested_file=None):
        """Return ``self._upload_file``; both arguments are ignored.

        NOTE(review): presumably the Qt hook invoked for file inputs —
        confirm against the QWebPage documentation.
        """
        return self._upload_file

    def javaScriptConsoleMessage(self, message, line, source):
        """Forward a client console message to the Ghost logger.

        The base class handler runs first.  Messages containing the text
        "Error" are logged with level "error", all others with "info".
        """
        super(GhostWebPage, self).javaScriptConsoleMessage(
            message, line, source)

        if "Error" in message:
            log_type = "error"
        else:
            log_type = "info"

        origin = source or '<unknown>'
        entry = "%s(%d): %s" % (origin, line, message)
        Logger.log(entry, sender="Frame", level=log_type)

    def javaScriptAlert(self, frame, message):
        """Remember the alert text in ``self._alert`` and log it."""
        self._alert = message
        entry = "alert('%s')" % message
        Logger.log(entry, sender="Frame")

    def javaScriptConfirm(self, frame, message):
        """Answer a javascript confirm() with the value Ghost was told to use.

        The pending expectation is consumed (reset to None) before the
        answer is produced.  When a callback was registered its return
        value is the answer, otherwise the stored boolean is.

        Raises Exception when no expectation has been registered.
        """
        expected = GhostWebPage._confirm_expected
        if expected is None:
            raise Exception('You must specified a value to confirm "%s"' %
                            message)

        answer, answer_factory = expected
        GhostWebPage._confirm_expected = None
        Logger.log("confirm('%s')" % message, sender="Frame")

        if answer_factory is None:
            return answer
        return answer_factory()

    def javaScriptPrompt(self, frame, message, defaultValue, result=None):
        """Answer a javascript prompt() with the value Ghost was told to use.

        When a callback was registered its return value is used instead of
        the stored value.  An empty answer is logged as a warning.  The
        pending expectation is then cleared.

        Returns ``(True, value)`` when ``result`` is None (PySide);
        otherwise appends the value to ``result`` and returns True.

        Raises Exception when no expectation has been registered.
        """
        expected = GhostWebPage._prompt_expected
        if expected is None:
            raise Exception('You must specified a value for prompt "%s"' %
                            message)

        answer, answer_factory = expected
        Logger.log("prompt('%s')" % message, sender="Frame")
        if answer_factory is not None:
            answer = answer_factory()
        if answer == '':
            Logger.log("'%s' prompt filled with empty string" % message,
                       level='warning')
        GhostWebPage._prompt_expected = None

        if result is not None:
            result.append(answer)
            return True
        # PySide
        return True, answer

    def setUserAgent(self, user_agent):
        """Store ``user_agent`` on the page as ``self.user_agent``."""
        self.user_agent = user_agent

    def userAgentForUrl(self, url):
        """Return ``self.user_agent`` regardless of ``url``."""
        return self.user_agent

    def acceptNavigationRequest(self, frame, request, ttype):
        """Record the requested URL in ``self._lastUrl`` and accept it.

        Always returns True.
        """
        self._lastUrl = request.url()
        return True

    def createWindow(self, ttype):
        """Create a popup page through ``create_page_callback``.

        The new page is opened on the last URL recorded in
        ``self._lastUrl``.  Returns the page, or None when no callback
        was configured.
        """
        if self.create_page_callback is None:
            return None

        page, _name = self.create_page_callback(is_popup=True)
        page.open(self._lastUrl)
        return page

    def _closeWindow(self):
        """Emit ``removeWindowFromList`` with this page as its argument."""
        #if self._main_window is not None:
        self.removeWindowFromList.emit(self)

    def switch_to_sub_window(self, index):
        """Change the focus to the sub window (popup).

        :param index: The index of the window, in the order that the
            window was opened.
        :return: The focused window, or None when ``index`` is not below
            the number of known windows.
        """
        if len(self._windows) <= index:
            return None

        window = self._windows[index]
        window.mainFrame().setFocus()
        return window

    def capture(self,
                region=None,
                selector=None,
                format=QImage.Format_ARGB32_Premultiplied):
        """Returns snapshot as QImage.

        With a region (given directly or derived from ``selector``) the
        frame is rendered into an image reaching to the region's
        right/bottom edge and then cropped to the region.  Without one,
        scroll bars are switched off, the viewport is resized to the
        frame's content size and the whole viewport is rendered.

        :param region: An optional tuple containing region as pixel
            coordinates.
        :param selector: A selector targeted the element to crop on.
        :param format: The output image format.
        """
        if region is None and selector is not None:
            region = self.region_for_selector(selector)

        crop = None
        if region:
            left, top, right, bottom = region
            crop = (left, top, right - left, bottom - top)
            canvas_size = QSize(right, bottom)
        else:
            for orientation in (QtCore.Qt.Vertical, QtCore.Qt.Horizontal):
                self.currentFrame().setScrollBarPolicy(
                    orientation, QtCore.Qt.ScrollBarAlwaysOff)
            self.setViewportSize(self.currentFrame().contentsSize())
            canvas_size = self.viewportSize()

        snapshot = QImage(canvas_size, format)
        painter = QPainter(snapshot)
        self.currentFrame().render(painter)
        painter.end()

        if crop is not None:
            snapshot = snapshot.copy(*crop)
        return snapshot

    def capture_to(self,
                   path,
                   region=None,
                   selector=None,
                   format=QImage.Format_ARGB32_Premultiplied):
        """Saves snapshot as image.

        :param path: The destination path.
        :param region: An optional tuple containing region as pixel
            coordinates.
        :param selector: A selector targeted the element to crop on.
        :param format: The output image format.
            The available formats can be found here http://qt-project.org/doc/qt-4.8/qimage.html#Format-enum
            There is also a "pdf" format that will render the page into a pdf file
        :return: The result of the PDF engine for a "pdf" format,
            otherwise None.
        """
        wants_pdf = str(format).startswith("pdf")
        if not wants_pdf:
            snapshot = self.capture(region=region, format=format,
                                    selector=selector)
            snapshot.save(path)
            return None

        return self.pdf_engine.render_pdf(self, path)

    @client_utils_required
    def region_for_selector(self, selector):
        """Returns frame region for given selector as tuple.

        :param selector: The targeted element.
        :return: ``(left, top, right, bottom)`` of the element geometry.
        :raises Exception: When the geometry cannot be read.
        """
        geo = self.currentFrame().findFirstElement(selector).geometry()
        try:
            region = (geo.left(), geo.top(), geo.right(), geo.bottom())
        except Exception:
            # ``except Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not converted into errors.
            raise Exception("can't get region for selector '%s'" % selector)
        return region

    @client_utils_required
    @can_load_page
    def click(self, selector):
        """Click the targeted element.

        :param selector: A CSS3 selector to targeted element.
        :raises Exception: When no element matches ``selector``.
        """
        if not self.exists(selector):
            raise Exception("Can't find element to click")
        # json.dumps yields a correctly escaped JavaScript string literal;
        # pasting the selector between quotes broke on selectors that
        # contain a double quote, e.g. a[href="x"].
        return self.evaluate('GhostUtils.click(%s);' % json.dumps(selector))

    @property
    def content(self):
        """Returns main_frame HTML as a string."""
        html = self.main_frame.toHtml()
        return unicode(html)

    def get_current_frame_content(self):
        """Returns current frame HTML as a string."""
        frame = self.currentFrame()
        return unicode(frame.toHtml())

    @can_load_page
    def evaluate(self, script):
        """Evaluates script in the current frame and returns the result.

        :param script: The script to evaluate.
        """
        frame = self.currentFrame()
        source = "%s" % script
        return frame.evaluateJavaScript(source)

    def evaluate_js_file(self, path, encoding='utf-8'):
        """Evaluates javascript file at given path in current frame.

        Raises the native I/O error when the file cannot be opened or read.

        :param path: The path of the file.
        :param encoding: The file's encoding.
        """
        # The context manager closes the file; it used to stay open until
        # garbage collection.
        with codecs.open(path, encoding=encoding) as script_file:
            script = script_file.read()
        self.evaluate(script)

    def exists(self, selector):
        """Checks if element exists for given selector.

        :param selector: The element selector.
        """
        element = self.currentFrame().findFirstElement(selector)
        return not element.isNull()

    @can_load_page
    def fill(self, selector, values):
        """Fills a form with provided values.

        Every key of ``values`` is used as the ``name`` of a field inside
        the form and set to the associated value.

        :param selector: A CSS selector to the target form to fill.
        :param values: A dict containing the values.
        :return: True once all fields were set.
        :raises Exception: When no element matches ``selector``.
        """
        if not self.exists(selector):
            raise Exception("Can't find form")

        for name in values:
            field_selector = "%s [name=%s]" % (selector, name)
            self.set_field_value(field_selector, values[name])
        return True

    @client_utils_required
    @can_load_page
    def fire_on(self, selector, method):
        """Call method on element matching given selector.

        :param selector: A CSS selector to the target element.
        :param method: The name of the method to fire.
        """
        # json.dumps yields correctly escaped JavaScript string literals;
        # pasting the values between quotes broke on any value that
        # contains a double quote, e.g. the selector a[href="x"].
        return self.evaluate('GhostUtils.fireOn(%s, %s);' %
                             (json.dumps(selector), json.dumps(method)))

    def global_exists(self, global_name):
        """Checks if javascript global exists.

        :param global_name: The name of the global.
        """
        check = '!(typeof %s === "undefined");' % global_name
        return self.evaluate(check)

    def _reset_for_loading(self):
        """Prepare GhostWebPage to load a new url into the Main Frame.

        Drops the collected HTTP resources and the page resource and
        marks the page as not loaded.
        """
        self.http_resources = []
        self.http_resource_page = None
        self.loaded = False

    def open(self,
             address,
             method='get',
             headers=None,
             auth=None,
             wait_onload_event=True,
             wait_for_loading=True):
        """Opens a web page.

        :param address: The resource URL.
        :param method: The Http method.
        :param headers: An optional dict of extra request headers.
        :param auth: An optional tuple of HTTP auth (username, password).
        :param wait_onload_event: If it's set to True waits until the OnLoad event from
            the main page is fired. Otherwise wait until the Dom is ready.
        :param wait_for_loading: If True waits until the page is Loaded. Note that wait_onload_event
            isn't valid if wait_for_loading is False.
        :return: Page resource, All loaded resources.
        :raises Exception: When ``method`` is not a known HTTP method.
        """
        # None instead of a shared mutable ``{}`` default.
        if headers is None:
            headers = {}

        # Connect or disconnect the dom-ready signal so that exactly the
        # requested event marks the page as loaded.
        if not wait_onload_event:
            if self.ghostInit.receivers(SIGNAL("dom_is_ready(bool)")) == 0:
                self.ghostInit.dom_is_ready.connect(self._page_loaded)
            Logger.log("Waiting until OnReady event is fired")
        else:
            if self.ghostInit.receivers(SIGNAL("dom_is_ready(bool)")) > 0:
                self.ghostInit.dom_is_ready.disconnect(self._page_loaded)
            #Logger.log("Waiting until OnLoad event is fired")

        body = QByteArray()

        # Map e.g. 'get' to QNetworkAccessManager.GetOperation.
        try:
            method = getattr(QNetworkAccessManager,
                             "%sOperation" % method.capitalize())
        except AttributeError:
            raise Exception("Invalid http method %s" % method)
        request = QNetworkRequest(QUrl(address))
        # NOTE(review): CacheLoadControl looks like an enum type rather
        # than a setter, so this call probably has no effect; the intent
        # may be setAttribute(QNetworkRequest.CacheLoadControlAttribute,
        # QNetworkRequest.AlwaysNetwork) — confirm against the Qt docs.
        request.CacheLoadControl(QNetworkRequest.AlwaysNetwork)
        for header in headers:
            request.setRawHeader(header, headers[header])

        if auth is not None:
            self.network_manager.setAuthCredentials(auth[0], auth[1])
        self._reset_for_loading()
        self.main_frame.load(request, method, body)

        if not wait_for_loading:
            return self.get_loaded_page()
        return self.wait_for_page_loaded()

    def download(self, path, address, **kwards):
        """Open ``address`` and write the resulting content to ``path``.

        :param path: Destination file, opened in binary write mode.
        :param address: The resource URL, forwarded to ``self.open``.
        :param kwards: Extra keyword arguments forwarded to ``self.open``.
        :return: The object returned by ``self.open``.
        """
        resource = self.open(address, **kwards)

        with open(path, "wb") as target:
            target.write(resource.content)

        return resource

    @can_load_page
    @client_utils_required
    def set_field_value(self, selector, value, blur=True):
        """Sets the value of the field matched by given selector.

        Fires ``focus`` on the element, sets the value according to the
        element's tag and input type, then optionally fires ``blur``.

        :param selector: A CSS selector that target the field.
        :param value: The value to fill in.
        :param blur: An optional boolean that force blur when filled in.
        :return: The result of the evaluation/click used to set the value,
            or None when the input type is not handled.
        :raises Exception: If no element matches, or the tag is not
            TEXTAREA, SELECT or INPUT.
        """
        def _set_text_value(selector, value):
            return self.evaluate('document.querySelector("%s").value=%s;' %
                                 (selector, json.dumps(value)))

        res = None

        element = self.main_frame.findFirstElement(selector)
        if element.isNull():
            raise Exception('can\'t find element for "%s"' % selector)
        self.fire_on(selector, 'focus')
        tag_name = element.tagName()
        if tag_name in ["TEXTAREA", "SELECT"]:
            res = _set_text_value(selector, value)
        elif tag_name == "INPUT":
            # An <input> without a type attribute is a text field, so fall
            # back to 'text' instead of silently skipping the element.
            field_type = element.attribute('type', 'text')
            if field_type in [
                    "color", "date", "datetime", "datetime-local", "email",
                    "hidden", "month", "number", "password", "range", "search",
                    "tel", "text", "time", "url", "week"
            ]:
                res = _set_text_value(selector, value)
            elif field_type == "checkbox":
                res = self.evaluate('GhostUtils.setCheckboxValue("%s", %s);' %
                                    (selector, json.dumps(value)))
            elif field_type == "radio":
                res = self.evaluate('GhostUtils.setRadioValue("%s", %s);' %
                                    (selector, json.dumps(value)))
            elif field_type == "file":
                # chooseFile() hands back _upload_file while the click is
                # processed; always clear it, even if the click fails.
                self._upload_file = value
                try:
                    res = self.click(selector)
                finally:
                    self._upload_file = None
        else:
            raise Exception('unsuported field tag')
        if blur:
            self.fire_on(selector, 'blur')

        return res

    def set_viewport_size(self, width, height):
        """Resize the page viewport.

        :param width: An integer that sets width pixel count.
        :param height: An integer that sets height pixel count.
        """
        new_size = QSize(width, height)
        self.setViewportSize(new_size)

    def wait_for(self, condition, timeout_message):
        """Block until ``condition()`` is truthy.

        Between checks the method sleeps briefly, lets the application
        process its events and calls ``self.wait_callback`` when one is set.

        :param condition: A callable that returns the condition.
        :param timeout_message: The exception message on timeout.
        :raises Exception: When more than ``self.wait_timeout`` seconds have
            elapsed and the condition is still falsy.
        """
        began = time.time()
        while True:
            if condition():
                return
            if time.time() > (began + self.wait_timeout):
                raise Exception(timeout_message)
            time.sleep(0.01)
            self._app.processEvents()
            callback = self.wait_callback
            if callback is not None:
                callback()

    def wait_for_alert(self):
        """Wait for an alert to be recorded, then consume and return it.

        :return: The stored alert message; the stored value is reset to None.
        """
        def alerted():
            return self._alert is not None

        self.wait_for(alerted, 'User has not been alerted.')
        message, self._alert = self._alert, None
        return message

    def wait_for_page_loaded(self):
        """Wait for the requested page to finish loading.

        The wait ends once ``self.loaded`` is set and no unsupported-content
        reply is still pending.

        :return: The result of ``get_loaded_page()``.
        """
        def fully_loaded():
            return self.loaded and not self._unsupported_files

        self.wait_for(fully_loaded, 'Unable to load requested page')
        return self.get_loaded_page()

    def get_loaded_page(self):
        """Return the page resource if loading has completed.

        :return: ``self.http_resource_page`` when the page is loaded and no
            unsupported-content reply is pending, otherwise None.
        """
        if not self.loaded or self._unsupported_files:
            return None
        return self.http_resource_page

    def wait_for_selector(self, selector):
        """Wait until an element matching ``selector`` exists.

        :param selector: The selector to wait for.
        :return: True once the wait completes.
        """
        def element_present():
            return self.exists(selector)

        failure = 'Can\'t find element matching "%s"' % selector
        self.wait_for(element_present, failure)
        return True

    def wait_for_text(self, text):
        """Wait until ``text`` shows up in the current frame's plain text.

        :param text: The text to wait for.
        :return: True once the wait completes.
        """
        def text_present():
            return text in self.currentFrame().toPlainText()

        failure = 'Can\'t find "%s" in current frame' % text
        self.wait_for(text_present, failure)
        return True

    def _page_load_progress(self, progress):
        pass

    def _page_loaded(self, ok):
        """Called back when page is loaded.
        """
        # FIXME: Check why ok == False when we are trying to load
        # unsupported content
        self.loaded = True

    def _page_load_started(self):
        """Called back when page load started.
        """
        self.loaded = False

    def _release_last_resources(self):
        """Releases last loaded resources.

        :return: The released resources.
        """
        last_resources = self.http_resources
        self.http_resources = []
        return last_resources

    def release_last_resources(self):
        """Public entry point delegating to ``_release_last_resources``.

        :return: The released resources.
        """
        released = self._release_last_resources()
        return released

    def _insert_dom_ready_code(self):
        self.mainFrame().addToJavaScriptWindowObject("GhostInit",
                                                     self.ghostInit)
        #self.page.mainFrame().addToJavaScriptWindowObject("ghost_frame", self.page.mainFrame());
        self.evaluate_js_file(
            os.path.join(os.path.dirname(__file__), 'domready.js'))

    def _request_ended(self, reply):
        """Record a finished network reply as an HttpResource.

        When the reply's url equals the current frame's url the DOM-ready
        code is injected first. Replies without an HTTP status code
        attribute are not recorded.

        :param reply: The QNetworkReply object.
        """
        if reply.url() == self.currentFrame().url():
            Logger.log("Injecting DOMReady code")
            self._insert_dom_ready_code()

        # Content is only read for replies previously flagged as
        # unsupported; the flag is dropped at the same time.
        content = None
        url_key = unicode(reply.url())
        if url_key in self._unsupported_files:
            del self._unsupported_files[url_key]
            content = reply.readAll()

        if not reply.attribute(QNetworkRequest.HttpStatusCodeAttribute):
            return

        cache = self.network_manager.cache()
        http_resource = HttpResource(reply, cache, content)

        # The first recorded resource after a reset is the page itself.
        if self.http_resource_page is None:
            self.http_resource_page = http_resource

        limit = self.max_resource_queued
        if limit is None or limit > 0:
            self.http_resources.append(http_resource)

        # Drop the oldest entry once the queue grows past the limit.
        if limit is not None and len(self.http_resources) > limit:
            self.http_resources.pop(0)

    def _unsupported_content(self, reply):
        """Remember a reply carrying unsupported content, keyed by its url,
        so that ``_request_ended`` can read its content later.

        :param reply: The QNetworkReply object.
        """
        url_key = unicode(reply.url())
        self._unsupported_files[url_key] = reply

    def switch_to_frame(self, frameName=None):
        """Give focus to the main frame or to a named child frame.

        :param frameName: The name of the frame; None selects the main frame.
        :return: True when a frame received focus, False when no child of
            the current frame has that name.
        """
        if frameName is None:
            self.main_frame.setFocus()
            return True

        for child in self.currentFrame().childFrames():
            if child.frameName() != frameName:
                continue
            child.setFocus()
            return True
        return False

    def switch_to_frame_nro(self, nro=-1):
        """Change the focus to the frame at the given position.

        :param nro: Index of the child frame of the current frame. ``-1``
            (the default) or None selects the main frame.
        :return: True when a frame received focus, False when ``nro`` is
            not a valid child-frame index.
        """
        if nro is None or nro == -1:
            self.main_frame.setFocus()
            return True

        frames = self.currentFrame().childFrames()
        # Only index when nro is in range; an out-of-range request is
        # reported through the return value instead of an IndexError.
        if 0 <= nro < len(frames):
            frames[nro].setFocus()
            return True

        return False

    @property
    def cookies(self):
        """All cookies currently held by the network manager's cookie jar."""
        jar = self.network_manager.cookieJar()
        return jar.allCookies()

    def delete_cookies(self):
        """Empty the network manager's cookie jar."""
        jar = self.network_manager.cookieJar()
        jar.setAllCookies([])

    def delete_cache(self):
        """Clear the network manager's cache."""
        cache = self.network_manager.cache()
        cache.clear()

    def load_cookies(self, cookie_storage, keep_old=False):
        """Load from cookielib's CookieJar or Set-Cookie3 format text file.

        :param cookie_storage: file location string on disk or CookieJar
            instance (any object whose class name ends with ``CookieJar``).
        :param keep_old: Don't reset, keep cookies not overridden.
        :raises ValueError: If ``cookie_storage`` is neither of the above.
        """
        def toQtCookieJar(pyCookieJar, qtCookieJar):
            # Start from the jar's current cookies when keep_old is set;
            # allCookies() is the accessor used elsewhere in this class.
            all_cookies = list(qtCookieJar.allCookies()) if keep_old else []
            all_cookies.extend(toQtCookie(pc) for pc in pyCookieJar)
            qtCookieJar.setAllCookies(all_cookies)

        def toQtCookie(pyCookie):
            qc = QNetworkCookie(pyCookie.name, pyCookie.value)
            qc.setSecure(pyCookie.secure)
            if pyCookie.path_specified:
                qc.setPath(pyCookie.path)
            if pyCookie.domain != "":
                qc.setDomain(pyCookie.domain)
            if pyCookie.expires != 0:
                t = QDateTime()
                t.setTime_t(pyCookie.expires)
                qc.setExpirationDate(t)
            # not yet handled(maybe less useful):
            #   py cookie.rest / QNetworkCookie.setHttpOnly()
            return qc

        storage_type = cookie_storage.__class__.__name__
        if storage_type == 'str':
            cj = LWPCookieJar(cookie_storage)
            cj.load()
            toQtCookieJar(cj, self.network_manager.cookieJar())
        elif storage_type.endswith('CookieJar'):
            toQtCookieJar(cookie_storage, self.network_manager.cookieJar())
        else:
            raise ValueError('unsupported cookie_storage type.')

    def save_cookies(self, cookie_storage):
        """Save to cookielib's CookieJar or Set-Cookie3 format text file.

        :param cookie_storage: file location string or CookieJar instance
            (any object whose class name ends with ``CookieJar``).
        :raises ValueError: If ``cookie_storage`` is neither of the above.
        """
        def toPyCookieJar(qtCookieJar, pyCookieJar):
            for c in qtCookieJar.allCookies():
                pyCookieJar.set_cookie(toPyCookie(c))

        def toPyCookie(qtCookie):
            port = None
            port_specified = False
            secure = qtCookie.isSecure()
            name = str(qtCookie.name())
            value = str(qtCookie.value())
            v = str(qtCookie.path())
            path_specified = bool(v != "")
            path = v if path_specified else None
            v = str(qtCookie.domain())
            domain_specified = bool(v != "")
            domain = v
            domain_initial_dot = v.startswith(
                '.') if domain_specified else None
            v = int(qtCookie.expirationDate().toTime_t())
            # Clamp to the 32-bit signed maximum to avoid a ValueError on
            # 32-bit platforms.
            expires = min(v, 2147483647)
            rest = {}
            discard = False
            return Cookie(0, name, value, port, port_specified, domain,
                          domain_specified, domain_initial_dot, path,
                          path_specified, secure, expires, discard, None, None,
                          rest)

        storage_type = cookie_storage.__class__.__name__
        if storage_type == 'str':
            cj = LWPCookieJar(cookie_storage)
            toPyCookieJar(self.network_manager.cookieJar(), cj)
            cj.save()
        elif storage_type.endswith('CookieJar'):
            toPyCookieJar(self.network_manager.cookieJar(), cookie_storage)
        else:
            raise ValueError('unsupported cookie_storage type.')
Beispiel #23
0
    def __init__(
            self,
            app,
            network_manager,
            wait_timeout=20,
            wait_callback=None,
            viewport_size=(800, 600),
            user_agent='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2',
            log_level=30,
            download_images=True,
            plugins_enabled=False,
            java_enabled=False,
            create_page_callback=None,
            is_popup=False,
            max_resource_queued=None,
            *args,
            **kargs):
        """Set up the page: state, web settings, signals and network manager.

        :param app: Passed to the superclass as ``parent`` and kept as
            ``self._app``.
        :param network_manager: Installed with ``setNetworkAccessManager``;
            its ``finished`` signal is connected to ``_request_ended``.
        :param wait_timeout: Stored as ``self.wait_timeout``.
        :param wait_callback: Stored as ``self.wait_callback``.
        :param viewport_size: (width, height) forwarded to
            ``set_viewport_size``.
        :param user_agent: Forwarded to ``setUserAgent``.
        :param log_level: Forwarded to ``logger.setLevel``.
        :param download_images: Value for the AutoLoadImages web setting.
        :param plugins_enabled: Value for the PluginsEnabled web setting.
        :param java_enabled: Value for the JavaEnabled web setting.
        :param create_page_callback: Stored as ``self.create_page_callback``.
        :param is_popup: Stored as ``self.is_popup``.
        :param max_resource_queued: Stored as ``self.max_resource_queued``.
        :param args: Accepted but not used in this method.
        :param kargs: Accepted but not used in this method.
        """

        super(GhostWebPage, self).__init__(parent=app)
        self._app = app
        self.pdf_engine = Pdf()
        self.http_resources = []
        self.http_resource_page = None
        self.max_resource_queued = max_resource_queued
        self.wait_timeout = wait_timeout
        self.wait_callback = wait_callback
        # The page starts out flagged as loaded.
        self.loaded = True
        self.create_page_callback = create_page_callback
        self.is_popup = is_popup
        # Internal library object
        self.ghostInit = GhostInit()

        # JavaScript and JavaScript-opened windows are always enabled; the
        # other settings follow the constructor arguments.
        self.setForwardUnsupportedContent(True)
        self.settings().setAttribute(QtWebKit.QWebSettings.AutoLoadImages,
                                     download_images)
        self.settings().setAttribute(QtWebKit.QWebSettings.JavascriptEnabled,
                                     True)
        self.settings().setAttribute(
            QtWebKit.QWebSettings.JavascriptCanOpenWindows, True)
        self.settings().setAttribute(QtWebKit.QWebSettings.PluginsEnabled,
                                     plugins_enabled)
        self.settings().setAttribute(QtWebKit.QWebSettings.JavaEnabled,
                                     java_enabled)

        self.set_viewport_size(*viewport_size)

        # Page signals
        self.loadFinished.connect(self._page_loaded)
        self.loadStarted.connect(self._page_load_started)
        self.loadProgress.connect(self._page_load_progress)
        self.unsupportedContent.connect(self._unsupported_content)
        self.network_manager = network_manager
        self.setNetworkAccessManager(self.network_manager)
        self.network_manager.finished.connect(self._request_ended)
        # User Agent
        self.setUserAgent(user_agent)

        self.main_frame = self.mainFrame()
        # Maps url -> reply for content reported through unsupportedContent.
        self._unsupported_files = {}
        self.windowCloseRequested.connect(self._closeWindow)

        logger.setLevel(log_level)
Beispiel #24
0
def pdf_merge():
    """Create a ``Pdf`` in merge mode and build its GUI."""
    merger = Pdf(title='PDF 文档合并', mode='merge')
    merger.construct_gui()
Beispiel #25
0
class GhostWebPage(QWebPage):
    """Overrides QtWebKit.QWebPage in order to intercept some graphical
    behaviours like alert(), confirm().
    Also intercepts client side console.log().
    
    :param app: a QApplication that it's running Ghost.
    :param network_manager: a NetworkManager instance in charge of managing all the network
        requests.
    :param wait_timeout: Maximum step duration in second.
    :param wait_callback: An optional callable that is periodically
        executed until Ghost stops waiting.
    :param viewport_size: A tupple that sets initial viewport size.
    :param user_agent: The default User-Agent header.
    :param log_level: The optional logging level.
    :param download_images: Indicate if the browser download or not the images
    :param plugins_enabled: Enable plugins (like Flash).
    :param java_enabled: Enable Java JRE.
    :param create_page_callback: A method called when a popup it's opened
    :param is_popup: Boolean who indicate if the page it's a popup
    :param max_resource_queued: Indicates witch it's the max number of resources that can be
            saved in memory. If None then no limits are applied. If 0 then no resources are kept/
            If the number it's > 0 then the number of resources won't be more than max_resource_queued
    """
    user_agent = ""
    removeWindowFromList = pyqtSignal(object)
    
    _alert = None
    _confirm_expected = None
    _prompt_expected = None
    _upload_file = None
    _app = None
    
    def __init__(self, app, network_manager, wait_timeout=20, wait_callback=None,
                viewport_size=(800, 600), user_agent='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2',
                log_level=30, download_images=True, plugins_enabled=False,
                java_enabled=False, create_page_callback=None,
                is_popup=False, max_resource_queued=None,
                *args, **kargs):
        """Set up the page: state, web settings, signals and network manager.

        The parameters are described in the class docstring. ``app`` is
        passed to the superclass as ``parent``; ``args`` and ``kargs`` are
        accepted but not used in this method.
        """
        
        super(GhostWebPage, self).__init__(parent=app)
        self._app = app
        self.pdf_engine = Pdf()
        self.http_resources = []
        self.http_resource_page = None
        self.max_resource_queued = max_resource_queued
        self.wait_timeout = wait_timeout
        self.wait_callback = wait_callback
        # The page starts out flagged as loaded.
        self.loaded = True
        self.create_page_callback = create_page_callback
        self.is_popup = is_popup
        # Internal library object
        self.ghostInit =  GhostInit()
        
        # JavaScript and JavaScript-opened windows are always enabled; the
        # other settings follow the constructor arguments.
        self.setForwardUnsupportedContent(True)
        self.settings().setAttribute(QtWebKit.QWebSettings.AutoLoadImages, download_images)
        self.settings().setAttribute(QtWebKit.QWebSettings.JavascriptEnabled, True)
        self.settings().setAttribute(QtWebKit.QWebSettings.JavascriptCanOpenWindows, True)
        self.settings().setAttribute(QtWebKit.QWebSettings.PluginsEnabled, plugins_enabled)
        self.settings().setAttribute(QtWebKit.QWebSettings.JavaEnabled, java_enabled)

        self.set_viewport_size(*viewport_size)

        # Page signals
        self.loadFinished.connect(self._page_loaded)
        self.loadStarted.connect(self._page_load_started)
        self.loadProgress.connect(self._page_load_progress)
        self.unsupportedContent.connect(self._unsupported_content)
        self.network_manager = network_manager
        self.setNetworkAccessManager(self.network_manager)
        self.network_manager.finished.connect(self._request_ended)
        # User Agent
        self.setUserAgent(user_agent)

        self.main_frame = self.mainFrame()
        # Maps url -> reply for content reported through unsupportedContent.
        self._unsupported_files = {}
        self.windowCloseRequested.connect(self._closeWindow)
        
        logger.setLevel(log_level)
        
    
    class confirm:
        """Context manager telling Ghost how to answer javascript confirm().

        While the ``with`` block is active the expected answer is stored on
        ``GhostWebPage._confirm_expected``; it is cleared again on exit.

        :param confirm: A boolean giving the answer.
        :param callback: A callable that returns a boolean for confirmation.
        """
        def __init__(self, confirm=True, callback=None):
            self.callback = callback
            self.confirm = confirm

        def __enter__(self):
            expectation = (self.confirm, self.callback)
            GhostWebPage._confirm_expected = expectation

        def __exit__(self, type, value, traceback):
            GhostWebPage._confirm_expected = None
    
    
    class prompt:
        """Context manager telling Ghost how to answer javascript prompt().

        While the ``with`` block is active the expected answer is stored on
        ``GhostWebPage._prompt_expected``; it is cleared again on exit.

        :param value: A string value to fill in prompt.
        :param callback: A callable that returns the value to fill in.
        """
        def __init__(self, value='', callback=None):
            self.callback = callback
            self.value = value

        def __enter__(self):
            expectation = (self.value, self.callback)
            GhostWebPage._prompt_expected = expectation

        def __exit__(self, type, value, traceback):
            GhostWebPage._prompt_expected = None
            
    def chooseFile(self, frame, suggested_file=None):
        """Answer a file-chooser request with the pending upload file.

        Both arguments are ignored; ``self._upload_file`` is returned.
        """
        pending_upload = self._upload_file
        return pending_upload

    def javaScriptConsoleMessage(self, message, line, source):
        """Forward a client console message to the superclass and log it.

        Messages containing "Error" are logged at level "error", all
        others at level "info".
        """
        super(GhostWebPage, self).javaScriptConsoleMessage(
            message, line, source)
        if "Error" in message:
            log_type = "error"
        else:
            log_type = "info"
        origin = source or '<unknown>'
        Logger.log("%s(%d): %s" % (origin, line, message),
                   sender="Frame", level=log_type)

    def javaScriptAlert(self, frame, message):
        """Record the alert message on the page and log it.

        :param frame: Not used.
        :param message: The alert text, stored as ``self._alert``.
        """
        self._alert = message
        log_line = "alert('%s')" % message
        Logger.log(log_line, sender="Frame")

    def javaScriptConfirm(self, frame, message):
        """Answer a javascript confirm() with the expected value.

        The expectation stored on ``GhostWebPage._confirm_expected`` is
        consumed (reset to None) by this call.

        :return: The callback's result when a callback was given, otherwise
            the stored confirmation value.
        :raises Exception: If no expectation has been set.
        """
        expected = GhostWebPage._confirm_expected
        if expected is None:
            raise Exception('You must specified a value to confirm "%s"' %
                            message)

        confirmation, callback = expected
        GhostWebPage._confirm_expected = None
        Logger.log("confirm('%s')" % message, sender="Frame")
        if callback is None:
            return confirmation
        return callback()

    def javaScriptPrompt(self, frame, message, defaultValue, result=None):
        """Answer a javascript prompt() with the expected value.

        The expectation stored on ``GhostWebPage._prompt_expected`` is
        consumed (reset to None) by this call.

        :return: ``(True, value)`` when ``result`` is None, otherwise True
            after appending the value to ``result``.
        :raises Exception: If no expectation has been set.
        """
        expected = GhostWebPage._prompt_expected
        if expected is None:
            raise Exception('You must specified a value for prompt "%s"' %
                            message)
        result_value, callback = expected
        Logger.log("prompt('%s')" % message, sender="Frame")
        if callback is not None:
            result_value = callback()
        if result_value == '':
            Logger.log("'%s' prompt filled with empty string" % message,
                       level='warning')
        GhostWebPage._prompt_expected = None
        if result is not None:
            result.append(result_value)
            return True
        # PySide
        return True, result_value

    def setUserAgent(self, user_agent):
        """Store the User-Agent string handed out by ``userAgentForUrl``."""
        self.user_agent = user_agent

    def userAgentForUrl(self, url):
        """Return the stored User-Agent string; ``url`` is not consulted."""
        agent = self.user_agent
        return agent
    
    def acceptNavigationRequest(self, frame, request, ttype):
        """Accept every navigation request, remembering its url.

        The url is kept in ``self._lastUrl``, which ``createWindow`` reads.

        :return: Always True.
        """
        requested_url = request.url()
        self._lastUrl = requested_url
        return True
    
    def createWindow(self, ttype):
        """Create a popup page through ``create_page_callback``.

        The new page is opened on the last url seen by
        ``acceptNavigationRequest``.

        :param ttype: Not used.
        :return: The new page, or None when no callback is configured.
        """
        if self.create_page_callback is None:
            return None

        page, name = self.create_page_callback(is_popup=True)
        page.open(self._lastUrl)
        return page
    
    def _closeWindow(self):
        #if self._main_window is not None:
        self.removeWindowFromList.emit(self)
        
    def switch_to_sub_window(self, index):
        """Give focus to a sub window (popup) and return it.

        :param index: The index of the window, in the order that the
            window was opened
        :return: The window at ``index`` in ``self._windows``, or None when
            the list is not longer than ``index``.
        """
        if len(self._windows) <= index:
            return None
        window = self._windows[index]
        window.mainFrame().setFocus()
        return window
    
    def capture(self, region=None, selector=None,
            format=QImage.Format_ARGB32_Premultiplied):
        """Returns snapshot of the current frame as QImage.

        :param region: An optional tuple ``(x1, y1, x2, y2)`` containing
            the region as pixel coordinates.
        :param selector: A selector targeting the element to crop on; only
            used when ``region`` is None.
        :param format: The output image format.
        :return: The rendered QImage, cropped to the region when one is
            given or derived from ``selector``.
        """
        if region is None and selector is not None:
            region = self.region_for_selector(selector)
        if region:
            x1, y1, x2, y2 = region
            w, h = (x2 - x1), (y2 - y1)
            # Render into an image reaching the region's bottom-right
            # corner, then crop it down to the region itself.
            image = QImage(QSize(x2, y2), format)
            painter = QPainter(image)
            self.currentFrame().render(painter)
            painter.end()
            image = image.copy(x1, y1, w, h)
        else:
            # Whole-frame capture: scrollbars are switched off and the
            # viewport is resized to the frame's contents size before
            # rendering. Neither change is reverted here.
            self.currentFrame().setScrollBarPolicy(QtCore.Qt.Vertical,
                                QtCore.Qt.ScrollBarAlwaysOff)
            self.currentFrame().setScrollBarPolicy(QtCore.Qt.Horizontal,
                                QtCore.Qt.ScrollBarAlwaysOff)
            self.setViewportSize(self.currentFrame().contentsSize())
            image = QImage(self.viewportSize(), format)
            painter = QPainter(image)
            self.currentFrame().render(painter)
            painter.end()
        return image   
    
    
    def capture_to(self, path, region=None, selector=None,
        format=QImage.Format_ARGB32_Premultiplied):
        """Save a snapshot to ``path``.

        :param path: The destination path.
        :param region: An optional tuple containing region as pixel
            coordinates.
        :param selector: A selector targeting the element to crop on.
        :param format: The output image format.
            The available formats can be found here http://qt-project.org/doc/qt-4.8/qimage.html#Format-enum
            A format whose string form starts with "pdf" renders the page
            through ``self.pdf_engine`` instead.
        :return: The pdf engine's result for pdf output, otherwise None.
        """
        wants_pdf = str(format).startswith("pdf")
        if not wants_pdf:
            snapshot = self.capture(region=region, format=format,
                                    selector=selector)
            snapshot.save(path)
            return None
        return self.pdf_engine.render_pdf(self, path)
            
    
    @client_utils_required
    def region_for_selector(self, selector):
        """Returns frame region for given selector as tuple.

        :param selector: The targeted element.
        :return: ``(left, top, right, bottom)`` of the geometry of the first
            element matching ``selector`` in the current frame.
        :raises Exception: If the region cannot be read from the geometry.
        """
        geo = self.currentFrame().findFirstElement(selector).geometry()
        try:
            region = (geo.left(), geo.top(), geo.right(), geo.bottom())
        except Exception:
            # Narrowed from a bare except so that KeyboardInterrupt and
            # SystemExit are no longer converted into this error.
            raise Exception("can't get region for selector '%s'" % selector)
        return region
    
    
    @client_utils_required
    @can_load_page
    def click(self, selector):
        """Click the element matched by ``selector``.

        :param selector: A CSS3 selector to targeted element.
        :return: The result of evaluating the ``GhostUtils.click`` call.
        :raises Exception: If no element matches the selector.
        """
        if self.exists(selector):
            click_script = 'GhostUtils.click("%s");' % selector
            return self.evaluate(click_script)
        raise Exception("Can't find element to click")
    
    
    @property
    def content(self):
        """The main frame's HTML converted with ``unicode``."""
        html = self.main_frame.toHtml()
        return unicode(html)

    
    def get_current_frame_content(self):
        """Return the current frame's HTML converted with ``unicode``."""
        html = self.currentFrame().toHtml()
        return unicode(html)
    
    @can_load_page
    def evaluate(self, script):
        """Evaluate a script in the current frame.

        :param script: The script to evaluate; it is formatted with ``%s``
            before being handed to the frame.
        :return: The frame's ``evaluateJavaScript`` result.
        """
        frame = self.currentFrame()
        return frame.evaluateJavaScript("%s" % script)

    def evaluate_js_file(self, path, encoding='utf-8'):
        """Evaluates javascript file at given path in current frame.
        Raises the native I/O error in case of invalid file.

        :param path: The path of the file.
        :param encoding: The file's encoding.
        """
        # Read inside a with-block so the file handle is always closed.
        with codecs.open(path, encoding=encoding) as script_file:
            script = script_file.read()
        self.evaluate(script)

    def exists(self, selector):
        """Tell whether the current frame has an element for a selector.

        :param selector: The element selector.
        :return: True when the first matching element is not null.
        """
        element = self.currentFrame().findFirstElement(selector)
        return not element.isNull()
    
    
    @can_load_page
    def fill(self, selector, values):
        """Fill a form with the provided values.

        Each key of ``values`` is used as a field name below the form.

        :param selector: A CSS selector to the target form to fill.
        :param values: A dict containing the values.
        :return: True once every field has been set.
        :raises Exception: If the form cannot be found.
        """
        form_found = self.exists(selector)
        if not form_found:
            raise Exception("Can't find form")

        for field in values:
            field_selector = "%s [name=%s]" % (selector, field)
            self.set_field_value(field_selector, values[field])
        return True

    @client_utils_required
    @can_load_page
    def fire_on(self, selector, method):
        """Fire a method on the element matched by a selector.

        Builds a ``GhostUtils.fireOn(...)`` JavaScript call and evaluates it.

        :param selector: A CSS selector to the target element.
        :param method: The name of the method to fire.
        :return: Whatever ``self.evaluate`` returns for the generated script.

        NOTE(review): earlier documentation listed an ``expect_loading``
        parameter; it is not part of this signature and is presumably
        consumed by the ``can_load_page`` decorator -- confirm.
        """
        script = 'GhostUtils.fireOn("%s", "%s");' % (selector, method)
        return self.evaluate(script)

    def global_exists(self, global_name):
        """Tell whether a JavaScript global is defined.

        :param global_name: The name of the global.
        :return: The result of evaluating a ``typeof`` check for that name.
        """
        typeof_check = '!(typeof %s === "undefined");' % global_name
        return self.evaluate(typeof_check)
    
    def _reset_for_loading(self):
        """Prepare GhostWebPage to load a new url into
        the Main Frame
        """
        self.http_resources = []
        self.http_resource_page = None
        self.loaded = False
        
    def open(self, address, method='get', headers=None, auth=None,
            wait_onload_event=True, wait_for_loading=True):
        """Opens a web page.

        :param address: The resource URL.
        :param method: The HTTP method name; it is capitalized and looked up
            as ``QNetworkAccessManager.<Method>Operation``.
        :param headers: An optional dict of extra request headers.
        :param auth: An optional tuple of HTTP auth (username, password).
        :param wait_onload_event: If it's set to True waits until the OnLoad
            event from the main page is fired. Otherwise wait until the Dom
            is ready.
        :param wait_for_loading: If True waits until the page is loaded.
            Note that wait_onload_event isn't valid if wait_for_loading is
            False.
        :return: The result of ``wait_for_page_loaded()``, or of
            ``get_loaded_page()`` (which may be None) when
            ``wait_for_loading`` is False.
        :raises Exception: If ``method`` does not name a known operation.
        """
        # A fresh dict per call avoids sharing one mutable default object.
        if headers is None:
            headers = {}

        # Make sure _page_loaded is driven by exactly the event requested:
        # connect the DOM-ready signal when asked for, disconnect otherwise.
        if not wait_onload_event:
            if self.ghostInit.receivers(SIGNAL("dom_is_ready(bool)")) == 0:
                self.ghostInit.dom_is_ready.connect(self._page_loaded)
            Logger.log("Waiting until OnReady event is fired")
        else:
            if self.ghostInit.receivers(SIGNAL("dom_is_ready(bool)")) > 0:
                self.ghostInit.dom_is_ready.disconnect(self._page_loaded)

        body = QByteArray()

        try:
            method = getattr(QNetworkAccessManager,
                "%sOperation" % method.capitalize())
        except AttributeError:
            raise Exception("Invalid http method %s" % method)
        request = QNetworkRequest(QUrl(address))
        # NOTE(review): this only instantiates a CacheLoadControl value and
        # does not appear to modify the request. If bypassing the cache is
        # intended, request.setAttribute(
        # QNetworkRequest.CacheLoadControlAttribute, ...) is probably what
        # was meant -- confirm before changing behaviour.
        request.CacheLoadControl(QNetworkRequest.AlwaysNetwork)
        for header in headers:
            request.setRawHeader(header, headers[header])

        if auth is not None:
            self.network_manager.setAuthCredentials(auth[0], auth[1])
        self._reset_for_loading()
        self.main_frame.load(request, method, body)

        if not wait_for_loading:
            return self.get_loaded_page()
        return self.wait_for_page_loaded()
    
    
    def download(self, path, address, **kwards):
        """Open ``address`` and write the resulting content to ``path``.

        :param path: Destination file, opened in binary write mode.
        :param address: The resource URL, forwarded to ``self.open``.
        :param kwards: Extra keyword arguments forwarded to ``self.open``.
        :return: The object returned by ``self.open``.
        """
        resource = self.open(address, **kwards)

        with open(path, "wb") as target:
            target.write(resource.content)

        return resource
    
    
    @can_load_page
    @client_utils_required
    def set_field_value(self, selector, value, blur=True):
        """Sets the value of the field matched by given selector.

        Fires ``focus`` on the element, sets the value according to the
        element's tag and input type, then optionally fires ``blur``.

        :param selector: A CSS selector that target the field.
        :param value: The value to fill in.
        :param blur: An optional boolean that force blur when filled in.
        :return: The result of the evaluation/click used to set the value,
            or None when the input type is not handled.
        :raises Exception: If no element matches, or the tag is not
            TEXTAREA, SELECT or INPUT.
        """
        def _set_text_value(selector, value):
            return self.evaluate(
                'document.querySelector("%s").value=%s;' %
                    (selector, json.dumps(value)))

        res = None

        element = self.main_frame.findFirstElement(selector)
        if element.isNull():
            raise Exception('can\'t find element for "%s"' % selector)
        self.fire_on(selector, 'focus')
        tag_name = element.tagName()
        if tag_name in ["TEXTAREA", "SELECT"]:
            res = _set_text_value(selector, value)
        elif tag_name == "INPUT":
            # An <input> without a type attribute is a text field, so fall
            # back to 'text' instead of silently skipping the element.
            field_type = element.attribute('type', 'text')
            if field_type in ["color", "date", "datetime",
                "datetime-local", "email", "hidden", "month", "number",
                "password", "range", "search", "tel", "text", "time",
                "url", "week"]:
                res = _set_text_value(selector, value)
            elif field_type == "checkbox":
                res = self.evaluate(
                    'GhostUtils.setCheckboxValue("%s", %s);' %
                        (selector, json.dumps(value)))
            elif field_type == "radio":
                res = self.evaluate(
                    'GhostUtils.setRadioValue("%s", %s);' %
                        (selector, json.dumps(value)))
            elif field_type == "file":
                # chooseFile() hands back _upload_file while the click is
                # processed; always clear it, even if the click fails.
                self._upload_file = value
                try:
                    res = self.click(selector)
                finally:
                    self._upload_file = None
        else:
            raise Exception('unsuported field tag')
        if blur:
            self.fire_on(selector, 'blur')

        return res

    def set_viewport_size(self, width, height):
        """Resizes the viewport of the page.

        :param width: Viewport width, in pixels.
        :param height: Viewport height, in pixels.
        """
        viewport = QSize(width, height)
        self.setViewportSize(viewport)
    
    def wait_for(self, condition, timeout_message):
        """Blocks until ``condition()`` becomes true.

        Between two checks the method sleeps for 10 ms, lets the Qt
        application process its pending events and, when one is set, invokes
        ``self.wait_callback``.

        :param condition: A callable that returns the condition.
        :param timeout_message: The exception message on timeout.
        :raises Exception: When more than ``self.wait_timeout`` seconds have
            elapsed and the condition is still false.
        """
        began = time.time()
        while True:
            if condition():
                return
            if time.time() > (began + self.wait_timeout):
                raise Exception(timeout_message)
            time.sleep(0.01)
            self._app.processEvents()
            callback = self.wait_callback
            if callback is not None:
                callback()

    def wait_for_alert(self):
        """Waits until an alert message has been recorded in ``self._alert``.

        :return: The recorded alert message; ``self._alert`` is reset to
            ``None`` before returning.
        """
        def alert_received():
            return self._alert is not None

        self.wait_for(alert_received, 'User has not been alerted.')
        message, self._alert = self._alert, None
        return message

    def wait_for_page_loaded(self):
        """Waits until the requested page is loaded and no unsupported-content
        reply is pending any more, assuming a page has been requested.

        :return: The result of ``self.get_loaded_page()``.
        """
        def page_ready():
            return self.loaded and not self._unsupported_files

        self.wait_for(page_ready, 'Unable to load requested page')
        return self.get_loaded_page()
    
    def get_loaded_page(self):
        """Returns the page resource if loading has completed.

        :return: ``self.http_resource_page`` when ``self.loaded`` is true and
            no unsupported-content reply is pending, otherwise ``None``.
        """
        still_pending = not self.loaded or len(self._unsupported_files) != 0
        if still_pending:
            return None
        return self.http_resource_page
    
    def wait_for_selector(self, selector):
        """Blocks until ``self.exists(selector)`` reports a match.

        :param selector: The selector to wait for.
        :return: Always ``True``; a timeout surfaces as the exception raised
            by ``self.wait_for``.
        """
        def selector_found():
            return self.exists(selector)

        failure = 'Can\'t find element matching "%s"' % selector
        self.wait_for(selector_found, failure)
        return True

    def wait_for_text(self, text):
        """Blocks until the plain text of the current frame contains ``text``.

        :param text: The text to wait for.
        :return: Always ``True``; a timeout surfaces as the exception raised
            by ``self.wait_for``.
        """
        def text_present():
            frame_text = self.currentFrame().toPlainText()
            return text in frame_text

        failure = 'Can\'t find "%s" in current frame' % text
        self.wait_for(text_present, failure)
        return True
    
    def _page_load_progress(self, progress):
        pass
        
    def _page_loaded(self, ok):
        """Called back when page is loaded.
        """
        # FIXME: Check why ok == False when we are trying to load
        # unsupported content
        self.loaded = True

    def _page_load_started(self):
        """Called back when page load started.
        """
        self.loaded = False

    def _release_last_resources(self):
        """Releases last loaded resources.

        :return: The released resources.
        """
        last_resources = self.http_resources
        self.http_resources = []
        return last_resources
    
    def release_last_resources(self):
        """Public alias of ``_release_last_resources``.

        :return: Whatever ``self._release_last_resources()`` returns.
        """
        released = self._release_last_resources()
        return released
    
    def _insert_dom_ready_code(self):
        self.mainFrame().addToJavaScriptWindowObject("GhostInit", self.ghostInit);
        #self.page.mainFrame().addToJavaScriptWindowObject("ghost_frame", self.page.mainFrame());
        self.evaluate_js_file(os.path.join(os.path.dirname(__file__), 'domready.js'))
    
    def _request_ended(self, reply):
        """Records a finished network reply as an HttpResource.

        When the reply belongs to the current frame's URL the DOM-ready code
        is injected first. A reply registered in ``_unsupported_files`` is
        removed from that mapping and its body is read so it can be attached
        to the resource. Replies without an HTTP status code are not recorded.

        :param reply: The QNetworkReply object.
        """
        if reply.url() == self.currentFrame().url():
            Logger.log("Injecting DOMReady code")
            self._insert_dom_ready_code()

        url_key = unicode(reply.url())
        content = None
        if url_key in self._unsupported_files:
            del self._unsupported_files[url_key]
            content = reply.readAll()

        if not reply.attribute(QNetworkRequest.HttpStatusCodeAttribute):
            return

        http_resource = HttpResource(reply, self.network_manager.cache(),
            content)

        # The first resource seen is kept as the page itself.
        if self.http_resource_page is None:
            self.http_resource_page = http_resource

        # max_resource_queued: None means unbounded, 0 disables queueing.
        limit = self.max_resource_queued
        if limit is None or limit > 0:
            self.http_resources.append(http_resource)

        # Drop the oldest entry once the queue exceeds its bound.
        if limit is not None and len(self.http_resources) > limit:
            self.http_resources.pop(0)
            
            
    def _unsupported_content(self, reply):
        """Remembers a reply with unsupported content, keyed by its URL.

        The reply is stored in ``self._unsupported_files``; no HttpResource is
        created here.

        :param reply: The QNetworkReply object.
        """
        url_key = unicode(reply.url())
        self._unsupported_files[url_key] = reply
    
    
    def switch_to_frame(self, frameName=None):
        """Gives the focus to the child frame with the given name.

        :param frameName: The name of the frame; ``None`` selects the main
            frame.
        :return: ``True`` when a frame received the focus, ``False`` when no
            child of the current frame carries that name.
        """
        if frameName is None:
            target = self.main_frame
        else:
            for target in self.currentFrame().childFrames():
                if target.frameName() == frameName:
                    break
            else:
                return False

        target.setFocus()
        return True
    
    def switch_to_frame_nro(self, nro=-1):
        """Change the focus to the indicated frame

        :param nro: Zero-based index into the child frames of the current
            frame; ``-1`` (the default) or ``None`` selects the main frame.
        :return: ``True`` when a frame received the focus, ``False`` when
            ``nro`` does not address an existing child frame. This mirrors
            the return convention of ``switch_to_frame``.
        """
        if nro is None or nro == -1:
            self.main_frame.setFocus()
            return True

        frames = self.currentFrame().childFrames()
        # Only non-negative, in-range indexes address a child frame; anything
        # else (including an empty frame list) is reported as a failure
        # instead of raising IndexError or silently doing nothing.
        if 0 <= nro < len(frames):
            frames[nro].setFocus()
            return True

        return False
    
    @property
    def cookies(self):
        """All cookies currently held by the network manager's cookie jar."""
        jar = self.network_manager.cookieJar()
        return jar.allCookies()

    def delete_cookies(self):
        """Empties the network manager's cookie jar."""
        jar = self.network_manager.cookieJar()
        jar.setAllCookies([])
    
    def delete_cache(self):
        """Clears the cache attached to the network manager."""
        cache = self.network_manager.cache()
        cache.clear()
        
    def load_cookies(self, cookie_storage, keep_old=False):
        """load from cookielib's CookieJar or Set-Cookie3 format text file.

        :param cookie_storage: file location string on disk or CookieJar
            instance (any object whose class name ends with ``CookieJar``).
        :param keep_old: When true, the cookies already present in the Qt
            cookie jar are kept and the loaded ones are appended to them;
            otherwise the jar is replaced by the loaded cookies.
        :raises ValueError: If ``cookie_storage`` is neither a string nor a
            CookieJar.
        """
        def to_qt_cookie(py_cookie):
            qt_cookie = QNetworkCookie(py_cookie.name, py_cookie.value)
            qt_cookie.setSecure(py_cookie.secure)
            if py_cookie.path_specified:
                qt_cookie.setPath(py_cookie.path)
            if py_cookie.domain != "":
                qt_cookie.setDomain(py_cookie.domain)
            # cookielib uses expires=None for session cookies; skip both None
            # and 0 so that setTime_t only ever receives a real timestamp.
            if py_cookie.expires:
                expiry = QDateTime()
                expiry.setTime_t(py_cookie.expires)
                qt_cookie.setExpirationDate(expiry)
            # not yet handled(maybe less useful):
            #   py cookie.rest / QNetworkCookie.setHttpOnly()
            return qt_cookie

        def fill_qt_cookie_jar(py_cookie_jar, qt_cookie_jar):
            # allCookies() is the accessor used by the other cookie helpers
            # of this class; copy the list so the jar's own list is not
            # mutated while it is being extended.
            all_cookies = list(qt_cookie_jar.allCookies()) if keep_old else []
            all_cookies.extend(to_qt_cookie(pc) for pc in py_cookie_jar)
            qt_cookie_jar.setAllCookies(all_cookies)

        storage_type = cookie_storage.__class__.__name__
        if storage_type in ('str', 'unicode'):
            jar = LWPCookieJar(cookie_storage)
            jar.load()
        elif storage_type.endswith('CookieJar'):
            jar = cookie_storage
        else:
            raise ValueError('unsupported cookie_storage type.')

        fill_qt_cookie_jar(jar, self.network_manager.cookieJar())
        
    
    def save_cookies(self, cookie_storage):
        """Save to cookielib's CookieJar or Set-Cookie3 format text file.

        :param cookie_storage: file location string or CookieJar instance
            (any object whose class name ends with ``CookieJar``).
        :raises ValueError: If ``cookie_storage`` is neither a string nor a
            CookieJar.
        """
        # Long type boundary on 32bit platforms; avoid ValueError
        max_expires = 2147483647

        def to_py_cookie(qt_cookie):
            path = str(qt_cookie.path())
            path_specified = path != ""
            domain = str(qt_cookie.domain())
            domain_specified = domain != ""
            # NOTE(review): a cookie without an expiration date presumably
            # yields an out-of-range timestamp here and is therefore stored
            # with the clamped maximum rather than as a session cookie --
            # confirm against QDateTime.toTime_t before changing.
            expires = min(int(qt_cookie.expirationDate().toTime_t()),
                max_expires)
            return Cookie(
                version=0,
                name=str(qt_cookie.name()),
                value=str(qt_cookie.value()),
                port=None,
                port_specified=False,
                domain=domain,
                domain_specified=domain_specified,
                domain_initial_dot=(domain.startswith('.')
                    if domain_specified else None),
                path=path if path_specified else None,
                path_specified=path_specified,
                secure=qt_cookie.isSecure(),
                expires=expires,
                discard=False,
                comment=None,
                comment_url=None,
                rest={})

        def fill_py_cookie_jar(qt_cookie_jar, py_cookie_jar):
            for qt_cookie in qt_cookie_jar.allCookies():
                py_cookie_jar.set_cookie(to_py_cookie(qt_cookie))

        storage_type = cookie_storage.__class__.__name__
        if storage_type in ('str', 'unicode'):
            jar = LWPCookieJar(cookie_storage)
            fill_py_cookie_jar(self.network_manager.cookieJar(), jar)
            jar.save()
        elif storage_type.endswith('CookieJar'):
            fill_py_cookie_jar(self.network_manager.cookieJar(),
                cookie_storage)
        else:
            raise ValueError('unsupported cookie_storage type.')
Beispiel #26
0
def main():
    """Create a pdf of story cards from an exported Pivotal Tracker csv.

    Command line arguments are parsed with argparse. The first csv row is
    treated as the header; every following row is turned into a story via
    ``make_pivotal_story`` (which receives the header and an
    ``(index, row)`` tuple, the index starting at 1). Stories are drawn four
    per landscape page in a 2x2 grid, the pdf is written to the output path
    and then passed to ``open_file``.

    :return: The path of the generated pdf file.
    :raises SystemExit: On invalid arguments or when the csv file has no rows
        (reported through ``ArgumentParser.error``).
    """
    arg_parser = argparse.ArgumentParser(
        description=
        'Create a pdf document from a exported csv of Pivotal Tracker',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    arg_parser.add_argument('csv', help='the file path to the csv file')
    arg_parser.add_argument('-m',
                            '--margin',
                            type=int,
                            default=5,
                            help='margin of the page in mm')
    arg_parser.add_argument('-o',
                            '--output',
                            help='file path to the generated pdf')
    arg_parser.add_argument('-n',
                            '--show-number',
                            action='store_true',
                            help='shows the story number on the bottom left')
    arg_parser.add_argument('-t',
                            '--show-tasks',
                            action='store_true',
                            help='shows the tasks for each story')
    arg_parser.add_argument(
        '-c',
        '--collate',
        action='store_true',
        help='collate stories for easier sorting after cutting all pages')
    arg_parser.add_argument(
        '-s',
        '--strict',
        action='store_true',
        help='fails if the csv file does not contain all required columns')

    args = arg_parser.parse_args()

    # Default output: same location and base name as the csv, '.pdf' suffix.
    output_file = args.output if args.output is not None \
        else os.path.splitext(args.csv)[0] + '.pdf'

    # 297 x 210 are the landscape page dimensions used for the layout (mm);
    # each page holds a 2x2 grid of equally sized stories inside the margin.
    page_width = 297
    page_height = 210
    page_margin = args.margin
    story_width = (page_width - (page_margin * 2)) / 2
    story_height = (page_height - (page_margin * 2)) / 2

    with open(args.csv, 'r') as csvfile:
        data = list(csv.reader(csvfile, delimiter=','))

    # Without at least a header row there is nothing to index below; report
    # it as a usage error instead of failing with a bare IndexError.
    if not data:
        arg_parser.error('the csv file %s is empty' % args.csv)

    header = data[0]
    if args.strict:
        validate_columns(header)
    stories = map(partial(make_pivotal_story, header),
                  enumerate(data[1:], 1))

    pdf = Pdf()
    pdf.set_auto_page_break(False)

    # Top-left corner of each of the four slots: row by row, left to right.
    positions = [(page_margin, page_margin),
                 (page_margin + story_width, page_margin),
                 (page_margin, page_margin + story_height),
                 (page_margin + story_width, page_margin + story_height)]

    stories = list(iterstories(stories, include_tasks=args.show_tasks))
    chunk_function = stacked_chunks if args.collate else chunks
    for story_chunk in chunk_function(stories, 4):
        pdf.add_page('Landscape')
        for story, position in zip(story_chunk, positions):
            story.draw(
                pdf,
                position[0],
                position[1],
                story_width,
                story_height,
                args.show_number,
            )

    pdf.output(output_file)

    open_file(output_file)

    return output_file