예제 #1
0
def create_request_interceptor(event):
    """
    Decode the signed "op" (opaque parameters) request parameter.

    The decoded payload is published as ``request.opaque``, the token
    metadata together with the decoding status as ``request.opaque_meta``.
    Both attributes are always set and remain empty dictionaries when
    the request does not carry a token.
    """
    request = event.request
    request.opaque = {}
    request.opaque_meta = {}

    # Without a token, there is nothing to decode.
    token = request.params.get('op')
    if not token:
        return

    signer = event.request.registry.getUtility(ISigner)

    try:
        payload, metadata = signer.unsign(token)

        if not payload:
            log.error('opaque parameter token is empty. data=%s, token=%s',
                      payload, token)
        else:
            request.opaque.update(payload)

        if not metadata:
            log.error(
                'metadata of opaque parameter token is empty. meta=%s, token=%s',
                metadata, token)
        else:
            request.opaque_meta.update(metadata)
            request.opaque_meta['status'] = 'ok'

    except JwtExpiryError as ex:
        # Amend the error details with the expiry time in ISO format.
        expiry_iso = datetime_iso(
            unixtime_to_datetime(ex.message['jwt_expiry']))
        ex.message['jwt_expiry_iso'] = expiry_iso
        log.error('Opaque parameter token expired: expiry=%s, message=%s',
                  expiry_iso, ex.message)
        request.opaque_meta.update(status='error', errors=[ex.message])
        # TODO: log/send full stacktrace
        log.error(exception_traceback())

    except JwtVerifyError as ex:
        log.error('Error while decoding opaque parameter token: %s',
                  ex.message)
        request.opaque_meta.update(status='error', errors=[ex.message])
        # TODO: log/send full stacktrace
        log.error(exception_traceback())
예제 #2
0
def pdf_universal(patent):
    """
    Acquire the PDF for a patent document.

    The archive is tried first. When that fails for any reason, the
    acquisition falls back to ``pdf_from_ops``.

    :param patent: document number as given by the caller.
    :return: dictionary with the keys ``pdf`` (outcome of the acquisition
        or ``None``), ``datasource`` (``'archive'``, ``'ops'`` or ``None``)
        and ``meta`` (dictionary handed to ``pdf_from_ops``).
    """

    pdf = None
    datasource = None
    meta = {}

    document = decode_patent_number(patent)
    number_normalized = normalize_patent(patent)

    # First, try archive.
    try:
        # Skip requests for documents w/o kindcode
        if not document.kind:
            raise ValueError(u'No kindcode for patent: {}'.format(patent))

        pdf = archive_fetch_pdf(number_normalized)
        datasource = 'archive'

    except Exception as ex:

        # Log the failure exactly once. "Not found" is an expected
        # outcome and does not deserve a traceback.
        if not isinstance(ex, HTTPNotFound):
            log.error(exception_traceback())

        # Second, fall back to OPS. This needs the decoded document number.
        if document:
            pdf = pdf_from_ops(patent, document, meta)
            datasource = 'ops'

        else:
            log.error('Locating a document at the domestic office requires ' \
                      'a decoded document number for "{}"'.format(patent))

    return {'pdf': pdf, 'datasource': datasource, 'meta': meta}
예제 #3
0
def pdf_from_ops(patent, document, meta):
    """
    Build the PDF for a document from OPS.

    When building fails and the document is a US one, the location
    obtained from ``get_images_view_url`` is merged into ``meta`` instead.

    :param patent: document number, passed to ``ops_build_pdf``.
    :param document: decoded document number, its ``country`` decides
        whether the USPTO lookup takes place.
    :param meta: dictionary updated in place with the USPTO location.
    :return: outcome of ``ops_build_pdf`` on success, otherwise ``None``.
    """
    try:
        # 2016-04-21: Amend document number for CA documents, e.g. CA2702893C -> CA2702893A1
        # TODO: Reenable feature, but only when prefixing document with a custom page
        #       informing the user about recent changes not yet arrived at EPO.
        #if document.country == 'CA':
        #    patent = document.country + document.number
        log.info('PDF OPS attempt for {0}'.format(patent))
        return ops_build_pdf(patent)

    except Exception as ex:

        # "Not found" is an expected outcome, anything else gets a traceback.
        if not isinstance(ex, HTTPNotFound):
            log.error(exception_traceback())

        # The USPTO lookup is for US documents only.
        if document.country != 'US':
            return None

        log.info('PDF USPTO attempt for {0}'.format(patent))
        location = get_images_view_url(document)
        if not location:
            log.warning('PDF USPTO not available for {}'.format(patent))
            return None

        meta.update(location)
예제 #4
0
def depatisconnect_claims_handler_real(patent):
    """
    Fetch the claims for a document through ``depatisconnect_claims``,
    translating failures into HTTP errors.

    :param patent: document number, passed through unchanged.
    :return: whatever ``depatisconnect_claims`` returns.
    :raises HTTPNotFound: when the lookup raises ``KeyError``.
    :raises HTTPBadRequest: when the lookup raises any other exception.
    """
    try:
        return depatisconnect_claims(patent)

    except KeyError as error:
        log.error('No details at DEPATISconnect: %s %s', type(error), error)
        raise HTTPNotFound(error)

    except ValueError as error:
        log.error('Fetching details from DEPATISconnect failed: %s %s',
                  type(error), error)
        raise HTTPBadRequest(error)

    except Exception as error:
        # Anything unexpected additionally gets its traceback logged.
        log.error('Unknown error from DEPATISconnect: %s %s.',
                  type(error), error)
        log.error(exception_traceback())
        raise HTTPBadRequest(error)
예제 #5
0
def depatisconnect_description_handler_real(patent):
    """
    Fetch the description for a document through
    ``depatisconnect_description``, translating failures into HTTP errors.

    A result with an empty ``xml`` item is treated like a missing one.

    :param patent: document number, passed through unchanged.
    :return: whatever ``depatisconnect_description`` returns.
    :raises HTTPNotFound: when the lookup raises ``KeyError`` or the
        description is empty.
    :raises HTTPBadRequest: when the lookup raises any other exception.
    """
    try:
        result = depatisconnect_description(patent)
        if result['xml']:
            return result
        raise KeyError('Description is empty')

    except KeyError as error:
        log.error('No details at DEPATISconnect: %s %s', type(error), error)
        raise HTTPNotFound(error)

    except ValueError as error:
        log.error('Fetching details from DEPATISconnect failed: %s %s',
                  type(error), error)
        raise HTTPBadRequest(error)

    except Exception as error:
        # Anything unexpected additionally gets its traceback logged.
        log.error('Unknown error from DEPATISconnect: %s %s.',
                  type(error), error)
        log.error(exception_traceback())
        raise HTTPBadRequest(error)
예제 #6
0
def pdf_universal_real(patent, response):
    """
    Acquire the PDF for a patent document by probing data sources in turn.

    As long as ``response.pdf`` is ``None``, the sources are tried in this
    order: EPO publication server (EP documents only), USPTO (US documents
    only), DPMA and EPO OPS. A failing source is logged and skipped.

    :param patent: document number as given by the caller.
    :param response: object whose ``pdf`` and ``datasource`` attributes
        are assigned by the first successful source.
    :return: always ``True``.
    :raises ValueError: when the document number cannot be decoded.
    """
    document = decode_patent_number(patent)
    number_normalized = normalize_patent(patent)

    # Sanity checks: all stages below rely on the decoded number.
    if document is None:
        log.error('Locating a document at the domestic office requires '
                  'a decoded document number for "{}"'.format(patent))
        raise ValueError('Unable to decode document number {}'.format(patent))

    # 1. If it's an EP document, try European publication server first.
    if response.pdf is None:
        if document.country == 'EP':
            try:
                response.pdf = publicationserver_fetch_pdf(patent)
                response.datasource = 'epo-publication-server'

            except Exception as ex:
                notice = 'PDF {}: Not available from EPO. {}'.format(
                    patent, ex)
                log.warning(notice)
                unexpected = not isinstance(ex, HTTPError)
                if unexpected:
                    log.error(exception_traceback())

    # 2. Next, try USPTO servers if it's an US document.
    if response.pdf is None:
        if document.country == 'US':
            try:
                response.pdf = uspto_fetch_pdf(patent)
                response.datasource = 'uspto'

            except Exception as ex:
                notice = 'PDF {}: Not available from USPTO. {}'.format(
                    patent, ex)
                log.warning(notice)
                unexpected = not isinstance(ex, HTTPError)
                if unexpected:
                    log.error(exception_traceback())

    # 3. Next, try DPMA servers.
    if response.pdf is None:
        try:
            # Skip requests for documents w/o kindcode
            if not document.kind:
                raise ValueError('No kindcode for patent: {}'.format(patent))

            response.pdf = depatisconnect_fetch_pdf(number_normalized)
            response.datasource = 'dpma'

        except Exception as ex:
            notice = 'PDF {}: Not available from DPMA. {}'.format(patent, ex)
            log.warning(notice)

            # A missing configuration is reported as warning only,
            # "not found" is expected, everything else gets a traceback.
            if isinstance(ex, NotConfiguredError):
                log.warning(ex)
            elif not isinstance(ex, HTTPNotFound):
                log.error(exception_traceback())

    # 4. Next, try EPO OPS service.
    # Note this will assemble PDF out of single pages requested
    # from EPO OPS, which is a rather expensive operation.
    if response.pdf is None:

        # 2016-04-21: Amend document number for CA documents, e.g. CA2702893C -> CA2702893A1
        # TODO: Reenable feature, but only when prefixing document with a custom page
        #       informing the user about recent changes not yet arrived at EPO.
        # if document.country == 'CA':
        #    patent = document.country + document.number

        try:
            response.pdf = ops_build_pdf(patent)
            response.datasource = 'epo-ops'

        except Exception as ex:
            notice = 'PDF {}: Not available from OPS. {}'.format(patent, ex)
            log.warning(notice)
            unexpected = not isinstance(ex, HTTPError)
            if unexpected:
                log.error(exception_traceback())

    # 5. Last but not least, try to redirect to USPTO server.
    # TODO: Move elsewhere as deactivated on 2019-02-19.
    uspto_redirect_enabled = False
    if uspto_redirect_enabled and response.pdf is None and document.country == 'US':

        log.info('PDF {}: USPTO attempt'.format(patent))
        failure = None
        located = False
        try:
            images_location = uspto_pdfview_url(document)
            if images_location:
                response.meta.update(images_location)
                response.datasource = 'uspto'
                located = True

        except Exception as ex:
            failure = ex
            if not isinstance(ex, HTTPError):
                log.error(exception_traceback())

        if not located:
            log.warning('PDF {}: Not available on USPTO. {}'.format(
                patent, failure))

    return True
예제 #7
0
def export_util_handler(request):
    """
    Serve data exports as file downloads.

    The route parameter ``kind`` selects what gets exported
    (``numberlist`` or ``dossier``), ``format`` selects the output
    format of a dossier export (``xlsx``, ``pdf``, ``csv`` or ``zip``).

    :param request: request object providing ``matchdict``, ``params``
        and ``response``.
    :return: the export payload; a ``HTTPBadRequest`` for an unknown
        dossier format; a ``HTTPServerError`` when the dossier export fails.
    :raises HTTPServerError: when ``kind`` is unknown.
    """
    kind = request.matchdict['kind']
    fmt = request.matchdict['format']

    def announce_download(filename):
        # Export buffer to HTTP response: derive the content type from
        # the file name and offer the payload as attachment.
        mimetype, encoding = mimetypes.guess_type(filename, strict=False)
        response = request.response
        response.content_type = mimetype
        response.charset = None
        response.headers['Content-Disposition'] = \
            'attachment; filename={filename}'.format(filename=filename)

    # Convert numberlists to Excel
    if kind == 'numberlist':
        numberlist = parse_numberlist(request.params.get('numberlist'))
        workbook = create_xlsx({'numberlist': numberlist})
        announce_download('{0}.xlsx'.format('numberlist'))
        return workbook

    if kind != 'dossier':
        # TODO: Log request
        log.error('Data export error')

        # TODO: Proper error page for user to report this problem.
        raise HTTPServerError('Data export error. Please contact support.')

    log.info('Starting dossier export to format "{format}"'.format(format=fmt))
    data = bunchify(json.loads(request.params.get('json')))

    try:
        if fmt == 'xlsx':
            # Generate Office Open XML Workbook
            payload = DossierXlsx(data).create()

        elif fmt == 'pdf':
            # Generate Office Open XML Workbook and convert to PDF
            payload = DossierXlsx(data).to_pdf()

        elif fmt == 'csv':
            # TODO: Add comments inline into numberlist
            dossier = Dossier(data)
            payload = dossier.to_csv(dossier.df_documents)

        elif fmt == 'zip':
            payload = Dossier(data).to_zip(
                request=request, options=data.get('options'))

        else:
            return HTTPBadRequest(
                'Export format "{format}" is unknown.'.format(format=fmt))

    except Exception:
        message = 'Exporting format "{format}" failed.'.format(format=fmt)
        log.error('{message}. Exception:\n{trace}'.format(
            message=message, trace=exception_traceback()))
        return HTTPServerError(message)

    # Send HTTP response
    announce_download('dossier_{name}_{timestamp}.{format}'.format(
        name=data.get('name', 'default'),
        timestamp=data.get('project', {}).get('modified'),
        format=fmt))

    return payload
예제 #8
0
    def handle_exception(self, ex, service_name, document):
        """
        Log an exception caught while acquiring XML data for a document.

        :param ex: the exception caught by the caller.
        :param service_name: label of the data kind, used for logging.
        :param document: document number, used for logging.
        :return: ``True`` when the exception carries the status 404 and
            counts as handled (ignored), ``False`` otherwise.
        """
        is_http_failure = isinstance(ex, (_JSONError, HTTPError))
        not_found = (is_http_failure and hasattr(ex, 'status_int')
                     and ex.status_int == 404)

        if not not_found:
            log.warning(
                (u'XML({service_name}, {document}) failed. '
                 u'Exception:\n{trace}').format(
                     service_name=service_name,
                     document=document,
                     trace=exception_traceback()))

            # Signal exception should be re-raised, maybe
            return False

        log.warning(u'XML({service_name}, {document}) not found'.format(
            service_name=service_name, document=document))

        # Signal exception has been handled (ignored)
        return True
예제 #9
0
    def to_zip(self, request=None, options=None):
        """
        Assemble the dossier export as Zip archive and return its content.

        The archive always receives ``@readme.txt``, ``@metadata.txt`` and
        ``@summary.txt``. On top of that, it receives the report files
        selected by ``options.report``, the media files selected by
        ``options.media`` and, when at least one per-document XML artifact
        was attempted, a delivery report at ``media/xml/@report.txt``.

        Only documents with ``seen == False`` are considered for media files.

        :param request: handed to ``self.clear_request_errors`` after each
            per-document acquisition step.
        :param options: selection of artifacts to include. When falsy, an
            empty selection ``{'report': {}, 'media': {}}`` is used.
            Example::

             u'options': {u'media': {u'biblio': False,
                                     u'claims': False,
                                     u'description': False,
                                     u'pdf': True,
                                     u'register': False},
                          u'report': {u'csv': True,
                                      u'json': True,
                                      u'pdf': False,
                                      u'xlsx': False}},

        :return: content of the in-memory Zip archive.
        """

        # TODO: Text representations for biblio, register, family
        # TODO: PDF Extracts

        options = options or bunchify({'report': {}, 'media': {}})

        # Remove entries with empty/undefined document numbers
        # Note this modifies ``self.df_documents`` in place.
        self.df_documents.dropna(subset=['document'], inplace=True)

        # Reject entries with seen == True
        filtered = self.df_documents[(self.df_documents.seen == False)]
        documents = list(filtered.document)

        # The archive is assembled in memory.
        buffer = BytesIO()
        with ZipFile(buffer, 'w', ZIP_DEFLATED) as zipfile:

            # FIXME: Add TERMS (liability waiver) and more...
            zipfile.writestr('@readme.txt',
                             u'Zip archive created by IP Navigator.')

            # Add text summary
            zipfile.writestr('@metadata.txt',
                             self.get_metadata().encode('utf-8'))
            zipfile.writestr('@summary.txt',
                             self.get_summary().encode('utf-8'))

            # Report files
            # ------------

            # Add Workbook
            # The payload is kept around in order to hand it to the
            # PDF rendering below; it stays None when xlsx is not selected.
            workbook_payload = None
            if options.report.xlsx:
                workbook_payload = DossierXlsx(self.data).create()
                zipfile.writestr('report/@dossier.xlsx', workbook_payload)

            # Add Workbook in PDF format
            # A failing PDF rendering is logged and does not abort the export.
            if options.report.pdf:
                try:
                    zipfile.writestr(
                        'report/@dossier.pdf',
                        DossierXlsx(
                            self.data).to_pdf(payload=workbook_payload))
                except Exception as ex:
                    log.error(u'Rendering dossier to PDF failed. ' \
                              u'Exception: {ex}\n{trace}'.format(ex=ex, trace=exception_traceback()))

            # Add CSV
            if options.report.csv:
                zipfile.writestr('report/csv/01-queries.csv',
                                 self.to_csv(self.df_queries))
                zipfile.writestr('report/csv/02-documents.csv',
                                 self.to_csv(self.df_documents))
                zipfile.writestr('report/csv/03-comments.csv',
                                 self.to_csv(self.df_comments))

            # Add JSON
            if options.report.json:
                zipfile.writestr('report/json/01-queries.json',
                                 self.to_json(self.df_queries))
                zipfile.writestr('report/json/02-documents.json',
                                 self.to_json(self.df_documents))
                zipfile.writestr('report/json/03-comments.json',
                                 self.to_json(self.df_comments))

            # Media files
            # -----------

            # Countries for which description and claims are not requested from OPS.
            # FIXME: This should go to some configuration setting.
            fulltext_countries_excluded_ops = [
                'BE', 'CN', 'DD', 'DE', 'DK', 'FR', 'GR', 'HU', 'JP', 'LU',
                'KR', 'RU', 'PT', 'SE', 'TR', 'SK', 'US'
            ]

            # Add full PDF documents
            if options.media.pdf:
                pdf_ziparchive_add(zipfile, documents, path='media/pdf')

            # Add XML data
            # TODO: Add @report.txt for reflecting missing documents, differentiate between different XML kinds.
            # TODO: Add more TEXT formats (.abstract.txt, .biblio.txt, .register.txt)
            # TODO: Add ST.36 XML; e.g. from https://register.epo.org/download?number=EP08835045&tab=main&xml=st36
            # via https://register.epo.org/application?number=EP08835045
            # TODO: Add equivalents, e.g. http://ops.epo.org/3.1/rest-services/published-data/publication/epodoc/EP1000000/equivalents/biblio

            # Bookkeeping for the delivery report: document -> kind -> success flag.
            status = OrderedDict()
            for document in documents:

                # Skip blank document numbers.
                if not document or not document.strip():
                    continue

                log.info('Data acquisition for document {document}'.format(
                    document=document))

                status.setdefault(document, OrderedDict())
                # NOTE(review): presumably decode_patent_number() yields None
                # for numbers it cannot decode; ``patent.country`` below is
                # accessed outside of the try blocks and would raise in that
                # case -- confirm.
                patent = decode_patent_number(document)

                # Add XML "bibliographic" data (full-cycle)
                if options.media.biblio:
                    try:
                        biblio_payload = get_ops_biblio_data('publication',
                                                             document,
                                                             xml=True)
                        zipfile.writestr(
                            'media/xml/{document}.biblio.xml'.format(
                                document=document), biblio_payload)
                        status[document]['biblio'] = True

                    except Exception as ex:
                        status[document]['biblio'] = False
                        self.handle_exception(ex, 'biblio', document)

                    self.clear_request_errors(request)

                # Add XML "description" full text data
                # OPS does not have full texts for DE, US, ...
                if options.media.description:
                    # Counts as missing unless the XML gets written below.
                    status[document]['description'] = False
                    if patent.country not in fulltext_countries_excluded_ops:
                        try:
                            # Write XML
                            document_number = encode_epodoc_number(patent)
                            description_payload = ops_description(
                                document_number, xml=True)
                            zipfile.writestr(
                                'media/xml/{document}.description.xml'.format(
                                    document=document), description_payload)
                            status[document]['description'] = True

                            # Write TEXT
                            # NOTE(review): presumably ignored() suppresses
                            # errors of the text conversion -- confirm.
                            with ignored():
                                text_payload = self.get_fulltext(
                                    description_payload, 'description')
                                if text_payload:
                                    zipfile.writestr(
                                        'media/txt/{document}.description.txt'.
                                        format(document=document),
                                        text_payload.encode('utf-8'))

                        except Exception as ex:
                            self.handle_exception(ex, 'description', document)

                    self.clear_request_errors(request)

                # Add XML "claims" full text data
                # OPS does not have full texts for DE, US, ...
                if options.media.claims:
                    # Counts as missing unless the XML gets written below.
                    status[document]['claims'] = False
                    if patent.country not in fulltext_countries_excluded_ops:
                        try:
                            # Write XML
                            document_number = encode_epodoc_number(patent)
                            claims_payload = ops_claims(document_number,
                                                        xml=True)
                            zipfile.writestr(
                                'media/xml/{document}.claims.xml'.format(
                                    document=document), claims_payload)
                            status[document]['claims'] = True

                            # Write TEXT
                            # The <claim-text> elements are renamed to <p>
                            # before handing the XML to get_fulltext().
                            with ignored():
                                text_payload = self.get_fulltext(
                                    claims_payload.replace(
                                        '<claim-text>',
                                        '<p>').replace('</claim-text>',
                                                       '</p>'), 'claims')
                                if text_payload:
                                    zipfile.writestr(
                                        'media/txt/{document}.claims.txt'.
                                        format(document=document),
                                        text_payload.encode('utf-8'))

                        except Exception as ex:
                            self.handle_exception(ex, 'claims', document)

                    self.clear_request_errors(request)

                # Add XML register data
                if options.media.register:

                    try:
                        register_payload = ops_register('publication',
                                                        document,
                                                        xml=True)
                        zipfile.writestr(
                            'media/xml/{document}.register.xml'.format(
                                document=document), register_payload)
                        status[document]['register'] = True

                    except Exception as ex:
                        status[document]['register'] = False
                        self.handle_exception(ex, 'register', document)

                    self.clear_request_errors(request)

                # Add XML family data
                # NOTE(review): "family" is read from options.media here but
                # is not part of the options example in the docstring.
                if options.media.family:
                    try:
                        document_number = encode_epodoc_number(
                            patent, {'nokind': True})
                        family_payload = ops_family_inpadoc('publication',
                                                            document_number,
                                                            'biblio',
                                                            xml=True)
                        zipfile.writestr(
                            'media/xml/{document}.family.xml'.format(
                                document=document), family_payload)
                        status[document]['family'] = True

                    except Exception as ex:
                        status[document]['family'] = False
                        self.handle_exception(ex, 'family', document)

                    self.clear_request_errors(request)

            #from pprint import pprint; print '====== status:'; pprint(status)

            # Generate report
            # ---------------

            # TODO: Format more professionally incl. generator description
            # TODO: Unify with "pdf_universal_multi"

            # One report line per document and outcome, listing the
            # respective kinds.
            # NOTE(review): iteritems() is available on Python 2 only.
            delivered_items = []
            missing_items = []
            for document, kinds in status.iteritems():
                delivered = []
                missing = []
                for kind, ok in kinds.iteritems():
                    if ok:
                        delivered.append(kind)
                    else:
                        missing.append(kind)

                if delivered:
                    item = u'{document:20}{delivered}'.format(
                        document=document, delivered=u', '.join(delivered))
                    delivered_items.append(item)
                if missing:
                    item = u'{document:20}{missing}'.format(
                        document=document, missing=u', '.join(missing))
                    missing_items.append(item)

            # The report is only written when there is something to report.
            if delivered_items or missing_items:

                report_template = dedent("""
                Delivered artifacts ({delivered_count}):
                {delivered_files}

                Missing artifacts ({missing_count}):
                {missing_files}
                """).strip()

                report = report_template.format(
                    delivered_count=len(delivered_items),
                    missing_count=len(missing_items),
                    delivered_files='\n'.join(delivered_items),
                    missing_files='\n'.join(missing_items),
                )
                log.info('Export report:\n{report}'.format(report=report))
                zipfile.writestr('media/xml/@report.txt', report)

        # The archive has been finalized when leaving the ``with`` block.
        payload = buffer.getvalue()

        return payload