Ejemplo n.º 1
0
def ops_family_inpadoc_json_handler(request):
    """
    Request handler delegating to ``ops_family_inpadoc``.

    Reads ``reference_type`` and ``patent`` from ``request.matchdict``
    (``None`` when absent) and ``constituents`` from ``request.params``
    (empty string when absent), then returns whatever
    ``ops_family_inpadoc`` returns for these three values.
    """
    route_values = request.matchdict

    # constituents: biblio, legal
    return ops_family_inpadoc(
        route_values.get('reference_type'),
        route_values.get('patent'),
        request.params.get('constituents', ''),
    )
Ejemplo n.º 2
0
    def to_zip(self, request=None, options=None):
        """
        Assemble a ZIP archive for this dossier and return its content.

        The archive always contains ``@readme.txt``, ``@metadata.txt`` and
        ``@summary.txt``. Depending on ``options``, report files are added
        below ``report/`` and per-document media files below ``media/``.
        If at least one per-document artifact was attempted, an overview
        of delivered and missing artifacts is written to
        ``media/xml/@report.txt``.

        Side effect: rows of ``self.df_documents`` lacking a ``document``
        value are dropped in place.

        :param request: Handed to ``self.clear_request_errors`` after each
                        per-document acquisition step.
        :param options: Nested mapping selecting the archive content.
                        Absent sections or flags are treated as disabled.
                        Besides the flags shown below, ``media.family``
                        is evaluated as well. Example::

             u'options': {u'media': {u'biblio': False,
                                     u'claims': False,
                                     u'description': False,
                                     u'pdf': True,
                                     u'register': False},
                          u'report': {u'csv': True,
                                      u'json': True,
                                      u'pdf': False,
                                      u'xlsx': False}},

        :return: The bytes of the in-memory ZIP archive.
        """

        # TODO: Text representations for biblio, register, family
        # TODO: PDF Extracts

        options = options or bunchify({'report': {}, 'media': {}})

        def enabled(section, name):
            # Attribute access on the options object fails for flags which
            # have not been supplied (e.g. with the default options above),
            # so look them up like dictionary keys and treat absent as off.
            # Assumes ``options`` and its sections are dict-like.
            settings = options.get(section) or {}
            return bool(settings.get(name))

        # Remove entries with empty/undefined document numbers
        self.df_documents.dropna(subset=['document'], inplace=True)

        # Reject entries with seen == True.
        # This is an element-wise comparison on the column, hence "== False".
        filtered = self.df_documents[(self.df_documents.seen == False)]
        documents = list(filtered.document)

        buffer = BytesIO()
        with ZipFile(buffer, 'w', ZIP_DEFLATED) as zipfile:

            # FIXME: Add TERMS (liability waiver) and more...
            zipfile.writestr('@readme.txt',
                             u'Zip archive created by IP Navigator.')

            # Add text summary
            zipfile.writestr('@metadata.txt',
                             self.get_metadata().encode('utf-8'))
            zipfile.writestr('@summary.txt',
                             self.get_summary().encode('utf-8'))

            # Report files
            # ------------

            # Add Workbook
            workbook_payload = None
            if enabled('report', 'xlsx'):
                workbook_payload = DossierXlsx(self.data).create()
                zipfile.writestr('report/@dossier.xlsx', workbook_payload)

            # Add Workbook in PDF format.
            # ``workbook_payload`` is None here unless the XLSX report
            # has been requested as well.
            if enabled('report', 'pdf'):
                try:
                    zipfile.writestr(
                        'report/@dossier.pdf',
                        DossierXlsx(
                            self.data).to_pdf(payload=workbook_payload))
                except Exception as ex:
                    # Best effort: a failed PDF rendering must not
                    # prevent delivery of the remaining archive.
                    log.error(
                        u'Rendering dossier to PDF failed. '
                        u'Exception: {ex}\n{trace}'.format(
                            ex=ex, trace=exception_traceback()))

            # Add CSV
            if enabled('report', 'csv'):
                zipfile.writestr('report/csv/01-queries.csv',
                                 self.to_csv(self.df_queries))
                zipfile.writestr('report/csv/02-documents.csv',
                                 self.to_csv(self.df_documents))
                zipfile.writestr('report/csv/03-comments.csv',
                                 self.to_csv(self.df_comments))

            # Add JSON
            if enabled('report', 'json'):
                zipfile.writestr('report/json/01-queries.json',
                                 self.to_json(self.df_queries))
                zipfile.writestr('report/json/02-documents.json',
                                 self.to_json(self.df_documents))
                zipfile.writestr('report/json/03-comments.json',
                                 self.to_json(self.df_comments))

            # Media files
            # -----------

            # FIXME: This should go to some configuration setting.
            fulltext_countries_excluded_ops = [
                'BE', 'CN', 'DD', 'DE', 'DK', 'FR', 'GR', 'HU', 'JP', 'LU',
                'KR', 'RU', 'PT', 'SE', 'TR', 'SK', 'US'
            ]

            # Add full PDF documents
            if enabled('media', 'pdf'):
                pdf_ziparchive_add(zipfile, documents, path='media/pdf')

            # The media flags do not change while iterating the documents.
            want_biblio = enabled('media', 'biblio')
            want_description = enabled('media', 'description')
            want_claims = enabled('media', 'claims')
            want_register = enabled('media', 'register')
            want_family = enabled('media', 'family')

            # Add XML data
            # TODO: Add @report.txt for reflecting missing documents, differentiate between different XML kinds.
            # TODO: Add more TEXT formats (.abstract.txt, .biblio.txt, .register.txt)
            # TODO: Add ST.36 XML; e.g. from https://register.epo.org/download?number=EP08835045&tab=main&xml=st36
            # via https://register.epo.org/application?number=EP08835045
            # TODO: Add equivalents, e.g. http://ops.epo.org/3.1/rest-services/published-data/publication/epodoc/EP1000000/equivalents/biblio

            # Maps document number => {artifact kind => delivered?}
            status = OrderedDict()
            for document in documents:

                if not document or not document.strip():
                    continue

                log.info('Data acquisition for document {document}'.format(
                    document=document))

                status.setdefault(document, OrderedDict())
                patent = decode_patent_number(document)

                # NOTE(review): ``decode_patent_number`` presumably yields
                # nothing usable for unparseable numbers - confirm. Read the
                # country defensively, so such a document is reported as a
                # failed artifact by the handlers below instead of aborting
                # the whole export with an AttributeError.
                country = getattr(patent, 'country', None)
                fulltext_possible = \
                    country not in fulltext_countries_excluded_ops

                # Add XML "bibliographic" data (full-cycle)
                if want_biblio:
                    try:
                        biblio_payload = get_ops_biblio_data('publication',
                                                             document,
                                                             xml=True)
                        zipfile.writestr(
                            'media/xml/{document}.biblio.xml'.format(
                                document=document), biblio_payload)
                        status[document]['biblio'] = True

                    except Exception as ex:
                        status[document]['biblio'] = False
                        self.handle_exception(ex, 'biblio', document)

                    self.clear_request_errors(request)

                # Add XML "description" full text data
                # OPS does not have full texts for DE, US, ...
                if want_description:
                    # Stays False for excluded countries, so they show up
                    # as missing in the report.
                    status[document]['description'] = False
                    if fulltext_possible:
                        try:
                            # Write XML
                            document_number = encode_epodoc_number(patent)
                            description_payload = ops_description(
                                document_number, xml=True)
                            zipfile.writestr(
                                'media/xml/{document}.description.xml'.format(
                                    document=document), description_payload)
                            status[document]['description'] = True

                            # Write TEXT
                            with ignored():
                                text_payload = self.get_fulltext(
                                    description_payload, 'description')
                                if text_payload:
                                    zipfile.writestr(
                                        'media/txt/{document}.description.txt'.
                                        format(document=document),
                                        text_payload.encode('utf-8'))

                        except Exception as ex:
                            self.handle_exception(ex, 'description', document)

                    self.clear_request_errors(request)

                # Add XML "claims" full text data
                # OPS does not have full texts for DE, US, ...
                if want_claims:
                    # Stays False for excluded countries, so they show up
                    # as missing in the report.
                    status[document]['claims'] = False
                    if fulltext_possible:
                        try:
                            # Write XML
                            document_number = encode_epodoc_number(patent)
                            claims_payload = ops_claims(document_number,
                                                        xml=True)
                            zipfile.writestr(
                                'media/xml/{document}.claims.xml'.format(
                                    document=document), claims_payload)
                            status[document]['claims'] = True

                            # Write TEXT.
                            # <claim-text> elements are renamed to <p>
                            # before the payload goes to ``get_fulltext``.
                            with ignored():
                                text_payload = self.get_fulltext(
                                    claims_payload.replace(
                                        '<claim-text>',
                                        '<p>').replace('</claim-text>',
                                                       '</p>'), 'claims')
                                if text_payload:
                                    zipfile.writestr(
                                        'media/txt/{document}.claims.txt'.
                                        format(document=document),
                                        text_payload.encode('utf-8'))

                        except Exception as ex:
                            self.handle_exception(ex, 'claims', document)

                    self.clear_request_errors(request)

                # Add XML register data
                if want_register:

                    try:
                        register_payload = ops_register('publication',
                                                        document,
                                                        xml=True)
                        zipfile.writestr(
                            'media/xml/{document}.register.xml'.format(
                                document=document), register_payload)
                        status[document]['register'] = True

                    except Exception as ex:
                        status[document]['register'] = False
                        self.handle_exception(ex, 'register', document)

                    self.clear_request_errors(request)

                # Add XML family data
                if want_family:
                    try:
                        document_number = encode_epodoc_number(
                            patent, {'nokind': True})
                        family_payload = ops_family_inpadoc('publication',
                                                            document_number,
                                                            'biblio',
                                                            xml=True)
                        zipfile.writestr(
                            'media/xml/{document}.family.xml'.format(
                                document=document), family_payload)
                        status[document]['family'] = True

                    except Exception as ex:
                        status[document]['family'] = False
                        self.handle_exception(ex, 'family', document)

                    self.clear_request_errors(request)

            # Generate report
            # ---------------

            # TODO: Format more professionally incl. generator description
            # TODO: Unify with "pdf_universal_multi"

            delivered_items = []
            missing_items = []
            # ``items()`` is available on both Python 2 and Python 3.
            for document, kinds in status.items():
                delivered = [kind for kind, ok in kinds.items() if ok]
                missing = [kind for kind, ok in kinds.items() if not ok]

                if delivered:
                    item = u'{document:20}{delivered}'.format(
                        document=document, delivered=u', '.join(delivered))
                    delivered_items.append(item)
                if missing:
                    item = u'{document:20}{missing}'.format(
                        document=document, missing=u', '.join(missing))
                    missing_items.append(item)

            if delivered_items or missing_items:

                # Build the report as unicode text and encode it explicitly
                # when writing, like the other text members of the archive.
                report_template = dedent(u"""
                Delivered artifacts ({delivered_count}):
                {delivered_files}

                Missing artifacts ({missing_count}):
                {missing_files}
                """).strip()

                report = report_template.format(
                    delivered_count=len(delivered_items),
                    missing_count=len(missing_items),
                    delivered_files=u'\n'.join(delivered_items),
                    missing_files=u'\n'.join(missing_items),
                )
                log.info(u'Export report:\n{report}'.format(report=report))
                zipfile.writestr('media/xml/@report.txt',
                                 report.encode('utf-8'))

        # The archive has been finalized when leaving the ``with`` block.
        payload = buffer.getvalue()

        return payload