Exemple #1
0
def ops_family_members(document_number):
    """Fetch the INPADOC patent family for *document_number* from OPS.

    Returns an ``OPSFamilyMembers`` whose ``raw`` attribute holds the
    undecoded family-member nodes and whose ``items`` holds, per member,
    the decoded publication (docdb + epodoc) and application (docdb)
    references with their dates.
    """

    # Pointers into the OPS family response document.
    member_ptr = JsonPointer('/ops:world-patent-data/ops:patent-family/ops:family-member')
    publication_ptr = JsonPointer('/publication-reference/document-id')
    application_ptr = JsonPointer('/application-reference/document-id')
    #pointer_priority_claim_reference = JsonPointer('/priority-claim/document-id')

    response = ops_family_inpadoc('publication', document_number, '')

    family_members = OPSFamilyMembers()
    family_members.raw = to_list(member_ptr.resolve(response))

    for member in family_members.raw:

        # B.1 Decode publication and application references for this member.
        publication = publication_ptr.resolve(member)
        pub_number, pub_date = _get_document_number_date(publication, 'docdb')
        pub_number_epodoc, _ = _get_document_number_date(publication, 'epodoc')

        application = application_ptr.resolve(member)
        app_number, app_date = _get_document_number_date(application, 'docdb')

        family_members.items.append({
            'publication': {'number-docdb': pub_number, 'date': pub_date, 'number-epodoc': pub_number_epodoc, },
            'application': {'number-docdb': app_number, 'date': app_date},
            })

    #log.info('Family members for %s:\n%s', document_number, family_members)

    return family_members
Exemple #2
0
    def read(self):
        """Decode OPS family members from ``self.data`` into ``self.results``.

        For each ``ops:family-member`` node, a best-effort publication
        number (epodoc) is decoded for logging, then the bibliographic
        data is read into an ``OPSExchangeDocument``. Members without an
        ``exchange-document`` node are skipped with a debug message; any
        other pointer failure is re-raised.
        """
        pointer_results = JsonPointer(
            '/ops:world-patent-data/ops:patent-family/ops:family-member')
        family_members = to_list(pointer_results.resolve(self.data))
        for family_member in family_members:

            # Decode document number (best effort; only used for logging).
            publication_number = 'unknown'
            try:
                document_id = JsonPointer('/publication-reference/document-id')
                publication_number, publication_date = OPSExchangeDocumentDecoder.document_number_date(
                    document_id.resolve(family_member), 'epodoc')
            except JsonPointerException:
                pass

            # Read bibliographic data for family member
            item = OPSExchangeDocument()
            try:
                item.read(family_member)
                self.results.append(item)
            except JsonPointerException as ex:
                # Bugfix: exceptions have no ".message" attribute on
                # Python 3 (removed after Python 2); str(ex) works on both.
                if "member 'exchange-document' not found" in str(ex):
                    logger.debug(
                        'No bibliographic data for family member "{}"'.format(
                            publication_number))
                else:
                    raise
Exemple #3
0
def createOffsetMeta(offset, bookkeeping):
    """
    sets up a location to track rule and step ids for a given scope offset
    """
    pointer = JsonPointer(offset)
    # Materialise every path segment of the offset as a nested dict.
    node = bookkeeping
    for segment in pointer.parts:
        node = node.setdefault(segment, {})
    # Attach the tracking slot, preserving a pre-existing one.
    node.setdefault("_meta", {"stages": [], "steps": []})
Exemple #4
0
    def add(self, name, value, *ignored_args, **ignored_kwargs):
        """ Adds a new JSON Pointer expression to the store.

        Raises if *value* is not a valid JSON Pointer expression.
        """
        # Compile and probe-resolve against an empty document; a clean
        # 'resolve' call means the expression was valid.
        expression = JsonPointer(value)
        expression.resolve({}, None)

        with self.update_lock:
            self.data[name] = expression
Exemple #5
0
    def add(self, name, config, *ignored_args, **ignored_kwargs):
        """ Adds a new JSON Pointer expression to the store.

        The expression text is taken from ``config.value``; raises if it
        is not a valid JSON Pointer.
        """
        # Validate eagerly by resolving against an empty document — no
        # exception from 'resolve' means the expression was valid.
        expression = JsonPointer(config.value)
        expression.resolve({}, None)

        with self.update_lock:
            self.data[name] = expression
Exemple #6
0
def createOffsetMeta(offset, bookkeeping):
    '''
    sets up a location to track rule and step ids for a given scope offset
    '''
    pointer = JsonPointer(offset)
    # Walk (and create on demand) each nested dict along the offset path.
    cursor = bookkeeping
    for segment in pointer.parts:
        cursor = cursor.setdefault(segment, {})
    # Only initialise '_meta' when it is not already present.
    if '_meta' not in cursor:
        cursor['_meta'] = {'stages': [], 'steps': []}
Exemple #7
0
def createOffsetMeta(offset, bookkeeping):
    '''
    sets up a location to track rule and step ids for a given scope offset
    '''
    offset_pointer = JsonPointer(offset)

    # Ensure every intermediate container along the offset exists.
    target = bookkeeping
    for key in offset_pointer.parts:
        target = target.setdefault(key, {})

    # Seed the meta slot; an existing one is left untouched.
    target.setdefault('_meta', {'stages': [], 'steps': []})
Exemple #8
0
    def addRule(self, rule, offset='', identifier = None):
        '''
        add a DAG extension rule, possibly with a scope offset
        '''
        scope = JsonPointer(offset)
        # A non-root scope needs its index data prepared first.
        if offset != '':
            createIndexData(scope.path, self.steps, self.values)
        createOffsetMeta(scope.path, self.bookkeeper)

        stage = OffsetStage(rule, self._makeoffset(offset), identifier=identifier)
        self.rules.append(stage)
        # Record the new stage in the bookkeeping meta for this scope.
        scope.resolve(self.bookkeeper)['_meta']['stages'].append(stage.identifier)
        return stage.identifier
Exemple #9
0
    def addRule(self, rule, offset="", identifier=None):
        """
        add a DAG extension rule, possibly with a scope offset
        """
        pointer = JsonPointer(offset)
        if offset != "":
            # Non-root offsets get their own step/value index entries.
            createIndexData(pointer.path, self.steps, self.values)
        createOffsetMeta(pointer.path, self.bookkeeper)

        stage = OffsetStage(rule, self._makeoffset(offset), identifier=identifier)
        self.rules.append(stage)

        # Track the stage identifier under this scope's bookkeeping meta.
        meta = pointer.resolve(self.bookkeeper)["_meta"]
        meta["stages"].append(stage.identifier)
        return stage.identifier
Exemple #10
0
        def _resolve_mappings(body):
            """Recursively inline '#'-referenced mapping types and flatten 'allOf'.

            Mutates *body* in place. Relies on the enclosing scope's
            ``included_mappings`` loader and ``_merge_dicts`` helper.
            """
            # from: https://stackoverflow.com/a/39016088
            def item_generator(json_input, lookup_key):
                # Yield every dict, at any nesting depth, containing lookup_key.
                if isinstance(json_input, dict):
                    for k, v in list(json_input.items()):
                        if k == lookup_key:
                            yield json_input
                        else:
                            yield from item_generator(v, lookup_key)

                elif isinstance(json_input, list):
                    for item in json_input:
                        yield from item_generator(item, lookup_key)

            for el in item_generator(body, 'type'):
                if '#' in el['type']:
                    # referenced type - split it into filename and jsonpointer
                    el_type = el['type'].split('#', maxsplit=1)

                    # get mapping from loaded mappings
                    mapping = included_mappings(el_type[0])
                    ptr = JsonPointer(el_type[1])
                    included_mapping_type = ptr.resolve(mapping)
                    # Resolve references nested inside the included type
                    # before merging it into the current element.
                    _resolve_mappings(included_mapping_type)
                    merged = _merge_dicts(el, included_mapping_type)
                    el.update(merged)

            for el in item_generator(body, 'allOf'):
                if 'properties' in el:
                    # Fold the properties of every 'allOf' constituent into
                    # the element's own 'properties', then drop 'allOf'.
                    props = {}
                    for el_type in el['allOf']:
                        props.update(el_type.get('properties', {}))

                    el['properties'].update(props)
                    el.pop('allOf')
Exemple #11
0
def _result_list_compact(response):
    """Convert an OPS biblio search response into a compact list of items.

    Each item carries abstract, application/publication number and date
    (epodoc, ISO-formatted dates), title, applicants and inventors;
    missing sections yield ``None``.
    """

    def resolve_optional(pointer, node, postprocess):
        # Sections like abstract or parties may be absent; map them to None.
        try:
            return postprocess(to_list(pointer.resolve(node)))
        except JsonPointerException:
            return None

    def isodate(date):
        # '20140107' -> '2014-01-07'; falsy values pass through unchanged.
        return date and '-'.join([date[:4], date[4:6], date[6:8]])

    pointer_results = JsonPointer('/ops:world-patent-data/ops:biblio-search/ops:search-result/exchange-documents')
    pointer_application_reference = JsonPointer('/exchange-document/bibliographic-data/application-reference/document-id')
    pointer_publication_reference = JsonPointer('/exchange-document/bibliographic-data/publication-reference/document-id')
    pointer_invention_title = JsonPointer('/exchange-document/bibliographic-data/invention-title')
    pointer_abstract = JsonPointer('/exchange-document/abstract')
    pointer_applicant = JsonPointer('/exchange-document/bibliographic-data/parties/applicants/applicant')
    pointer_inventor = JsonPointer('/exchange-document/bibliographic-data/parties/inventors/inventor')

    items = []
    for result in to_list(pointer_results.resolve(response)):

        pubref = pointer_publication_reference.resolve(result)
        pubref_number, pubref_date = _get_document_number_date(pubref, 'epodoc')

        appref = pointer_application_reference.resolve(result)
        appref_number, appref_date = _get_document_number_date(appref, 'epodoc')

        items.append({
            'abstract': resolve_optional(pointer_abstract, result, lambda nodes: map(_format_abstract, nodes)),
            'appdate': isodate(appref_date),
            'appnumber': appref_number,
            'pubdate': isodate(pubref_date),
            'pubnumber': pubref_number,
            'title': resolve_optional(pointer_invention_title, result, lambda nodes: map(_format_title, nodes)),
            'applicant': resolve_optional(pointer_applicant, result, lambda nodes: _mogrify_parties(nodes, 'applicant-name')),
            'inventor': resolve_optional(pointer_inventor, result, lambda nodes: _mogrify_parties(nodes, 'inventor-name')),
        })

    return items
Exemple #12
0
def query_ops(query, limit=50):
    """Run an OPS biblio search for *query* (at most *limit* hits).

    Returns the tuple ``(response, total_count)`` where *total_count* is
    the upstream total-result-count, which may exceed *limit*.
    """
    response = ops_published_data_search('biblio', query, '1-{0}'.format(limit))

    total_count_pointer = JsonPointer('/ops:world-patent-data/ops:biblio-search/@total-result-count')
    total_count = int(total_count_pointer.resolve(response))
    log.info('query: %s, total_count: %s', query, total_count)

    return response, total_count
Exemple #13
0
 def resolve(self, doc, default=jsonpointer._nothing):
     """Resolve this relative JSON pointer against *doc*.

     For the '#'-suffixed form (``self.isHash``), return the referenced
     member's key — as an int index when the parent is a list, else the
     key string. Otherwise delegate to plain JsonPointer resolution.
     """
     if self.isHash:
         if len(self.parts) == 1:
             # The pointer addresses the document root itself.
             refdata = doc
         else:
             # Resolve the parent container to learn its type.
             p = JsonPointer('/' + '/'.join(self.parts[:-1]))
             refdata = p.resolve(doc)
         if isinstance(refdata, list):
             # List parent: the final segment is a numeric index.
             return int(self.parts[-1])
         else:
             # Mapping parent: the final segment is the member name.
             return self.parts[-1]
     else:
         return super(RelJsonPointer, self).resolve(doc, default)
Exemple #14
0
 def resolve(self, doc, default=jsonpointer._nothing):
     """Resolve this relative JSON pointer against *doc*.

     When ``self.isHash`` is set ('#' form), the result is the key of the
     referenced member (int for list parents, str otherwise) rather than
     its value; otherwise standard JsonPointer resolution applies.
     """
     if self.isHash:
         if len(self.parts) == 1:
             # Single part: the pointer refers to the document root.
             refdata = doc
         else:
             # Look up the parent container so we can check its type.
             p = JsonPointer('/' + '/'.join(self.parts[:-1]))
             refdata = p.resolve(doc)
         if isinstance(refdata, list):
             # Inside a list the last segment is an integer index.
             return int(self.parts[-1])
         else:
             # Inside a mapping the last segment is the member name.
             return self.parts[-1]
     else:
         return super(RelJsonPointer, self).resolve(doc, default)
Exemple #15
0
 def read(self):
     """Decode OPS register documents from ``self.data`` into ``self.results``.

     Each ``reg:register-document`` node is read into an
     ``OPSRegisterDocument``; decoding is best-effort, failures are
     logged with a traceback and the remaining documents are processed.
     """
     pointer_results = JsonPointer(
         '/ops:world-patent-data/ops:register-search/reg:register-documents'
     )
     register_documents = to_list(pointer_results.resolve(self.data))
     for register_document in register_documents:
         item = OPSRegisterDocument()
         try:
             item.read(register_document)
             self.results.append(item)
         except JsonPointerException as ex:
             # Best-effort: log and continue with the next document.
             logger.warning(
                 'Could not read register information from data "{}": {}\n{}'
                 .format(register_document, ex, exception_traceback()))
Exemple #16
0
def ops_published_data_search_real(constituents, query, range):
    """Run an OPS published-data search for *query* over *range* ('M-N').

    Returns the decoded JSON payload, or None when the upstream response
    is not JSON. Raises NoResultsException when the result set is empty,
    so that empty results are never cached.
    """

    # OPS client object, impersonated for the current user.
    ops = get_ops_client()

    # Send request to OPS, translating the 'M-N' range string.
    range_begin, range_end = map(int, range.split('-'))
    response = ops.published_data_search(
        query, range_begin=range_begin, range_end=range_end, constituents=to_list(constituents))

    # Decode OPS response from JSON
    payload = handle_response(response, 'ops-search')

    if not response.headers['content-type'].startswith('application/json'):
        # Non-JSON upstream response: nothing to post-process.
        return None

    # Decode total number of results.
    total_count_pointer = JsonPointer('/ops:world-patent-data/ops:biblio-search/@total-result-count')
    count_total = int(total_count_pointer.resolve(payload))

    # Raise an exception to skip caching empty results.
    if count_total == 0:
        raise NoResultsException('No results', data=payload)

    return payload
Exemple #17
0
def analytics_family(query):
    """Run an OPS biblio search and aggregate per-family analytics.

    For each distinct patent family found for *query*, collect the family
    members, the first active priority claim, claims/description word
    counts plus drawing counts for selected countries, and designated
    states from the register. Returns a dict keyed by family id.

    NOTE(review): uses dict.iteritems() — Python 2 only; switch to
    .items() when porting to Python 3.
    """

    payload = {}
    # Per-family flags so the expensive lookups run at most once per family.
    family_has_statistics = {}
    family_has_designated_states = {}

    # A. aggregate list of publication numbers
    # http://ops.epo.org/3.1/rest-services/published-data/search/full-cycle/?q=pa=%22MAMMUT%20SPORTS%20GROUP%20AG%22
    # TODO: step through all pages
    response = ops_published_data_search('biblio', query, '1-50')
    pointer_results = JsonPointer('/ops:world-patent-data/ops:biblio-search/ops:search-result/exchange-documents')
    pointer_family_id = JsonPointer('/exchange-document/@family-id')
    pointer_publication_reference = JsonPointer('/exchange-document/bibliographic-data/publication-reference/document-id')

    # A.1 compute distinct list with unique families
    family_representatives = {}
    results = to_list(pointer_results.resolve(response))
    for result in results:
        family_id = pointer_family_id.resolve(result)
        # TODO: currently, use first document as family representative; this could change
        if family_id not in family_representatives:
            document_id_entries = pointer_publication_reference.resolve(result)
            doc_number, date = _get_document_number_date(document_id_entries, 'epodoc')
            if doc_number:
                family_representatives[family_id] = doc_number


    # B. Enrich all family representatives
    # http://ops.epo.org/3.1/rest-services/family/application/docdb/US19288494.xml
    for family_id, document_number in family_representatives.iteritems():

        payload.setdefault(family_id, {})

        # B.1 Aggregate all family members
        try:
             family = ops_family_members(document_number)
             family_members = family.items
             payload[family_id]['family-members'] = family_members
        except Exception as ex:
            # Clear accumulated request errors so this failure is non-fatal.
            request = get_current_request()
            del request.errors[:]
            log.warn('Could not fetch OPS family for {0}'.format(document_number))
            continue

        # B.2 Use first active priority
        for family_member_raw in family.raw:
            if 'priority-claim' not in payload[family_id]:
                for priority_claim in to_list(family_member_raw['priority-claim']):
                    try:
                        if priority_claim['priority-active-indicator']['$'] == 'YES':
                            prio_number, prio_date = _get_document_number_date(priority_claim['document-id'], 'docdb')
                            payload[family_id]['priority-claim'] = {'number-docdb': prio_number, 'date': prio_date}
                    except KeyError:
                        # Claims without the expected structure are skipped.
                        pass

        # B.3 Compute word- and image-counts for EP publication
        # Countries are tried in preference order; stop after the first
        # family member that yields statistics.
        for statistics_country in ['EP', 'WO', 'AT', 'CA', 'CH', 'GB', 'ES']:

            if family_id in family_has_statistics:
                break

            for family_member in family_members:
                pubref_number = family_member['publication']['number-epodoc']
                if pubref_number.startswith(statistics_country):
                    statistics = {}

                    # B.3.1 get data about claims
                    try:
                        claims_response = ops_claims(pubref_number)
                        pointer_claims = JsonPointer('/ops:world-patent-data/ftxt:fulltext-documents/ftxt:fulltext-document/claims')
                        claims = pointer_claims.resolve(claims_response)
                        claim_paragraphs = []
                        for part in to_list(claims['claim']['claim-text']):
                            claim_paragraphs.append(part['$'])
                        claim_text = '\n'.join(claim_paragraphs)
                        statistics['claims-language'] = claims['@lang']
                        statistics['claims-words-first'] = len(claim_paragraphs[0].split())
                        statistics['claims-words-total'] = len(claim_text.split())
                        statistics['claims-count'] = len(claim_paragraphs)

                    except Exception as ex:
                        request = get_current_request()
                        del request.errors[:]
                        log.warn('Could not fetch OPS claims for {0}'.format(pubref_number))

                    # B.3.2 get data about description
                    try:
                        description_response = ops_description(pubref_number)
                        pointer_description = JsonPointer('/ops:world-patent-data/ftxt:fulltext-documents/ftxt:fulltext-document/description')
                        descriptions = pointer_description.resolve(description_response)
                        description_paragraphs = []
                        for part in to_list(descriptions['p']):
                            description_paragraphs.append(part['$'])
                        description_text = '\n'.join(description_paragraphs)
                        statistics['description-words-total'] = len(description_text.split())

                    except Exception as ex:
                        request = get_current_request()
                        del request.errors[:]
                        log.warn('Could not fetch OPS description for {0}'.format(pubref_number))


                    if statistics:

                        # B.3.3 get data about image count
                        try:
                            pubref_number_docdb = family_member['publication']['number-docdb']
                            imginfo = inquire_images(pubref_number_docdb)
                            statistics['drawings-count'] = imginfo['META']['drawing-total-count']

                        except Exception as ex:
                            # Image info is optional; swallow the failure.
                            request = get_current_request()
                            del request.errors[:]

                        family_member['statistics'] = statistics
                        family_has_statistics[family_id] = True
                        break

        # B.4 compute designated states
        pointer_designated_states = JsonPointer('/ops:world-patent-data/ops:register-search/reg:register-documents/reg:register-document/reg:bibliographic-data/reg:designation-of-states')
        for country in ['EP', 'WO']:

            if family_id in family_has_designated_states:
                break

            for family_member in family_members:
                pubref_number = family_member['publication']['number-epodoc']
                if pubref_number.startswith(country):
                    try:
                        reginfo_payload = ops_register('publication', pubref_number, 'biblio')
                    # NOTE(review): bare except — also catches KeyboardInterrupt/SystemExit.
                    except:
                        request = get_current_request()
                        del request.errors[:]
                        log.warn('Could not fetch OPS register information for {0}'.format(pubref_number))
                        continue

                    designated_states_list = pointer_designated_states.resolve(reginfo_payload)
                    # Only the first (most recent?) entry is evaluated — TODO confirm ordering.
                    designated_states_info = to_list(designated_states_list)[0]
                    try:
                        regional_info = designated_states_info['reg:designation-pct']['reg:regional']
                        family_member.setdefault('register', {})
                        family_member['register']['designated-states'] = {
                            'gazette-num': designated_states_info['@change-gazette-num'],
                            'region': regional_info['reg:region']['reg:country']['$'],
                            'countries': list(_flatten_ops_json_list(regional_info['reg:country'])),
                        }
                        family_has_designated_states[family_id] = True
                        break

                    except Exception as ex:
                        log.error('Retrieving designated states for {0} failed.'.format(pubref_number))


    return payload
Exemple #18
0
def results_swap_family_members(response):
    """Swap each search hit for a preferred family member, in place.

    For every exchange-documents chunk in *response*, fetch the patent
    family of the first publication cycle and, following a country/kind
    priority list (DE w/o D1, EP B, WO, EP A, EP, US), replace the
    document representation with bibliographic data of the best-matching
    family member, marking the swap under '__meta__'. Duplicate chunks
    are then pruned and the reduced list written back into *response*.

    NOTE(review): 'publication_numbers' is never populated (the code that
    filled it is commented out below), so the return value is always an
    empty list — confirm callers ignore it.
    """

    #pointer_results = JsonPointer('/ops:world-patent-data/ops:biblio-search/ops:search-result/ops:publication-reference')
    #entries = pointer_results.resolve(results)

    publication_numbers = []

    # DE, EP..B, WO, EP..A2, EP..A3, EP, US
    priorities = [
        {'filter': lambda patent: patent.country.startswith('DE') and not patent.kind.startswith('D1')},
        {'filter': lambda patent: patent.country.startswith('EP') and patent.kind.startswith('B')},
        {'filter': 'WO'},
        {'filter': lambda patent: patent.country.startswith('EP') and patent.kind.startswith('A')},
        {'filter': 'EP'},
        {'filter': 'US'},
    ]

    # A filter is either a callable over a split patent number or a
    # simple country-code prefix string.
    def match_filter(item, filter):
        if callable(filter):
            patent = split_patent_number(item)
            outcome = filter(patent)
        else:
            outcome = item.startswith(filter)
        return outcome

    pointer_results = JsonPointer('/ops:world-patent-data/ops:biblio-search/ops:search-result/exchange-documents')
    pointer_publication_reference = JsonPointer('/bibliographic-data/publication-reference/document-id')
    #pointer_publication_reference = JsonPointer('/exchange-document/bibliographic-data/publication-reference/document-id')

    # A.1 compute distinct list with unique families
    family_representatives = {}
    chunks = to_list(pointer_results.resolve(response))
    all_results = []
    for chunk in chunks:

        #print 'chunk:', chunk

        # Prepare list of document cycles
        #chunk_results = to_list(pointer_publication_reference.resolve(chunk))
        cycles = to_list(chunk['exchange-document'])

        # Publication number of first cycle in EPODOC format
        representation = cycles[0]
        pubref = pointer_publication_reference.resolve(representation)
        representation_pubref_epodoc, _ = _get_document_number_date(pubref, 'epodoc')

        # All publication numbers in DOCDB format
        representation_pubrefs_docdb = []
        for cycle in cycles:
            pubref = pointer_publication_reference.resolve(cycle)
            representation_pubref_docdb, _ = _get_document_number_date(pubref, 'docdb')
            representation_pubrefs_docdb.append(representation_pubref_docdb)

        # Debugging
        #print 'representation_pubref_epodoc:', representation_pubref_epodoc
        #print 'representation_pubrefs_docdb:', representation_pubrefs_docdb

        # Fetch family members. When failing, use first cycle as representation.
        # NOTE(review): bare except — also catches KeyboardInterrupt/SystemExit.
        try:
            family_info = ops_family_members(representation_pubref_epodoc)
        except:
            log.warning('Failed to fetch family information for %s', representation_pubref_epodoc)
            chunk['exchange-document'] = representation
            request = get_current_request()
            del request.errors[:]
            continue

        #members = family_info.publications_by_country()
        #pprint(members)

        # Find replacement from list of family members controlled by priority list.
        for prio in priorities:

            filter = prio['filter']

            # Debugging
            #print 'checking prio:', filter

            # The current representation already satisfies this priority
            # level: keep it and stop searching.
            if match_filter(representation_pubref_epodoc, filter):
                break

            bibdata = None
            found = False
            for member in family_info.items:

                # Debugging
                #print 'member:'; pprint(member)

                member_pubnum = member['publication']['number-docdb']

                if match_filter(member_pubnum, filter):

                    # Debugging
                    #print 'Filter matched for member:', member_pubnum

                    try:
                        bibdata = ops_biblio_documents(member_pubnum)
                    except:
                        #log.warning('Fetching bibliographic data failed for %s', member_pubnum)
                        request = get_current_request()
                        del request.errors[:]
                        continue

                    #pprint(bibdata)
                    if bibdata:

                        # TODO: Add marker that this document was swapped, display appropriately.
                        found = True
                        break

            # Swap representation of document by appropriate family member
            # and set a marker in the data structure containing the original
            # document number(s).
            if found:

                representation = bibdata
                #print 'representation:'; pprint(representation)

                representation[0].setdefault('__meta__', {})
                representation[0]['__meta__']['swapped'] = {
                    'canonical': representation_pubrefs_docdb[0],
                    'list': [representation_pubref_epodoc] + representation_pubrefs_docdb,
                    }

                break

        # TODO: Here, duplicate documents might be. Prune/deduplicate them.
        # TODO: When choosing german family members (e.g. for EP666666), abstract is often missing.
        # TODO: => Carry along from original representation.

        """
        for result in cycles:
            #pprint(result)
            pubref = pointer_publication_reference.resolve(result)
            #print entry, pubref
            pubref_number, pubref_date = _get_document_number_date(pubref, 'docdb')
            publication_numbers.append(pubref_number)
        """

        chunk['exchange-document'] = representation

    # Filter duplicates
    # Identity is the (country, doc-number, kind, family-id) of the first cycle.
    seen = []
    results = []
    fields = ['@country', '@doc-number', '@kind', '@family-id']
    for chunk in chunks:

        # Prepare list of document cycles.
        cycles = to_list(chunk['exchange-document'])

        # Only look at first cycle slot.
        doc = cycles[0]

        # Compute unique document identifier.
        ident = {}
        for key in fields:
            ident[key] = doc[key]

        # Collect chunk if not seen yet.
        if ident in seen:
            continue
        else:
            seen.append(ident)
            results.append(chunk)

    # Overwrite reduced list of chunks in original DOM.
    pointer_results.set(response, results)

    return publication_numbers
Exemple #19
0
def ops_published_data_crawl(constituents, query, chunksize):
    """Crawl all OPS search result pages for *query* and merge them.

    Fetches results in pages of *chunksize* (OPS caps access at the first
    2000 hits), merges the per-chunk publication references, and returns
    a single response document with rewritten result metadata. Only
    constituents == 'pub-number' is implemented; anything else raises
    ValueError.
    """

    if constituents != 'pub-number':
        raise ValueError('constituents "{0}" invalid or not implemented yet'.format(constituents))

    # 'pub-number' is synthesised locally from an empty-constituent search.
    real_constituents = constituents
    if constituents == 'pub-number':
        constituents = ''

    # fetch first chunk (1-chunksize) from upstream
    first_chunk = ops_published_data_search(constituents, query, '1-{0}'.format(chunksize))
    #print first_chunk

    pointer_total_count = JsonPointer('/ops:world-patent-data/ops:biblio-search/@total-result-count')
    total_count = int(pointer_total_count.resolve(first_chunk))
    log.info('ops_published_data_crawl total_count: %s', total_count)

    # The first 2000 hits are accessible from OPS.
    total_count = min(total_count, 2000)

    # collect upstream results
    begin_second_chunk = chunksize + 1
    chunks = [first_chunk]
    for range_begin in range(begin_second_chunk, total_count + 1, chunksize):

        # countermeasure to robot flagging
        # <code>CLIENT.RobotDetected</code>
        # <message>Recent behaviour implies you are a robot. The server is at the moment busy to serve robots. Please try again later</message>
        time.sleep(5)

        range_end = range_begin + chunksize - 1
        range_string = '{0}-{1}'.format(range_begin, range_end)
        log.info('ops_published_data_crawl range: ' + range_string)
        chunk = ops_published_data_search(constituents, query, range_string)
        #print 'chunk:', chunk
        chunks.append(chunk)

    #return chunks

    # merge chunks into single result
    """
    <empty>:    "ops:search-result" { » "ops:publication-reference": [
    biblio:     "ops:search-result" { » "exchange-documents": [ » "exchange-document": {
    abstract:   "ops:search-result" { » "exchange-documents": [ » "exchange-document": {
    full-cycle: "ops:search-result" { » "exchange-documents": [ » "exchange-document": [
    pub-number: "ops:search-result" { » "ops:publication-reference": [
                        {
                            "@family-id": "6321653",
                            "@system": "ops.epo.org",
                            "document-id": {
                                "@document-id-type": "docdb",
                                "country": {
                                    "$": "DE"
                                },
                                "doc-number": {
                                    "$": "3705908"
                                },
                                "kind": {
                                    "$": "A1"
                                }
                            }
                        },
    """
    pointer_results = JsonPointer('/ops:world-patent-data/ops:biblio-search/ops:search-result/ops:publication-reference')
    #pointer_time_elapsed = JsonPointer('/ops:world-patent-data/ops:meta/@value')
    all_results = []
    #time_elapsed = int(pointer_time_elapsed.resolve(first_chunk))
    for chunk in chunks:

        # FIXME: use this for "real_constituents == 'pub-number'" only
        chunk_results = to_list(pointer_results.resolve(chunk))

        # FIXME: implement other constituents

        #print 'chunk_results:', chunk_results
        all_results += chunk_results

        #time_elapsed += int(pointer_time_elapsed.resolve(chunk))

    response = None
    if real_constituents == 'pub-number':

        # Reuse the first chunk's document as the merged response skeleton.
        response = first_chunk

        # delete upstream data
        del resolve_pointer(response, '/ops:world-patent-data/ops:biblio-search/ops:search-result')['ops:publication-reference']

        # compute own representation
        publication_numbers = []
        pointer_document_id = JsonPointer('/document-id')
        for entry in all_results:
            pubref = pointer_document_id.resolve(entry)
            #print entry, pubref
            pubref_number, pubref_date = _get_document_number_date(pubref, 'docdb')
            publication_numbers.append(pubref_number)

        # add own representation
        set_pointer(response, '/ops:world-patent-data/ops:biblio-search/ops:search-result/publication-numbers', publication_numbers, inplace=True)

        # amend metadata
        new_total_count = str(len(publication_numbers))
        pointer_total_count.set(response, new_total_count)
        set_pointer(response, '/ops:world-patent-data/ops:biblio-search/ops:range', {'@begin': '1', '@end': new_total_count})
        #pointer_time_elapsed.set(response, str(time_elapsed))

    if not response:
        raise ValueError('constituents "{0}" invalid or not implemented yet'.format(constituents))

    return response
Exemple #20
0
def createIndexData(offset, stepindex, valueindex):
    """Register the scope *offset* in the step index and seed the value index."""
    ptr = JsonPointer(offset)
    # Stamp the step-index node with its own offset, then point the
    # value index at a fresh, empty container.
    step_node = ptr.resolve(stepindex)
    step_node['_offset'] = offset
    ptr.set(valueindex, {})
Exemple #21
0
 def decode_countries(node, pointer):
     """Resolve *pointer* on *node* and return the list of country codes.

     Each raw entry is an OPS JSON node whose '$' key holds the code.
     """
     raw_entries = to_list(JsonPointer(pointer).resolve(node))
     return [entry['$'] for entry in raw_entries]
Exemple #22
0
    def __init__(self, directive, arguments, options, content, lineno, content_offset, block_text, state, state_machine):
        """Initialise the jsonschema directive: load the schema, then apply
        the optional 'hide' and 'show' path filters.

        The schema comes from an external file (optionally narrowed by a
        JSON pointer after '#') when one argument is given, otherwise from
        the directive's inline content. 'hide' removes subtrees; 'show'
        re-grafts subtrees from a pristine copy of the schema.
        """
        assert directive == 'jsonschema'

        self.options = options
        self.state = state
        self.lineno = lineno
        self.statemachine = state_machine

        if len(arguments) == 1:
            filename, pointer = self._splitpointer(arguments[0])
            if filename != '':
                self._load_external(filename)
            else:
                self._load_internal(content)
            if pointer:
                self.schema = resolve_pointer(self.schema, pointer)
        else:
            self._load_internal(content)

        hidden_paths = self.options.get('hide')
        if hidden_paths is not None:
            # Keep a pristine deep copy so 'show' can restore subtrees
            # removed by 'hide'.
            orig_schema = json.loads(json.dumps(self.schema))

            for hidden_path in hidden_paths.split(' '):
                ptr = JsonPointer(hidden_path)
                parent, name = ptr.to_last(self.schema)
                del parent[name]

            shown_paths = self.options.get('show')

            # Bugfix: 'show' is optional — previously a 'hide' without
            # 'show' crashed with AttributeError on None.split(' ').
            if shown_paths is not None:
                for shown_path in shown_paths.split(' '):
                    ptr = JsonPointer(shown_path)

                    orig_parent = orig_schema
                    current_parent = self.schema

                    for part in ptr.parts[:-1]:
                        orig_parent = ptr.walk(orig_parent, part)

                        try:
                            current_parent = ptr.walk(current_parent, part)
                        except JsonPointerException:
                            # Recreate the missing intermediate container
                            # with the same shape as in the original schema.
                            if isinstance(orig_parent, Sequence):
                                new_entry = []
                            elif isinstance(orig_parent, Mapping):
                                new_entry = OrderedDict()
                            else:
                                raise Exception('Unsupported type parent')

                            if isinstance(current_parent, MutableSequence):
                                current_parent.append(new_entry)
                            elif isinstance(current_parent, MutableMapping):
                                current_parent[part] = new_entry

                            current_parent = new_entry

                    # Graft the shown subtree from the pristine schema.
                    if isinstance(current_parent, MutableSequence):
                        current_parent.append(ptr.resolve(orig_schema))
                    elif isinstance(current_parent, MutableMapping):
                        current_parent[ptr.parts[-1]] = ptr.resolve(orig_schema)
                    else:
                        raise Exception('Unsupported type parent')
Exemple #23
0
 def test_json_pointer_on_dict():
     # JsonPointer should traverse a RefDict transparently — presumably
     # resolving through a $ref inside base/reflist.json; depends on that
     # fixture file's contents (verify against the fixture).
     ref_dict = RefDict("base/reflist.json#/")
     pointer = JsonPointer("/definitions/foo/not/0")
     assert pointer.resolve(ref_dict) == {"type": "object"}
Exemple #24
0
def createIndexData(offset, stepindex, valueindex):
    # Annotate the step-index node addressed by *offset* with the offset
    # itself, and reset the corresponding value-index slot to an empty dict.
    location = JsonPointer(offset)
    location.resolve(stepindex)['_offset'] = offset
    location.set(valueindex, {})