Python updated_schemaの例、scrapi.base.helpers.updated_schema Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_transformer.py プロジェクト: Johnetordoff/scrapi

    def test_failing_transformation_with_raises(self):
        """Expect XPathEvalError from normalize() when RAISE_IN_TRANSFORMER is True.

        The 'title' schema entry is replaced with a string that is not a
        valid XPath expression.
        """
        base.settings.RAISE_IN_TRANSFORMER = True

        # Raw string: '\/' is not a recognised escape sequence, so the plain
        # literal triggers an invalid-escape warning. The characters in the
        # resulting string (backslash followed by slash) are unchanged.
        self.harvester.schema = updated_schema(
            TEST_SCHEMA, {'title': r'A completely 1n\/@lid expre55ion'})

        with pytest.raises(XPathEvalError):
            # Only the raised exception matters; the results are discarded.
            for record in self.harvester.harvest():
                self.harvester.normalize(record)

コード例 #2

0

ファイルを表示

ファイル: test_transformer.py プロジェクト: Johnetordoff/scrapi

    def test_arg_kwargs(self):
        """Exercise schema entries built with pack() using positional, keyword and mixed arguments."""
        title_xpath = "//dc:title/node()"

        def process_title(title, title1="test"):
            # title1 is either a list (take its first item) or a plain value.
            if isinstance(title1, list):
                suffix = title1[0]
            else:
                suffix = title1
            return title[0] + suffix

        def process_title2(title1="test"):
            if isinstance(title1, list):
                return title1[0]
            return title1

        args = (title_xpath, )
        kwargs = {"title1": title_xpath}

        # Built in the same order as the original dict literal evaluated them.
        title_entry = (pack(*args, **kwargs), process_title)
        other_properties = build_properties(
            ('title2', (pack(*args), process_title)),
            ('title3', (pack(**kwargs), process_title2)),
            ('title4', (pack(title_xpath, title1=title_xpath), process_title))
        )

        self.harvester.schema = updated_schema(
            TEST_SCHEMA,
            {'title': title_entry, 'otherProperties': other_properties}
        )

        results = []
        for record in self.harvester.harvest(days_back=1):
            results.append(self.harvester.normalize(record))

        for result in results:
            assert result['title'] == "TestTest"
            extra = result['otherProperties']
            assert extra[0]['properties']['title2'] == 'Testtest'
            assert extra[1]['properties']['title3'] == 'Test'
            assert extra[2]['properties']['title4'] == "TestTest"

コード例 #3

0

ファイルを表示

 def schema(self):
     """Return self._schema passed through helpers.updated_schema with a custom "uris" entry.

     The entry pairs two XPath expressions with oai_process_uris_addis_ababa.
     """
     uris_entry = (
         '//ns0:header/ns0:identifier/node()',
         '//dc:identifier/node()',
         oai_process_uris_addis_ababa
     )
     return helpers.updated_schema(self._schema, {"uris": uris_entry})

コード例 #4

0

ファイルを表示

ファイル: icpsr.py プロジェクト: AndrewSallans/scrapi

 def schema(self):
     """Return self._schema passed through helpers.updated_schema with custom "uris" rules.

     Both the canonical URI and the object URIs are read from
     //dc:identifier/node().
     """
     identifier_xpath = '//dc:identifier/node()'
     canonical = (
         identifier_xpath,
         helpers.compose(create_icpsr_url, helpers.single_result)
     )
     object_uris = [(identifier_xpath, icpsr_exttract_doi)]
     return helpers.updated_schema(
         self._schema,
         {"uris": {"canonicalUri": canonical, "objectUris": object_uris}}
     )

コード例 #5

0

ファイルを表示

 def schema(self):
     """Return self._schema passed through updated_schema with a custom 'contributors' entry."""
     # dc:creator and dc:contributor nodes are both handed to the processor.
     contributors_entry = (
         '//dc:creator/node()',
         '//dc:contributor/node()',
         aoi_process_contributors_bhl
     )
     overrides = {'contributors': contributors_entry}
     return updated_schema(self._schema, overrides)

コード例 #6

0

ファイルを表示

class DryadHarvester(OAIHarvester):
    """OAI harvester settings for the Dryad Data Repository.

    normalize() returns None for records whose dc:status is 'deleted' or
    'item is not available', and for records whose dc:type is not 'article'.
    """
    short_name = 'dryad'
    long_name = 'Dryad Data Repository'
    url = 'http://www.datadryad.org/oai/request'

    base_url = 'http://www.datadryad.org/oai/request'
    property_list = [
        'rights', 'format', 'relation', 'date',
        'identifier', 'type', 'setSpec'
    ]
    timezone_granularity = True

    # OAISCHEMA with "objectUris" computed from dc:relation and dc:identifier.
    schema = helpers.updated_schema(
        schemas.OAISCHEMA,
        {
            "uris": {
                "objectUris": (
                    '//dc:relation/node()',
                    '//dc:identifier/node()',
                    format_dois_dryad
                )
            }
        }
    )

    def normalize(self, raw_doc):
        """Normalize raw_doc, or return None when the record is filtered out.

        raw_doc['doc'] is parsed as XML; raw_doc['docID'] is only used in the
        log messages for skipped records.
        """
        tree = etree.XML(raw_doc['doc'])

        # Fall back to '' when the XPath matches nothing.
        status_matches = tree.xpath('//dc:status/node()', namespaces=self.namespaces) or ['']
        status = status_matches[0]
        if str(status).lower() in ('deleted', 'item is not available'):
            logger.info('Not normalizing record with ID {}, status {}'.format(raw_doc['docID'], status))
            return None

        type_matches = tree.xpath('//dc:type/node()', namespaces=self.namespaces) or ['']
        doc_type = type_matches[0]
        if doc_type.lower() != 'article':
            logger.info('Not normalizing record with ID {}, type {}'.format(raw_doc['docID'], doc_type))
            return None

        # NOTE(review): super() is anchored at OAIHarvester, not DryadHarvester,
        # so OAIHarvester's own normalize() is bypassed -- confirm this is intended.
        return super(OAIHarvester, self).normalize(raw_doc)

コード例 #7

0

ファイルを表示

ファイル: datacite.py プロジェクト: NeuroVault/scrapi

 def schema(self):
     """Return self._schema passed through updated_schema with custom "description" and "uris" entries."""
     identifier_xpath = '//dc:identifier/node()'
     uris = {
         "canonicalUri": (identifier_xpath, compose(single_result, oai_extract_dois)),
         "objectUris": (identifier_xpath, oai_extract_dois)
     }
     overrides = {
         "description": ("//dc:description/node()", get_second_description),
         "uris": uris
     }
     return updated_schema(self._schema, overrides)

コード例 #8

0

ファイルを表示

ファイル: test_transformer.py プロジェクト: Johnetordoff/scrapi

    def test_failing_transformation_wont_raise(self):
        """Expect ValidationError from normalize() when RAISE_IN_TRANSFORMER is False.

        The 'title' schema entry is replaced with a string that is not a
        valid XPath expression.
        """
        # 50 is the numeric value of logging.CRITICAL.
        base.transformer.logger.setLevel(50)
        base.settings.RAISE_IN_TRANSFORMER = False

        # Raw string: '\/' is not a recognised escape sequence, so the plain
        # literal triggers an invalid-escape warning. The characters in the
        # resulting string (backslash followed by slash) are unchanged.
        self.harvester.schema = updated_schema(
            TEST_SCHEMA, {'title': r'A completely 1n\/@lid expre55ion'})

        with pytest.raises(ValidationError):
            # Only the raised exception matters; the results are discarded.
            for record in self.harvester.harvest():
                self.harvester.normalize(record)

コード例 #9

0

ファイルを表示

ファイル: dryad.py プロジェクト: felliott/scrapi

 def schema(self):
     """Return self._schema passed through helpers.updated_schema with a custom "objectUris" rule."""
     object_uris = (
         '//dc:relation/node()',
         '//dc:identifier/node()',
         format_dois_dryad
     )
     overrides = {"uris": {"objectUris": object_uris}}
     return helpers.updated_schema(self._schema, overrides)

コード例 #10

0

ファイルを表示

 def schema(self):
     """Return self._schema passed through helpers.updated_schema with a custom "canonicalUri" rule."""
     # The processor is a composition of the URL extractor and single_result.
     processor = helpers.compose(oai_extract_url_pubmedcentral,
                                 helpers.single_result)
     canonical = ('//ns0:header/ns0:identifier/node()', processor)
     return helpers.updated_schema(
         self._schema, {"uris": {"canonicalUri": canonical}})

コード例 #11

0

ファイルを表示

ファイル: __init__.py プロジェクト: jeffreyliu3230/scrapi

    def schema(self):
        """Return OAISCHEMA passed through updated_schema with 'otherProperties' built from self.property_list.

        Each item contributes a dc: XPath, an ns0: XPath and
        self.resolve_property as the processor.
        """
        entries = []
        for item in self.property_list:
            rule = (
                '//dc:{}/node()'.format(item),
                '//ns0:{}/node()'.format(item),
                self.resolve_property
            )
            entries.append((item, rule))

        properties = {'otherProperties': build_properties(*entries)}
        return updated_schema(OAISCHEMA, properties)

コード例 #12

0

ファイルを表示

ファイル: __init__.py プロジェクト: bdyetton/scrapi

    def schema(self):
        """Return OAISCHEMA passed through updated_schema with one 'otherProperties' entry per item in self.property_list."""

        def entry_for(name):
            # (property name, (dc XPath, ns0 XPath, processor))
            dc_xpath = '//dc:{}/node()'.format(name)
            ns0_xpath = '//ns0:{}/node()'.format(name)
            return (name, (dc_xpath, ns0_xpath, self.resolve_property))

        other_properties = build_properties(
            *[entry_for(item) for item in self.property_list])

        return updated_schema(OAISCHEMA, {'otherProperties': other_properties})

コード例 #13

0

ファイルを表示

 def schema(self):
     """Return self._schema passed through updated_schema with custom "description" and "uris" entries."""
     description_rule = ("//dc:description/node()", get_second_description)
     canonical_rule = ('//dc:identifier/node()', compose(single_result, oai_extract_dois))
     object_rule = ('//dc:identifier/node()', oai_extract_dois)
     return updated_schema(self._schema, {
         "description": description_rule,
         "uris": {"canonicalUri": canonical_rule, "objectUris": object_rule}
     })

コード例 #14

0

ファイルを表示

    def test_failing_transformation_with_raises(self):
        """Expect XPathEvalError from normalize() when RAISE_IN_TRANSFORMER is True.

        The 'title' schema entry is replaced with a string that is not a
        valid XPath expression.
        """
        base.settings.RAISE_IN_TRANSFORMER = True

        # Raw string: '\/' is not a recognised escape sequence, so the plain
        # literal triggers an invalid-escape warning. The characters in the
        # resulting string (backslash followed by slash) are unchanged.
        self.harvester.schema = updated_schema(
            TEST_SCHEMA, {'title': r'A completely 1n\/@lid expre55ion'})

        with pytest.raises(XPathEvalError):
            # Only the raised exception matters; the results are discarded.
            for record in self.harvester.harvest():
                self.harvester.normalize(record)

コード例 #15

0

ファイルを表示

    def test_failing_transformation_wont_raise(self):
        """Expect ValidationError from normalize() when RAISE_IN_TRANSFORMER is False.

        The 'title' schema entry is replaced with a string that is not a
        valid XPath expression.
        """
        # 50 is the numeric value of logging.CRITICAL.
        base.transformer.logger.setLevel(50)
        base.settings.RAISE_IN_TRANSFORMER = False

        # Raw string: '\/' is not a recognised escape sequence, so the plain
        # literal triggers an invalid-escape warning. The characters in the
        # resulting string (backslash followed by slash) are unchanged.
        self.harvester.schema = updated_schema(
            TEST_SCHEMA, {'title': r'A completely 1n\/@lid expre55ion'})

        with pytest.raises(ValidationError):
            # Only the raised exception matters; the results are discarded.
            for record in self.harvester.harvest():
                self.harvester.normalize(record)

コード例 #16

0

ファイルを表示

ファイル: icpsr.py プロジェクト: zamattiac/scrapi

 def schema(self):
     """Return self._schema passed through helpers.updated_schema with custom "uris" rules."""
     canonical_rule = ('//dc:identifier/node()',
                       helpers.compose(create_icpsr_url, helpers.single_result))
     # objectUris is a one-element list holding an (xpath, processor) pair.
     object_rules = [('//dc:identifier/node()', icpsr_exttract_doi)]
     uris = {"canonicalUri": canonical_rule, "objectUris": object_rules}
     return helpers.updated_schema(self._schema, {"uris": uris})

コード例 #17

0

ファイルを表示

ファイル: bhl.py プロジェクト: jeffreyliu3230/scrapi

class BHLHarvester(OAIHarvester):
    """OAI harvester settings for the Biodiversity Heritage Library OAI Repository."""
    short_name = 'bhl'
    long_name = 'Biodiversity Heritage Library OAI Repository'
    url = 'http://www.biodiversitylibrary.org/'

    base_url = 'http://www.biodiversitylibrary.org/oai'

    # OAISCHEMA with 'contributors' computed from dc:creator and dc:contributor.
    schema = updated_schema(
        OAISCHEMA,
        {
            'contributors': (
                '//dc:creator/node()',
                '//dc:contributor/node()',
                aoi_process_contributors_bhl
            )
        }
    )
    property_list = ['type', 'date', 'relation', 'setSpec', 'rights']

コード例 #18

0

ファイルを表示

class ScholarsbankHarvester(OAIHarvester):
    """OAI harvester settings for Scholars Bank University of Oregon."""
    short_name = 'scholarsbank'
    long_name = 'Scholars Bank University of Oregon'
    url = 'http://scholarsbank.uoregon.edu'
    timezone_granularity = True

    base_url = 'http://scholarsbank.uoregon.edu/oai/request'
    property_list = [
        'type',
        'source',
        'format',
        'relation',
        'date',
        'description',
        'setSpec',
        'identifier'
    ]

    # OAISCHEMA with 'description' processed by second_result.
    schema = updated_schema(
        OAISCHEMA,
        {
            'description': ('//dc:description/node()', second_result)
        }
    )

コード例 #19

0

ファイルを表示

ファイル: pubmedcentral.py プロジェクト: jeffreyliu3230/scrapi

class PubMedCentralHarvester(OAIHarvester):
    """OAI harvester settings for PubMed Central."""
    short_name = 'pubmedcentral'
    long_name = 'PubMed Central'
    url = 'http://www.ncbi.nlm.nih.gov/pmc/'

    # OAISCHEMA with "canonicalUri" taken from the OAI header identifier.
    schema = helpers.updated_schema(
        schemas.OAISCHEMA,
        {
            "uris": {
                "canonicalUri": (
                    '//ns0:header/ns0:identifier/node()',
                    helpers.compose(oai_extract_url_pubmedcentral, helpers.single_result)
                )
            }
        }
    )

    base_url = 'http://www.pubmedcentral.nih.gov/oai/oai.cgi'
    property_list = [
        'type',
        'source',
        'rights',
        'format',
        'setSpec',
        'date',
        'identifier'
    ]

コード例 #20

0

ファイルを表示

ファイル: test_transformer.py プロジェクト: Johnetordoff/scrapi

    def test_constants(self):
        """Check that schema values declared with CONSTANT show up in the normalized results."""
        # Built in the same order as the original dict literal evaluated them.
        tags_entry = (CONSTANT(['X']), lambda x: x)
        constant_property = {
            'name': CONSTANT('test'),
            'properties': {'test': CONSTANT('test')},
            'uri': CONSTANT('http://example.com'),
            'description': CONSTANT('A test field')
        }

        self.harvester.schema = updated_schema(
            TEST_SCHEMA,
            {'tags': tags_entry, 'otherProperties': [constant_property]}
        )

        results = []
        for record in self.harvester.harvest(days_back=1):
            results.append(self.harvester.normalize(record))

        for result in results:
            first_property = result['otherProperties'][0]
            assert first_property['properties']['test'] == 'test'
            assert result['tags'] == ['X']

コード例 #21

0

ファイルを表示

    def test_constants(self):
        """Check that CONSTANT-valued schema entries appear in every normalized result."""
        overrides = {
            'tags': (CONSTANT(['X']), lambda x: x),
            'otherProperties': [
                {
                    'name': CONSTANT('test'),
                    'properties': {'test': CONSTANT('test')},
                    'uri': CONSTANT('http://example.com'),
                    'description': CONSTANT('A test field')
                }
            ]
        }
        self.harvester.schema = updated_schema(TEST_SCHEMA, overrides)

        records = self.harvester.harvest(days_back=1)
        results = [self.harvester.normalize(record) for record in records]

        for result in results:
            properties = result['otherProperties'][0]['properties']
            assert properties['test'] == 'test'
            assert result['tags'] == ['X']

コード例 #22

0

ファイルを表示

    def test_arg_kwargs(self):
        """Exercise pack() with positional-only, keyword-only and mixed XPath arguments."""

        def _first_if_list(value):
            # Lists contribute their first element; anything else is used as is.
            return value[0] if isinstance(value, list) else value

        def process_title(title, title1="test"):
            return title[0] + _first_if_list(title1)

        def process_title2(title1="test"):
            return _first_if_list(title1)

        args = ("//dc:title/node()", )
        kwargs = {"title1": "//dc:title/node()"}

        # Built in the same order as the original dict literal evaluated them.
        title_rule = (pack(*args, **kwargs), process_title)
        extra_rules = build_properties(
            ('title2', (pack(*args), process_title)),
            ('title3', (pack(**kwargs), process_title2)),
            ('title4', (pack('//dc:title/node()', title1='//dc:title/node()'), process_title))
        )
        self.harvester.schema = updated_schema(
            TEST_SCHEMA, {'title': title_rule, 'otherProperties': extra_rules})

        results = [self.harvester.normalize(record) for record in self.harvester.harvest(days_back=1)]

        for result in results:
            assert result['title'] == "TestTest"
            assert result['otherProperties'][0]['properties']['title2'] == 'Testtest'
            assert result['otherProperties'][1]['properties']['title3'] == 'Test'
            assert result['otherProperties'][2]['properties']['title4'] == "TestTest"

コード例 #23

0

ファイルを表示

ファイル: __init__.py プロジェクト: NeuroVault/scrapi

 def _schema(self):
     """Return OAISCHEMA passed through updated_schema with self.formatted_properties."""
     overrides = self.formatted_properties
     return updated_schema(OAISCHEMA, overrides)

コード例 #24

0

ファイルを表示

ファイル: utils.py プロジェクト: erinspace/scrapi

    }],
    'description': 'This study seeks to understand how humans impact\
            the dietary patterns of eight free-ranging vervet monkey\
            (Chlorocebus pygerythrus) groups in South Africa using stable\
            isotope analysis.',
    'providerUpdatedDateTime': '2015-02-23T00:00:00',
    'shareProperties': {
        'source': 'test'
    }
}


# DOESCHEMA with the title overridden and three extra title-derived properties.
TEST_SCHEMA = updated_schema(
    DOESCHEMA, {
        "title": ("//dc:title/node()", lambda x: "Title overwritten"),
        "otherProperties":
        build_properties(
            ("title1", ("//dc:title/node()", single_result)),
            ("title2",
             ("//dc:title/node()", lambda x: single_result(x).lower())),
            ("title3",
             ("//dc:title/node()", "//dc:title/node()",
              lambda x, y: single_result(x) + single_result(y).lower())))
    })


# Namespace prefixes mapped to their URIs.
TEST_NAMESPACES = dict(
    rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    dc='http://purl.org/dc/elements/1.1/',
    dcq='http://purl.org/dc/terms/'
)


TEST_XML_DOC = b'''
    <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcq="http://purl.org/dc/terms/">
        <records count="97" morepages="true" start="1" end="10">

コード例 #25

0

ファイルを表示

ファイル: pubmedcentral.py プロジェクト: erinspace/scrapi

 def schema(self):
     """Return self._schema passed through helpers.updated_schema with a custom "canonicalUri" rule."""
     to_single_url = helpers.compose(oai_extract_url_pubmedcentral, helpers.single_result)
     uris = {
         "canonicalUri": ('//ns0:header/ns0:identifier/node()', to_single_url)
     }
     return helpers.updated_schema(self._schema, {"uris": uris})

コード例 #26

0

ファイルを表示

ファイル: bhl.py プロジェクト: NeuroVault/scrapi

 def schema(self):
     """Return self._schema passed through updated_schema with a custom 'contributors' entry."""
     contributors_rule = (
         '//dc:creator/node()',
         '//dc:contributor/node()',
         aoi_process_contributors_bhl
     )
     return updated_schema(self._schema, {'contributors': contributors_rule})

コード例 #27

0

ファイルを表示

 def _schema(self):
     """Return the result of updated_schema(OAISCHEMA, self.formatted_properties)."""
     extra_properties = self.formatted_properties
     return updated_schema(OAISCHEMA, extra_properties)

コード例 #28

0

ファイルを表示

 def schema(self):
     """Return self._schema passed through helpers.updated_schema with a custom "uris" entry."""
     # dc:identifier and dc:relation nodes are both given to oai_process_uris.
     uris_rule = (
         '//dc:identifier/node()',
         '//dc:relation/node()',
         helpers.oai_process_uris
     )
     return helpers.updated_schema(self._schema, {"uris": uris_rule})

コード例 #29

0

ファイルを表示

ファイル: utils.py プロジェクト: jeffreyliu3230/scrapi

            the dietary patterns of eight free-ranging vervet monkey\
            (Chlorocebus pygerythrus) groups in South Africa using stable\
            isotope analysis.',
    'providerUpdatedDateTime':
    '2015-02-23T00:00:00',
    'shareProperties': {
        'source': 'test'
    }
}

# DOESCHEMA with the title overridden and three extra title-derived properties.
TEST_SCHEMA = updated_schema(DOESCHEMA, {
    "title": ("//dc:title/node()", lambda x: "Title overwritten"),
    "otherProperties": build_properties(
        ("title1", ("//dc:title/node()", single_result)),
        ("title2", ("//dc:title/node()", lambda x: single_result(x).lower())),
        ("title3", ("//dc:title/node()", "//dc:title/node()", lambda x, y: single_result(x) + single_result(y).lower()))
    )
})

# Namespace prefixes mapped to their URIs.
TEST_NAMESPACES = dict(
    rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    dc='http://purl.org/dc/elements/1.1/',
    dcq='http://purl.org/dc/terms/'
)

TEST_XML_DOC = '''
    <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcq="http://purl.org/dc/terms/">
        <records count="97" morepages="true" start="1" end="10">

コード例 #30

0

ファイルを表示

ファイル: smithsonian.py プロジェクト: NeuroVault/scrapi

 def schema(self):
     """Return self._schema passed through helpers.updated_schema with a custom "uris" entry."""
     uris_rule = ('//dc:identifier/node()', helpers.oai_process_uris)
     overrides = {"uris": uris_rule}
     return helpers.updated_schema(self._schema, overrides)

コード例 #31

0

ファイルを表示

ファイル: umontreal.py プロジェクト: zamattiac/scrapi

 def schema(self):
     """Return self._schema passed through updated_schema with a custom 'languages' entry."""
     languages_rule = ('//dc:language/node()', umontreal_language_processor)
     overrides = {'languages': languages_rule}
     return updated_schema(self._schema, overrides)

コード例 #32

0

ファイルを表示

ファイル: utils.py プロジェクト: Eleonore9/scrapi

    # },
    'description': 'This study seeks to understand how humans impact\
            the dietary patterns of eight free-ranging vervet monkey\
            (Chlorocebus pygerythrus) groups in South Africa using stable\
            isotope analysis.',
    'providerUpdatedDateTime': '2015-02-23T00:00:00',
    'shareProperties': {
        'source': 'crossref'
    }
}


# BASEXMLSCHEMA with only the title overridden.
TEST_SCHEMA = updated_schema(
    BASEXMLSCHEMA,
    {
        "title": ("//dc:title/node()", lambda x: "Title overwritten"),
        # Disabled "otherProperties" overrides, kept for reference:
        # "otherProperties": {
        #     "title1": "//dc:title/node()",
        #     "title2": ["//dc:title/node()", lambda x: x.lower()],
        #     "title3": ["//dc:title/node()", "//dc:title/node()", lambda x, y: x + y.lower()]
        # }
    }
)


def get_leaves(d, leaves=None):
    if leaves is None:
        leaves = []

    for k, v in d.items():
        if isinstance(v, dict):
            leaves.extend(get_leaves(v, leaves))
        else:
            leaves.append((k, v))

コード例 #33

0

ファイルを表示

ファイル: pcurio.py プロジェクト: AndrewSallans/scrapi

 def schema(self):
     """Return self._schema passed through helpers.updated_schema with a custom "uris" entry."""
     # Header identifier and dc:identifier nodes both go to oai_process_pcurio.
     uris_rule = (
         '//ns0:header/ns0:identifier/node()',
         '//dc:identifier/node()',
         oai_process_pcurio
     )
     return helpers.updated_schema(self._schema, {"uris": uris_rule})

コード例 #34

0

ファイルを表示

ファイル: pubmedcentral.py プロジェクト: zamattiac/scrapi

 def schema(self):
     """Return self._schema passed through helpers.updated_schema with a custom "uris" entry."""
     header_xpath = '//ns0:header/ns0:identifier/node()'
     identifier_xpath = '//dc:identifier/node()'
     overrides = {
         "uris": (header_xpath, identifier_xpath, format_uris_pubmedcentral)
     }
     return helpers.updated_schema(self._schema, overrides)

コード例 #35

0

ファイルを表示

 def schema(self):
     """Return self._schema passed through helpers.updated_schema with a custom 'description' entry."""
     description_rule = ('//dc:description/node()', second_result)
     overrides = {'description': description_rule}
     return helpers.updated_schema(self._schema, overrides)

コード例 #36

0

ファイルを表示

ファイル: mblwhoilibrary.py プロジェクト: kms6bn/scrapi

 def schema(self):
     """Return self._schema passed through helpers.updated_schema with a custom "uris" entry."""
     uris_rule = (
         "//dc:identifier/node()",
         "//dc:relation/node()",
         helpers.oai_process_uris
     )
     overrides = {"uris": uris_rule}
     return helpers.updated_schema(self._schema, overrides)

コード例 #37

0

ファイルを表示

ファイル: pubmedcentral.py プロジェクト: AndrewSallans/scrapi

 def schema(self):
     """Return self._schema passed through helpers.updated_schema with a custom "uris" entry."""
     # Header identifier and dc:identifier nodes both go to the formatter.
     uris_rule = (
         '//ns0:header/ns0:identifier/node()',
         '//dc:identifier/node()',
         format_uris_pubmedcentral
     )
     return helpers.updated_schema(self._schema, {"uris": uris_rule})

コード例 #38

0

ファイルを表示

ファイル: dryad.py プロジェクト: erinspace/scrapi

 def schema(self):
     """Return self._schema passed through helpers.updated_schema with a custom "objectUris" rule."""
     relation_xpath = '//dc:relation/node()'
     identifier_xpath = '//dc:identifier/node()'
     uris = {
         "objectUris": (relation_xpath, identifier_xpath, format_dois_dryad)
     }
     return helpers.updated_schema(self._schema, {"uris": uris})