Exemple #1
0
def test_mosdef_only(testresourcepath, expected_modout1):
    '''
    Run a fingerprint-only pipeline over INPUT_GRAPH_1 and check the output

    Only fingerprint rules are supplied; the transform and labelize rule sets
    passed to generic_pipeline are empty dicts.

    testresourcepath, expected_modout1 - not used in the body; kept so the
        signature (presumably pytest fixtures - confirm) is unchanged
    '''
    # Parse the module-level input graph into a fresh input model
    modin = newmodel()
    literate.parse(INPUT_GRAPH_1, modin)

    FINGERPRINT_RULES = {
        # Album copy is conditional: the if_() test is built from the
        # byArtist link and DOC_NS('md')
        SCH_NS('MusicAlbum'): if_(
            contains(follow(SCH_NS('byArtist')), DOC_NS('md')),
            materialize(COPY()),
        ),
        # Person resources are copied unconditionally
        SCH_NS('Person'): materialize(COPY()),
    }

    ppl = generic_pipeline(FINGERPRINT_RULES, {}, {})

    modout = ppl.run(input_model=modin)
    # Use -s to see this
    print('=' * 10, 'test_mosdef_only', '=' * 10)
    literate.write(modout)
    # import pprint; pprint.pprint(list(iter(modout)))

    assert len(modout) == 17
    albums = list(util.all_origins(modout, only_types={SCH_NS('MusicAlbum')}))
    assert len(albums) == 1
    people = list(util.all_origins(modout, only_types={SCH_NS('Person')}))
    assert len(people) == 3
Exemple #2
0
def test_basics_1(testresourcepath, expected_modout1):
    '''
    Run a pipeline with a '@new-entity-hook' callback and check how many
    resources the callback tagged

    The hook adds a 'http://example.org/materializedBy' link for each entity
    ID it is called with; the test expects exactly two such links.

    testresourcepath - directory containing the test resource files
    expected_modout1 - not used in the body; kept so the signature is unchanged
    '''
    modin = newmodel()
    modin_fpath = 'schemaorg/catcherintherye-ugly.md'
    # Context manager so the file handle is closed as soon as it has been read
    with open(os.path.join(testresourcepath, modin_fpath)) as fp:
        literate.parse(fp.read(), modin)

    FINGERPRINT_RULES = {
        SCH_NS('Book'): materialize(
            BF_NS('Instance'),
            fprint=[
                (BF_NS('isbn'), follow(SCH_NS('isbn'))),
            ],
        )
    }

    TRANSFORM_RULES = {
        SCH_NS('name'): link(rel=BF_NS('name')),

        SCH_NS('author'): materialize(
            BF_NS('Person'),
            BF_NS('creator'),
            vars={
                'birthDate': follow(SCH_NS('authorBirthDate'),
                                    origin=var('input-resource'))
            },
            fprint=[
                (BF_NS('name'), target()),
                (BF_NS('birthDate'), var('birthDate')),
            ],
            links=[
                (BF_NS('name'), target()),
                (BF_NS('birthDate'), var('birthDate')),
            ],
        ),
    }

    # The output model is created up front so the hook below can write to it
    modout = newmodel()

    def new_entity_hook(eid):
        # Add a triple to each materialized resource
        modout.add(eid, 'http://example.org/materializedBy', 'py.test')

    ctxextras = {'@new-entity-hook': new_entity_hook}
    root_ctx = DUMMY_CONTEXT.copy(output_model=modout, extras=ctxextras)

    ppl = generic_pipeline(FINGERPRINT_RULES,
                           TRANSFORM_RULES,
                           LABELIZE_RULES,
                           root_ctx=root_ctx)

    ppl.run(input_model=modin, output_model=modout)
    # Use -s to see this
    print('=' * 10, 'test_basics_1', '=' * 10)
    literate.write(modout)

    tagged = list(modout.match(None, 'http://example.org/materializedBy', None))
    assert len(tagged) == 2
Exemple #3
0
def Xtest_versa_syntax1():
    '''
    Parse VERSA_LITERATE1 into a fresh model and log every resulting link
    at debug level

    Makes no assertions. NOTE(review): the X prefix presumably keeps the test
    runner from collecting this function - confirm
    '''
    model = newmodel()
    model.create_space()
    literate.parse(VERSA_LITERATE1, model)

    logging.debug('VERSA LITERATE EXAMPLE 1')
    for statement in model.match():
        logging.debug('Result: ' + repr(statement))
Exemple #4
0
def test_versa_syntax1(testresourcepath):
    '''
    Parse doc1.md and doc1.abbr.md with the same config and check that each
    yields the same six expected links

    testresourcepath - directory containing doc1.md and doc1.abbr.md
    '''
    config = {
        'autotype-h1': 'http://example.org/r1',
        'autotype-h2': 'http://example.org/r2',
        'interpretations': {
            VERSA_BASEIRI + 'refines': VERSA_BASEIRI + 'resourceset',
            VERSA_BASEIRI + 'properties': VERSA_BASEIRI + 'resourceset',
            VERSA_BASEIRI + 'synonyms': VERSA_BASEIRI + 'resourceset'
        }
    }

    # Context managers so each file handle is closed as soon as it is read
    m1 = newmodel(baseiri='http://example.org/')
    with open(os.path.join(testresourcepath, 'doc1.md')) as fp:
        literate.parse(fp.read(), m1, config=config)

    m2 = newmodel(baseiri='http://example.org/')
    with open(os.path.join(testresourcepath, 'doc1.abbr.md')) as fp:
        literate.parse(fp.read(), m2, config=config)

    # IRIs that recur in the expected links below
    ndewo = I('http://uche.ogbuji.net/ndewo/')
    uche = I('http://uche.ogbuji.net/')
    vtype = I('http://bibfra.me/purl/versa/type')
    see_also = I('http://www.w3.org/TR/html5/link-type/see-also')

    # Both documents must produce the same set of links
    equiv_results = [list(m1.match()), list(m2.match())]
    for results in equiv_results:
        # import pprint; pprint.pprint(results)
        assert len(results) == 6
        assert (ndewo, vtype, 'http://example.org/r1', {}) in results
        assert (ndewo,
                I('http://www.w3.org/TR/html5/title'),
                'Ndewo, Colorado',
                {'@lang': None}) in results
        assert (ndewo,
                I('http://www.w3.org/TR/html5/link-type/author'),
                uche,
                {I('http://www.w3.org/TR/html5/link/description'):
                 'Uche Ogbuji'}) in results
        assert (ndewo,
                see_also,
                I('http://www.goodreads.com/book/show/18714145-ndewo-colorado'),
                {I('http://www.w3.org/TR/html5/link/label'): 'Goodreads'}
                ) in results
        assert (uche, vtype, 'http://example.org/r1', {}) in results
        assert (uche, see_also, ndewo, {}) in results
Exemple #5
0
def test_basics_1(testresourcepath, expected_modout1):
    '''
    Run a fingerprint + transform + labelize pipeline on the
    catcherintherye-ugly.md sample and check the size and typed resources
    of the output model

    testresourcepath - directory containing the test resource files
    expected_modout1 - not used in the body; kept so the signature is unchanged
    '''
    modin = newmodel()
    modin_fpath = 'schemaorg/catcherintherye-ugly.md'
    # Context manager so the file handle is closed as soon as it has been read
    with open(os.path.join(testresourcepath, modin_fpath)) as fp:
        literate.parse(fp.read(), modin)

    FINGERPRINT_RULES = {
        SCH_NS('Book'): materialize(
            BF_NS('Instance'),
            fprint=[
                (BF_NS('isbn'), follow(SCH_NS('isbn'))),
            ],
        )
    }

    TRANSFORM_RULES = {
        SCH_NS('name'): link(rel=BF_NS('name')),

        SCH_NS('author'): materialize(
            BF_NS('Person'),
            BF_NS('creator'),
            vars={
                'birthDate': follow(SCH_NS('authorBirthDate'),
                                    origin=var('input-resource'))
            },
            fprint=[
                (BF_NS('name'), target()),
                (BF_NS('birthDate'), var('birthDate')),
            ],
            links=[
                (BF_NS('name'), target()),
                (BF_NS('birthDate'), var('birthDate')),
            ],
        ),
    }

    ppl = generic_pipeline(FINGERPRINT_RULES, TRANSFORM_RULES, LABELIZE_RULES)

    modout = ppl.run(input_model=modin)
    # Use -s to see this
    print('='*10, 'test_basics_1', '='*10)
    literate.write(modout)

    assert len(modout) == 8
    assert len(list(util.all_origins(modout, only_types={BF_NS('Instance')}))) == 1
    assert len(list(util.all_origins(modout, only_types={BF_NS('Person')}))) == 1
    assert len(list(modout.match(None, BF_NS('birthDate'), '1919-01-01'))) == 1
Exemple #6
0
def test_basics_2(testresourcepath):
    '''
    Run a pipeline whose Book fingerprint rule materializes an Instance with
    a nested Work, using transform rules keyed by output resource type, and
    check the size and typed resources of the output model

    testresourcepath - directory containing the test resource files
    '''
    modin = newmodel()
    modin_fpath = 'schemaorg/catcherintherye-ugly.md'
    # Context manager so the file handle is closed as soon as it has been read
    with open(os.path.join(testresourcepath, modin_fpath)) as fp:
        literate.parse(fp.read(), modin)

    FINGERPRINT_RULES = {
        SCH_NS('Book'): materialize(
            var('itype'),
            fprint=[
                (BF_NS('isbn'), follow(SCH_NS('isbn'))),
            ],
            links=[
                (
                    BF_NS('instantiates'),
                    materialize(
                        BF_NS('Work'),
                        fprint=[
                            (BF_NS('name'), follow(SCH_NS('title'))),
                            (BF_NS('creator'), follow(SCH_NS('author'))),
                            (BF_NS('language'), var('lang')),
                        ],
                        links=[('http://instantiated-by', var('@stem'))],
                        # Can remove when we have smart sessions to avoid
                        # duplicate instantiates links
                        attach=False,
                    ),
                )
            ],
            # Not really necessary; just testing vars in this scenario
            vars={
                'lang': follow(SCH_NS('inLanguage')),
                'itype': BF_NS('Instance'),
            },
        )
    }

    TRANSFORM_RULES = {
        # Rule for output resource type of Work or Instance
        (SCH_NS('name'), WT, IT): link(rel=BF_NS('name')),

        # Rule only for output resource type of Work
        (SCH_NS('author'), WT): materialize(
            BF_NS('Person'),
            BF_NS('creator'),
            vars={
                'birthDate': follow(SCH_NS('authorBirthDate'),
                                    origin=var('input-resource'))
            },
            fprint=[
                # Supplementary type
                (VTYPE_REL, SCH_NS('Novelist')),
                (BF_NS('name'), target()),
                (BF_NS('birthDate'), var('birthDate')),
            ],
            links=[
                # Supplementary type
                (VTYPE_REL, SCH_NS('Novelist')),
                (BF_NS('name'), target()),
                (BF_NS('birthDate'), var('birthDate')),
            ],
            preserve_fprint=True,
        ),
    }

    ppl = generic_pipeline(FINGERPRINT_RULES, TRANSFORM_RULES, LABELIZE_RULES)

    modout = ppl.run(input_model=modin)
    # Use -s to see this
    print('='*10, 'test_basics_2', '='*10)
    literate.write(modout)
    # import pprint; pprint.pprint(list(iter(modout)))

    assert len(modout) == 15
    assert len(list(util.all_origins(modout, only_types={BF_NS('Instance')}))) == 1
    assert len(list(util.all_origins(modout, only_types={BF_NS('Work')}))) == 1
    assert len(list(util.all_origins(modout, only_types={BF_NS('Person')}))) == 1
    assert len(list(modout.match(None, BF_NS('birthDate'), '1919-01-01'))) == 1
Exemple #7
0
def test_basics_4(testresourcepath):
    '''
    Convert from schema.org to [MusicBrainz scheme](https://musicbrainz.org/doc/MusicBrainz_Database/Schema)

    Parses schemaorg/blackstar.md, hand-adds a comment to one input resource,
    runs the pipeline, and checks the output size, the typed resources and
    that the comment surfaces as a remark on exactly one output resource.

    testresourcepath - directory containing the test resource files
    '''
    # import sys  # Uncomment to debug
    MB_NS = I('https://musicbrainz.org/doc/MusicBrainz_Database/Schema/')
    R_TYP = MB_NS('Release')
    RG_TYP = MB_NS('ReleaseGroup')
    A_TYP = MB_NS('Artist')
    DOC_NS = I('http://example.org/records/')

    modin = newmodel()
    modin_fpath = 'schemaorg/blackstar.md'
    # Context manager so the file handle is closed as soon as it has been read
    with open(os.path.join(testresourcepath, modin_fpath)) as fp:
        literate.parse(fp.read(), modin)
    # Hand-add a comment property to the Mos Def resource to test that this
    # value doesn't bleed e.g. to Kweli's output
    modin.add(DOC_NS('md'), SCH_NS('comment'), 'test')

    FINGERPRINT_RULES = {
        SCH_NS('MusicAlbum'): materialize(
            RG_TYP,
            fprint=[
                (MB_NS('title'), follow(SCH_NS('name'))),
                (MB_NS('artist'), follow(SCH_NS('byArtist'), SCH_NS('name'))),
            ],
            links=[
                (MB_NS('contains'), materialize(
                    R_TYP,
                    fprint=[
                        (MB_NS('catalogue-number'), var('catnum')),
                    ],
                    links=[
                        (MB_NS('catalogue-number'), var('catnum')),
                    ],
                ))
            ],
            vars={'catnum': follow(SCH_NS('catalogNumber'))},
            # debug=sys.stderr,  # Uncomment to debug
        ),

        SCH_NS('Person'): materialize(
            A_TYP,
            fprint=[
                (MB_NS('name'), var('aname')),
            ],
            links=[
                (MB_NS('name'), var('aname')),
                (MB_NS('remark'), var('comment')),
            ],
            vars={
                'aname': follow(SCH_NS('name')),
                'comment': follow(SCH_NS('comment')),
            },
        ),
    }

    TRANSFORM_RULES = {
        (SCH_NS('name'), R_TYP, RG_TYP): link(rel=MB_NS('title')),

        (SCH_NS('byArtist'), R_TYP): link(rel=MB_NS('by'), target=lookup('@resource')),
    }

    # Intentionally shadows the global LABELIZE_RULES
    LABELIZE_RULES = {
        RG_TYP: follow(MB_NS('title')),
        R_TYP: follow(MB_NS('title')),
        A_TYP: follow(MB_NS('name')),
    }

    ppl = generic_pipeline(FINGERPRINT_RULES, TRANSFORM_RULES, LABELIZE_RULES)

    modout = ppl.run(input_model=modin)
    # Use -s to see this
    print('='*10, 'test_basics_4', '='*10)
    literate.write(modout)
    # import pprint; pprint.pprint(list(iter(modout)))

    assert len(modout) == 16
    # NOTE(review): this ReleaseGroup check used to appear twice verbatim;
    # one copy was presumably meant to check R_TYP (Release) - confirm the
    # expected count before adding that assertion
    assert len(list(util.all_origins(modout, only_types={RG_TYP}))) == 1
    assert len(list(util.all_origins(modout, only_types={A_TYP}))) == 2
    # assert len(list(modout.match(None, BF_NS('birthDate'), '1919-01-01'))) == 1
    # DOC_NS('md') -> I('i5GvPVm7ClA') in the transform
    remark_origins = [
        stmt[0] for stmt in modout.match(None, MB_NS('remark'), 'test')
    ]
    assert remark_origins == [I('i5GvPVm7ClA')]
0
    def labelize(self):
        '''
        Executes a utility rule to create labels in output model for new resources

        Delegates to self.labelize_helper with the module-level LABELIZE_RULES
        and a callback that warns about misses. Always returns True.
        '''
        # XXX Check if there's already a label?
        def missed_label(origin, type):
            '''
            Callback passed as handle_misses; warns that no label was
            generated for origin

            The second parameter is unused here. Its name shadows the builtin
            but is left as-is in case the helper passes it by keyword.
            '''
            warnings.warn(f'No label generated for: {origin}')

        # The helper's return value is not needed by this method
        self.labelize_helper(LABELIZE_RULES, handle_misses=missed_label)
        return True


if __name__ == '__main__':
    # Script mode: each input record gets its own pipeline instance and its
    # own input model, and the result is dumped in every enabled form
    for record in INPUT_RECORDS:
        pipeline = dc_schema_pipeline()
        source_model = newmodel()
        literate.parse(record, source_model)
        result_model = pipeline.run(input_model=source_model)

        print('Resulting record Fingerprints:', pipeline.fingerprints)

        print('Low level JSON dump of output data model: ')
        util.jsondump(result_model, sys.stdout)

        print('Versa literate form of output: ')
        literate.write(result_model, out=sys.stdout)

        # Mermaid diagram output, currently disabled:
        # from versa.serial import mermaid
        # print('Mermaid diagram form of output: ')
        # mermaid.write(result_model, out=sys.stdout)