Example #1
0
 def test_check_object_field_error(self):
     """``_check_object_field`` must raise ``ValueError`` on the final call.

     The container is first passed through ``_check_object_field`` and then
     added again explicitly with ``_add_object``; the subsequent
     ``_check_object_field`` call for the same container/field is expected
     to fail (presumably because the pair is no longer unique -- confirm
     against ``ExternalResources``).
     """
     resources = ExternalResources('terms')
     species = Data(name="species", data=['H**o sapiens', 'Mus musculus'])
     resources._check_object_field(species, '')
     resources._add_object(species, '', '')
     # Callable form of assertRaises: invokes the method with the given args.
     self.assertRaises(ValueError, resources._check_object_field, species, '')
Example #2
0
    def test_object_key_unqiueness(self):
        """Adding a second reference through an existing key keeps one object-key row.

        A reference is first added with a string key; the key object is then
        fetched with ``get_key`` and reused for a second ``add_ref`` on the
        same container. ``object_keys.data`` must still equal ``[(0, 0)]``.
        """
        resources = ExternalResources('terms')
        records = np.array(
            [('Mus musculus', 9, 81.0), ('H**o sapien', 3, 27.0)],
            dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')],
        )
        container = Data(name='data_name', data=records)

        # First reference: the key is given as a plain string.
        resources.add_ref(
            container=container,
            key='Mus musculus',
            resource_name='NCBI_Taxonomy',
            resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy',
            entity_id='NCBI:txid10090',
            entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090',
        )

        # Second reference: reuse the key object returned by get_key.
        mus_key = resources.get_key('Mus musculus')
        resources.add_ref(
            container=container,
            key=mus_key,
            resource_name='resource2',
            resource_uri='resource_uri2',
            entity_id='entity2',
            entity_uri='entity_uri2',
        )

        self.assertEqual(resources.object_keys.data, [(0, 0)])
Example #3
0
    def test_get_object_resources(self):
        """``get_object_resources`` returns the entity rows linked to a container.

        After a single ``add_ref`` the returned frame is expected to hold one
        row with the columns ``keys_idx``, ``resource_idx``, ``entity_id`` and
        ``entity_uri``.
        """
        taxonomy_entry = 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090'

        resources = ExternalResources('terms')
        records = np.array(
            [('Mus musculus', 9, 81.0), ('H**o sapien', 3, 27.0)],
            dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')],
        )
        container = Data(name='data_name', data=records)

        resources.add_ref(
            container=container,
            key='Mus musculus',
            resource_name='NCBI_Taxonomy',
            resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy',
            entity_id='NCBI:txid10090',
            entity_uri=taxonomy_entry,
        )
        received = resources.get_object_resources(container)

        # Single expected row, spelled out column by column.
        column_names = ['keys_idx', 'resource_idx', 'entity_id', 'entity_uri']
        only_row = [0, 0, 'NCBI:txid10090', taxonomy_entry]
        expected = pd.DataFrame(data=[only_row], columns=column_names)

        pd.testing.assert_frame_equal(received, expected)
Example #4
0
    def test_check_object_field_add(self):
        """``_check_object_field`` adds a row to ``objects`` for unseen objects.

        The method is called once with a plain id string and once with a
        ``Data`` container; afterwards ``objects.data`` must list both, in
        call order, each with empty strings in the second and third slots.
        """
        resources = ExternalResources('terms')
        container = Data(name="species", data=['H**o sapiens', 'Mus musculus'])

        for candidate in ('uuid1', container):
            resources._check_object_field(candidate, '')

        self.assertEqual(
            resources.objects.data,
            [('uuid1', '', ''), (container.object_id, '', '')],
        )
Example #5
0
 def test_add_ref(self):
     """A single ``add_ref`` call populates keys, resources, entities and objects.

     Each of the four tables is compared against the one row expected from
     the arguments passed to ``add_ref``.
     """
     resources = ExternalResources('terms')
     container = Data(name="species", data=['H**o sapiens', 'Mus musculus'])
     resources.add_ref(
         container=container,
         key='key1',
         resource_name='resource1',
         resource_uri='uri1',
         entity_id='entity_id1',
         entity_uri='entity1',
     )

     # (actual table contents, expected rows), checked in the original order.
     checks = (
         (resources.keys.data, [('key1', )]),
         (resources.resources.data, [('resource1', 'uri1')]),
         (resources.entities.data, [(0, 0, 'entity_id1', 'entity1')]),
         (resources.objects.data, [(container.object_id, '', '')]),
     )
     for actual, wanted in checks:
         self.assertEqual(actual, wanted)
Example #6
0
    def test_add_ref_compound_data(self):
        """``add_ref`` with ``field`` set records that field in the objects table.

        The container wraps a structured NumPy array; the reference targets
        its ``species`` field, which must show up as the third element of the
        single row in ``objects.data``.
        """
        resources = ExternalResources(name='example')

        records = np.array(
            [('Mus musculus', 9, 81.0), ('H**o sapiens', 3, 27.0)],
            dtype=[('species', 'U14'), ('age', 'i4'), ('weight', 'f4')],
        )
        container = Data(name='data_name', data=records)
        resources.add_ref(
            container=container,
            field='species',
            key='Mus musculus',
            resource_name='NCBI_Taxonomy',
            resource_uri='resource0_uri',
            entity_id='NCBI:txid10090',
            entity_uri='entity_0_uri',
        )

        self.assertEqual(resources.keys.data, [('Mus musculus', )])
        self.assertEqual(resources.resources.data, [('NCBI_Taxonomy', 'resource0_uri')])
        self.assertEqual(resources.entities.data, [(0, 0, 'NCBI:txid10090', 'entity_0_uri')])
        self.assertEqual(resources.objects.data, [(container.object_id, '', 'species')])
Example #7
0
    def test_to_dataframe(self):
        """Check ``ExternalResources.to_dataframe`` against a hand-built expected frame.

        Three ``Data`` objects are registered: one referencing two keys via its
        ``species`` field, one reusing those two keys and adding a third, and one
        whose single key is linked to two entities from different resources.
        The result is compared once with plain column names and once with
        ``use_categories=True``, where the expected columns are keyed by
        ``(table, column)`` tuples.

        The expected index values below depend on the order in which the
        references are added, so the statements must not be reordered.
        """
        # Setup complex external resources with keys reused across objects and
        # multiple resources per key
        er = ExternalResources(name='example')
        # Add a species dataset with 2 keys
        data1 = Data(name='data_name',
                     data=np.array([('Mus musculus', 9, 81.0),
                                    ('H**o sapiens', 3, 27.0)],
                                   dtype=[('species', 'U14'), ('age', 'i4'),
                                          ('weight', 'f4')]))
        # Only the returned key objects (k1, k2) are used later; the resource
        # and entity return values are not read again in this test.
        k1, r1, e1 = er.add_ref(
            container=data1,
            field='species',
            key='Mus musculus',
            resource_name='NCBI_Taxonomy',
            resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy',
            entity_id='NCBI:txid10090',
            entity_uri=
            'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090'
        )
        k2, r2, e2 = er.add_ref(
            container=data1,
            field='species',
            key='H**o sapiens',
            resource_name='NCBI_Taxonomy',
            resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy',
            entity_id='NCBI:txid9606',
            entity_uri=
            'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606')
        # Add a second species dataset that reuses both keys of the first
        # dataset (linked manually via _add_object_key) and adds one new key
        data2 = Data(name="species",
                     data=['H**o sapiens', 'Mus musculus', 'Pongo abelii'])
        o2 = er._add_object(data2, relative_path='', field='')
        er._add_object_key(o2, k1)
        er._add_object_key(o2, k2)
        # NOTE(review): k2, r2 and e2 are rebound here to the 'Pongo abelii'
        # reference; none of them is read again afterwards.
        k2, r2, e2 = er.add_ref(
            container=data2,
            field='',
            key='Pongo abelii',
            resource_name='NCBI_Taxonomy',
            resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy',
            entity_id='NCBI:txid9601',
            entity_uri=
            'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9601')
        # Add a third data object, this time with 2 entities for a key
        data3 = Data(name="genotypes", data=['Rorb'])
        # NOTE(review): the entity id ('MGI:1346434') and the id embedded in the
        # entity URI ('MGI:1343464') differ; the expected frame below mirrors
        # both values verbatim, so the test is self-consistent.
        k3, r3, e3 = er.add_ref(
            container=data3,
            field='',
            key='Rorb',
            resource_name='MGI Database',
            resource_uri='http://www.informatics.jax.org/',
            entity_id='MGI:1346434',
            entity_uri='http://www.informatics.jax.org/marker/MGI:1343464')
        # Second entity for the same key: pass the key object k3 rather than
        # the key string
        _ = er.add_ref(
            container=data3,
            field='',
            key=k3,
            resource_name='Ensembl',
            resource_uri='https://uswest.ensembl.org/index.html',
            entity_id='ENSG00000198963',
            entity_uri=
            'https://uswest.ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=ENSG00000198963'
        )
        # Convert to dataframe and compare against the expected result
        result_df = er.to_dataframe()
        expected_df_data = \
            {'objects_idx': {0: 0, 1: 0, 2: 1, 3: 1, 4: 1, 5: 2, 6: 2},
             'object_id': {0: data1.object_id, 1: data1.object_id,
                           2: data2.object_id, 3: data2.object_id, 4: data2.object_id,
                           5: data3.object_id, 6: data3.object_id},
             'field': {0: 'species', 1: 'species', 2: '', 3: '', 4: '', 5: '', 6: ''},
             'keys_idx': {0: 0, 1: 1, 2: 0, 3: 1, 4: 2, 5: 3, 6: 3},
             'key': {0: 'Mus musculus', 1: 'H**o sapiens', 2: 'Mus musculus', 3: 'H**o sapiens',
                     4: 'Pongo abelii', 5: 'Rorb', 6: 'Rorb'},
             'resources_idx': {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 1, 6: 2},
             'resource': {0: 'NCBI_Taxonomy', 1: 'NCBI_Taxonomy', 2: 'NCBI_Taxonomy', 3: 'NCBI_Taxonomy',
                          4: 'NCBI_Taxonomy', 5: 'MGI Database', 6: 'Ensembl'},
             'resource_uri': {0: 'https://www.ncbi.nlm.nih.gov/taxonomy', 1: 'https://www.ncbi.nlm.nih.gov/taxonomy',
                              2: 'https://www.ncbi.nlm.nih.gov/taxonomy', 3: 'https://www.ncbi.nlm.nih.gov/taxonomy',
                              4: 'https://www.ncbi.nlm.nih.gov/taxonomy', 5: 'http://www.informatics.jax.org/',
                              6: 'https://uswest.ensembl.org/index.html'},
             'entities_idx': {0: 0, 1: 1, 2: 0, 3: 1, 4: 2, 5: 3, 6: 4},
             'entity_id': {0: 'NCBI:txid10090', 1: 'NCBI:txid9606', 2: 'NCBI:txid10090', 3: 'NCBI:txid9606',
                           4: 'NCBI:txid9601', 5: 'MGI:1346434', 6: 'ENSG00000198963'},
             'entity_uri': {0: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090',
                            1: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606',
                            2: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=10090',
                            3: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606',
                            4: 'https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9601',
                            5: 'http://www.informatics.jax.org/marker/MGI:1343464',
                            6: 'https://uswest.ensembl.org/Homo_sapiens/Gene/Summary?db=core;g=ENSG00000198963'}}
        expected_df = pd.DataFrame.from_dict(expected_df_data)
        pd.testing.assert_frame_equal(result_df, expected_df)

        # Convert to dataframe with categories and compare against the expected result
        result_df = er.to_dataframe(use_categories=True)
        # (table, column) pairs, in the column order expected from to_dataframe
        cols_with_categories = [('objects', 'objects_idx'),
                                ('objects', 'object_id'), ('objects', 'field'),
                                ('keys', 'keys_idx'), ('keys', 'key'),
                                ('resources', 'resources_idx'),
                                ('resources', 'resource'),
                                ('resources', 'resource_uri'),
                                ('entities', 'entities_idx'),
                                ('entities', 'entity_id'),
                                ('entities', 'entity_uri')]
        # Re-key the flat expected data by the tuple; the lookup uses the
        # second tuple element, i.e. the flat column name used above
        expected_df_data = {
            c: expected_df_data[c[1]]
            for c in cols_with_categories
        }
        expected_df = pd.DataFrame.from_dict(expected_df_data)
        pd.testing.assert_frame_equal(result_df, expected_df)
Example #8
0
# Silence the experimental-feature warning so it does not clutter the
# rendered tutorial output. ``message`` is a regular expression matched
# against the start of the warning text.
import warnings

warnings.filterwarnings(
    "ignore",
    message="ExternalResources is experimental*",
    category=UserWarning,
)

# The ExternalResources instance used by the examples below
er = ExternalResources(name='example')

###############################################################################
# Using the add_ref method
# ------------------------------------------------------
# :py:func:`~hdmf.common.resources.ExternalResources.add_ref`
# is a wrapper function provided by the ``ExternalResources`` class that
# simplifies adding data. Using ``add_ref`` lets us treat each new entry much
# like adding a new row to a flat table, with ``add_ref`` taking care of
# populating the underlying data structures accordingly.

# Container holding the species names that the references are attached to
data = Data(
    name="species",
    data=['H**o sapiens', 'Mus musculus'],
)

# Link the first species name to its NCBI Taxonomy entry
er.add_ref(container=data,
           key='H**o sapiens',
           resource_name='NCBI_Taxonomy',
           resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy',
           entity_id='NCBI:txid9606',
           entity_uri='https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=9606')

key, resource, entity = er.add_ref(
    container=data,
    key='Mus musculus',
    resource_name='NCBI_Taxonomy',
    resource_uri='https://www.ncbi.nlm.nih.gov/taxonomy',
    entity_id='NCBI:txid10090',