def _get_repo_alt(dataset, resource_getter=None):
    """Return a ``MetadataRepo`` for ``dataset``.

    Parameters
    ----------
    dataset
        Second element of the resource path
        ``('datasets', dataset, schema.metadata_kw)``.
    resource_getter : callable, optional
        Zero-argument callable returning the resources object. When
        omitted, ``get_resources`` is used.

    Returns
    -------
    MetadataRepo
        Built from the ``.data`` of the stored resource when the path
        exists; otherwise a ``MetadataRepo`` constructed with no arguments.
    """
    getter = get_resources if resource_getter is None else resource_getter
    available = getter()
    metadata_path = ('datasets', dataset, schema.metadata_kw)
    if not available.has(*metadata_path):
        return MetadataRepo()
    return MetadataRepo(available.gets(*metadata_path).data)
Exemple #2
0
    def setUp(self):
        """Patch the plotting module's alpha and metadata repo getters.

        Each patcher is started here; the patchers and the mocks they
        produce are stored on ``self``.
        """
        super().setUp()

        # Alpha diversity fixture: two metrics whose sample ids only
        # partly overlap.
        alpha_series = {
            'faith_pd': pd.Series([1, 2, 3, 4],
                                  index=['s01', 's02', 's04', 's05']),
            'shannon': pd.Series([7.24, 9.05, 8.25],
                                 index=['s01', 's02', 'sOther']),
        }
        alpha_repo = AlphaRepo(alpha_series)
        self.res_patcher = patch(
            'microsetta_public_api.api.plotting._alpha_repo_getter')
        self.mock_resources = self.res_patcher.start()
        self.mock_resources.return_value = alpha_repo

        # Metadata fixture indexed by every sample id used above.
        metadata_frame = pd.DataFrame(
            {
                'age_cat': ['30s', '40s', '50s', '30s', '30s'],
                'num_var': [3, 4, 5, 6, 7],
            },
            index=['s01', 's02', 's04', 's05', 'sOther'],
        )
        self.metadata = MetadataRepo(metadata_frame)
        self.md_patcher = patch(
            'microsetta_public_api.api.plotting._metadata_repo_getter')
        self.mock_metadata = self.md_patcher.start()
        self.mock_metadata.return_value = self.metadata
    def setUp(self):
        """Save a five-sample metadata table to a temp file and build a repo.

        The file is registered under the ``'metadata'`` resource key before
        ``MetadataRepo()`` is constructed with no arguments.
        """
        TempfileTestCase.setUp(self)
        ConfigTestCase.setUp(self)

        self.metadata_filename = self.create_tempfile(suffix='.qza').name

        # Sample 'e' is missing a value in every column.
        sample_ids = pd.Series(['a', 'b', 'c', 'd', 'e'], name='#SampleID')
        columns = {
            'age_cat': ['30s', '40s', '50s', '30s', np.nan],
            'num_cat': [7.24, 7.24, 8.25, 7.24, np.nan],
            'other': [1, 2, 3, 4, np.nan],
        }
        self.test_metadata = pd.DataFrame(columns, index=sample_ids)
        Metadata(self.test_metadata).save(self.metadata_filename)

        # NOTE(review): presumably MetadataRepo() reads the file registered
        # here -- confirm against the repo implementation.
        config.resources.update({'metadata': self.metadata_filename})
        resources.update(config.resources)
        self.repo = MetadataRepo()
Exemple #4
0
def plot_alpha_filtered_json_query(body,
                                   alpha_metric=None,
                                   percentiles=None,
                                   sample_id=None):
    """Delegate to ``_plot_alpha_percentiles_querybuilder`` with ``body``.

    A new ``MetadataRepo()`` is created and passed along with the given
    arguments; ``body`` is forwarded in the query position. The helper's
    result is returned unchanged.
    """
    metadata_repo = MetadataRepo()
    plot_args = (alpha_metric, percentiles, body, metadata_repo, sample_id)
    return _plot_alpha_percentiles_querybuilder(*plot_args)
def plot_pcoa(beta_metric,
              named_sample_set,
              metadata_categories,
              fillna='nan'):
    """Delegate to ``_plot_pcoa`` using newly constructed repos.

    A ``PCoARepo()`` and then a ``MetadataRepo()`` are created with no
    arguments and forwarded together with the caller's arguments. The
    helper's result is returned unchanged.
    """
    pcoa, metadata = PCoARepo(), MetadataRepo()
    plot_args = (beta_metric, fillna, metadata_categories,
                 named_sample_set, metadata, pcoa)
    return _plot_pcoa(*plot_args)
def _metadata_repo_getter_alt(dataset=None):
    """Return a ``MetadataRepo`` built from the stored metadata resource.

    Parameters
    ----------
    dataset : optional
        When given, the resource is looked up at
        ``('datasets', dataset, schema.metadata_kw)``; otherwise at
        ``('datasets', schema.metadata_kw)``.

    Returns
    -------
    MetadataRepo
        Constructed from the ``.data`` attribute of the looked-up resource.

    Raises
    ------
    UnknownResource
        If a ``KeyError`` is raised while fetching the resource or building
        the repo. The original ``KeyError`` is attached as ``__cause__``.
    """
    if dataset is not None:
        metadata_path = ('datasets', dataset, schema.metadata_kw)
    else:
        metadata_path = ('datasets', schema.metadata_kw)

    try:
        return MetadataRepo(get_resources().gets(*metadata_path).data)
    except KeyError as err:
        # Chain explicitly so the traceback reports the missing key as the
        # direct cause rather than as an error raised during handling.
        raise UnknownResource(
            f"No metadata (kw: '{schema.metadata_kw}')") from err
Exemple #7
0
 def test_metadata_values_dne_sample_id_404(self):
     # Requesting a sample id that is not in the metadata index must
     # raise UnknownID.
     rows = [[1, 2, 3], [4, 5, 6], ['foo', 'bar', 'baz']]
     frame = pd.DataFrame(
         rows,
         columns=['cat', 'fish', 'dog'],
         index=['sample-01', 'sample-02', 'sample-03'],
     )
     target = 'microsetta_public_api.api.metadata._get_repo'
     with patch(target) as repo_getter:
         repo_getter.return_value = MetadataRepo(frame)
         with self.assertRaises(UnknownID):
             get_metadata_values(body=['sample-01', 'sample-dne'],
                                 cat=['fish', 'dog'])
Exemple #8
0
def plot_alpha_filtered(alpha_metric=None,
                        percentiles=None,
                        sample_id=None,
                        **kwargs):
    """Delegate to ``_plot_alpha_percentiles_querybuilder`` using ``kwargs``.

    The extra keyword arguments are formatted with ``_format_query`` and
    then checked with ``_validate_query`` against a new ``MetadataRepo()``.
    A truthy validation result is returned as-is; otherwise the formatted
    query is forwarded to the plotting helper and its result is returned.
    """
    metadata_repo = MetadataRepo()
    formatted_query = _format_query(kwargs)
    validation_error = _validate_query(kwargs, metadata_repo)
    if validation_error:
        return validation_error

    plot_args = (alpha_metric, percentiles, formatted_query, metadata_repo,
                 sample_id)
    return _plot_alpha_percentiles_querybuilder(*plot_args)
Exemple #9
0
 def test_metadata_categories(self):
     # ``categories`` should answer 200 with the metadata column names
     # serialized as a JSON list.
     frame = pd.DataFrame(
         [[1, 2, 3], [4, 5, 6]],
         columns=['cat', 'fish', 'dog'],
     )
     target = 'microsetta_public_api.api.metadata._get_repo'
     with patch(target) as repo_getter:
         repo_getter.return_value = MetadataRepo(frame)
         response, code = categories()

     self.assertEqual(code, 200)
     expected = ['cat', 'fish', 'dog']
     self.assertEqual(json.loads(response), expected)
Exemple #10
0
 def test_metadata_values(self):
     # Values come back as one row per requested sample id, restricted to
     # the requested categories, with a 200 status code.
     rows = [[1, 2, 3], [4, 5, 6], ['foo', 'bar', 'baz']]
     frame = pd.DataFrame(
         rows,
         columns=['cat', 'fish', 'dog'],
         index=['sample-01', 'sample-02', 'sample-03'],
     )
     target = 'microsetta_public_api.api.metadata._get_repo'
     with patch(target) as repo_getter:
         repo_getter.return_value = MetadataRepo(frame)
         response, code = get_metadata_values(
             body=['sample-01', 'sample-03'], cat=['fish', 'dog'])

     self.assertEqual(code, 200)
     expected = [[2, 3], ['bar', 'baz']]
     self.assertEqual(json.loads(response), expected)
class TestMetadataRepo(TempfileTestCase, ConfigTestCase):
    """Exercise ``MetadataRepo`` against a five-sample metadata table.

    ``setUp`` saves the table to a temporary ``.qza`` file, registers that
    file under the ``'metadata'`` resource key and then builds
    ``MetadataRepo()`` with no arguments.
    """

    def setUp(self):
        TempfileTestCase.setUp(self)
        ConfigTestCase.setUp(self)

        self.metadata_filename = self.create_tempfile(suffix='.qza').name

        # Sample 'e' is missing a value in every column.
        sample_ids = pd.Series(['a', 'b', 'c', 'd', 'e'], name='#SampleID')
        columns = {
            'age_cat': ['30s', '40s', '50s', '30s', np.nan],
            'num_cat': [7.24, 7.24, 8.25, 7.24, np.nan],
            'other': [1, 2, 3, 4, np.nan],
        }
        self.test_metadata = pd.DataFrame(columns, index=sample_ids)
        Metadata(self.test_metadata).save(self.metadata_filename)

        config.resources.update({'metadata': self.metadata_filename})
        resources.update(config.resources)
        self.repo = MetadataRepo()

    def tearDown(self):
        # Both bases are torn down explicitly, mirroring setUp.
        TempfileTestCase.tearDown(self)
        ConfigTestCase.tearDown(self)

    def test_categories(self):
        expected = ['age_cat', 'num_cat', 'other']
        observed = self.repo.categories
        self.assertCountEqual(expected, observed)

    def test_category_values_string(self):
        # No exclude_na argument: the missing value is expected to be absent.
        expected = ['30s', '40s', '50s']
        observed = self.repo.category_values('age_cat')
        self.assertCountEqual(expected, observed)

    def test_category_values_with_na(self):
        expected = ['30s', '40s', '50s', np.nan]
        observed = self.repo.category_values('age_cat', exclude_na=False)
        self.assertCountEqual(expected, observed)

    def test_category_values_with_na_np_dropped(self):
        expected = ['30s', '40s', '50s']
        observed = self.repo.category_values('age_cat', exclude_na=True)
        self.assertCountEqual(expected, observed)

    def test_category_values_numeric(self):
        expected = [7.24, 8.25]
        observed = self.repo.category_values('num_cat')
        self.assertCountEqual(expected, observed)

    def test_category_sample_id_matches_query_multiple_category(self):
        # age_cat == '30s' AND num_cat == 7.24
        age_rule = {"id": "age_cat", "operator": "equal", "value": "30s"}
        num_rule = {"id": "num_cat", "operator": "equal", "value": 7.24}
        query = {"condition": "AND", "rules": [age_rule, num_rule]}

        observed = self.repo.sample_id_matches(query)
        self.assertCountEqual(['a', 'd'], observed)

    def test_category_sample_id_matches_query_nested(self):
        # (age_cat == '30s' AND num_cat == 7.24) OR other >= 3
        age_rule = {"id": "age_cat", "operator": "equal", "value": "30s"}
        num_rule = {"id": "num_cat", "operator": "equal", "value": 7.24}
        inner_group = {"condition": "AND", "rules": [age_rule, num_rule]}
        other_rule = {
            "id": "other",
            "operator": "greater_or_equal",
            "value": 3,
        }
        query = {"condition": "OR", "rules": [inner_group, other_rule]}

        observed = self.repo.sample_id_matches(query)
        self.assertCountEqual(['a', 'c', 'd'], observed)

    def test_category_sample_id_matches_query_single_category(self):
        fifties_rule = {"id": "age_cat", "operator": "equal", "value": "50s"}
        query = {"condition": "AND", "rules": [fifties_rule]}

        observed = self.repo.sample_id_matches(query)
        self.assertCountEqual(['c'], observed)

    def test_category_sample_id_matches_query_no_category(self):
        # An empty rule list is expected to match every sample.
        query = {"condition": "AND", "rules": []}

        observed = self.repo.sample_id_matches(query)
        self.assertCountEqual(['a', 'b', 'c', 'd', 'e'], observed)

    def test_category_sample_id_ill_formed_query_no_condition(self):
        # A group that lacks the "condition" key must be rejected.
        rule = {"id": "age_cat", "operator": "equal", "value": "50s"}
        query = {"rules": [rule]}

        expected_message = r'does not appear to be a rule or a group'
        with self.assertRaisesRegex(ValueError, expected_message):
            self.repo.sample_id_matches(query)

    def test_category_sample_id_ill_formed_query_bad_rule(self):
        # A rule that lacks the "operator" key must be rejected.
        rule = {"id": "age_cat", "value": "50s"}
        query = {"condition": "AND", "rules": [rule]}

        expected_message = r'does not appear to be a rule or a group'
        with self.assertRaisesRegex(ValueError, expected_message):
            self.repo.sample_id_matches(query)

    def test_category_sample_id_ill_formed_query_unsupported_condition(self):
        rule = {"id": "age_cat", "value": "50s", "operator": "equal"}
        query = {"condition": "XOR", "rules": [rule]}

        expected_message = r'Only conditions in (.*) are supported. Got '
        with self.assertRaisesRegex(ValueError, expected_message):
            self.repo.sample_id_matches(query)

    def test_category_sample_id_ill_formed_query_unsupported_operator(self):
        rule = {
            "id": "age_cat",
            "value": "50s",
            "operator": "something_weird",
        }
        query = {"condition": "AND", "rules": [rule]}

        expected_message = r'Only operators in (.*) are supported. Got '
        with self.assertRaisesRegex(ValueError, expected_message):
            self.repo.sample_id_matches(query)
def _metadata_repo_getter():
    """Return a new ``MetadataRepo`` constructed with no arguments."""
    # NOTE(review): tests patch a ``_metadata_repo_getter`` in
    # ``microsetta_public_api.api.plotting`` -- presumably this function is
    # that seam; confirm before inlining or renaming it.
    return MetadataRepo()
Exemple #13
0
 def test_construct_from_dataframe(self):
     # A repo built directly from another repo's DataFrame must hold an
     # equal frame.
     source_frame = self.repo._metadata
     rebuilt = MetadataRepo(source_frame)
     pd.testing.assert_frame_equal(rebuilt._metadata, self.repo._metadata)
Exemple #14
0
class TestMetadataRepo(TempfileTestCase, ConfigTestCase):
    """Tests for ``MetadataRepo`` using a five-sample metadata table.

    ``setUp`` saves the table to a temporary ``.qza`` file, registers that
    file under the ``'metadata'`` resource key and then builds
    ``MetadataRepo()`` with no arguments.
    """

    def setUp(self):
        # Both base classes are initialised explicitly rather than via
        # super().
        TempfileTestCase.setUp(self)
        ConfigTestCase.setUp(self)

        self.metadata_filename = self.create_tempfile(suffix='.qza').name

        # Sample 'e' is missing a value in every column.
        self.test_metadata = pd.DataFrame(
            {
                'age_cat': ['30s', '40s', '50s', '30s', np.nan],
                'num_cat': [7.24, 7.24, 8.25, 7.24, np.nan],
                'other': [1, 2, 3, 4, np.nan],
            },
            index=pd.Series(['a', 'b', 'c', 'd', 'e'], name='#SampleID'))
        Metadata(self.test_metadata).save(self.metadata_filename)
        # NOTE(review): presumably MetadataRepo() reads the file registered
        # here -- confirm against the repo implementation.
        config.resources.update({'metadata': self.metadata_filename})
        resources.update(config.resources)
        self.repo = MetadataRepo()

    def tearDown(self):
        # Tear down both bases explicitly, mirroring setUp.
        TempfileTestCase.tearDown(self)
        ConfigTestCase.tearDown(self)

    def test_construct_from_dataframe(self):
        # A repo built directly from another repo's DataFrame must hold an
        # equal frame.
        new_repo = MetadataRepo(self.repo._metadata)
        pd.testing.assert_frame_equal(new_repo._metadata, self.repo._metadata)

    def test_categories(self):
        # The categories are expected to be the metadata column names.
        exp = ['age_cat', 'num_cat', 'other']
        obs = self.repo.categories
        self.assertCountEqual(exp, obs)

    def test_category_values_string(self):
        # No exclude_na argument: the missing value is expected to be absent.
        exp = ['30s', '40s', '50s']
        obs = self.repo.category_values('age_cat')
        self.assertCountEqual(exp, obs)

    def test_category_values_with_na(self):
        # exclude_na=False keeps the missing value in the result.
        exp = ['30s', '40s', '50s', np.nan]
        obs = self.repo.category_values('age_cat', exclude_na=False)
        self.assertCountEqual(exp, obs)

    def test_category_values_with_na_np_dropped(self):
        # exclude_na=True drops the missing value.
        exp = ['30s', '40s', '50s']
        obs = self.repo.category_values('age_cat', exclude_na=True)
        self.assertCountEqual(exp, obs)

    def test_category_values_numeric(self):
        # Repeated numeric values are expected to be reported once.
        exp = [7.24, 8.25]
        obs = self.repo.category_values('num_cat')
        self.assertCountEqual(exp, obs)

    def test_samples(self):
        # ``samples`` is expected to be a list equal to the table's index.
        obs = self.repo.samples
        exp = self.test_metadata.index
        self.assertListEqual(obs, exp.values.tolist())

    def test_has_category_single(self):
        # A single category name yields a single truthy/falsy answer.
        obs = self.repo.has_category('num_cat')
        self.assertTrue(obs)
        obs = self.repo.has_category('dne')
        self.assertFalse(obs)

    def test_has_category_group(self):
        # A list of names yields one boolean per name, in order.
        obs = self.repo.has_category(['num_cat', 'none', 'other'])
        self.assertListEqual(obs, [True, False, True])

    def test_has_sample_id_single(self):
        # A single sample id yields a single truthy/falsy answer.
        obs = self.repo.has_sample_id('b')
        self.assertTrue(obs)
        obs = self.repo.has_sample_id('None')
        self.assertFalse(obs)

    def test_has_sample_id_group(self):
        # A list of ids yields one boolean per id, in order.
        obs = self.repo.has_sample_id(['a', 'q', 'c'])
        self.assertListEqual(obs, [True, False, True])

    def test_get_metadata(self):
        obs = self.repo.get_metadata(['num_cat', 'other'])
        # Checking the value here is a little weird because the repo does a
        # conversion. Hardcoding based on the values in setUp: missing
        # values are expected as None and 'other' as floats.
        exp = {
            'num_cat': {
                'a': 7.24,
                'b': 7.24,
                'c': 8.25,
                'd': 7.24,
                'e': None
            },
            'other': {
                'a': 1.0,
                'b': 2.0,
                'c': 3.0,
                'd': 4.0,
                'e': None
            },
        }
        self.assertDictEqual(obs.to_dict(), exp)

        # A single category name (not a list) gives a flat
        # sample-id -> value mapping.
        obs = self.repo.get_metadata('num_cat')
        exp = {'a': 7.24, 'b': 7.24, 'c': 8.25, 'd': 7.24, 'e': None}
        self.assertDictEqual(obs.to_dict(), exp)

        # sample_ids restricts the result to the requested samples.
        obs = self.repo.get_metadata('num_cat', sample_ids=['a', 'b'])
        exp = {
            'a': 7.24,
            'b': 7.24,
        }
        self.assertDictEqual(obs.to_dict(), exp)

        # 'one' is not in the table; with fillna='nan' its values are
        # expected to be the string 'nan'.
        obs = self.repo.get_metadata(
            ['num_cat', 'other'],
            sample_ids=['a', 'one'],
            fillna='nan',
        )
        exp = {
            'num_cat': {
                'a': 7.24,
                'one': 'nan',
            },
            'other': {
                'a': 1.0,
                'one': 'nan',
            },
        }
        self.assertDictEqual(obs.to_dict(), exp)

    def test_category_sample_id_matches_query_multiple_category(self):
        # age_cat == '30s' AND num_cat == 7.24
        exp = ['a', 'd']
        query = {
            "condition":
            "AND",
            "rules": [{
                "id": "age_cat",
                "operator": "equal",
                "value": "30s",
            }, {
                "id": "num_cat",
                "operator": "equal",
                "value": 7.24,
            }]
        }
        obs = self.repo.sample_id_matches(query)
        self.assertCountEqual(exp, obs)

    def test_category_sample_id_matches_query_nested(self):
        # (age_cat == '30s' AND num_cat == 7.24) OR other >= 3
        exp = ['a', 'c', 'd']
        query = {
            "condition":
            "OR",
            "rules": [
                {
                    "condition":
                    "AND",
                    "rules": [{
                        "id": "age_cat",
                        "operator": "equal",
                        "value": "30s",
                    }, {
                        "id": "num_cat",
                        "operator": "equal",
                        "value": 7.24,
                    }]
                },
                {
                    "id": "other",
                    "operator": "greater_or_equal",
                    "value": 3,
                },
            ],
        }
        obs = self.repo.sample_id_matches(query)
        self.assertCountEqual(exp, obs)

    def test_category_sample_id_matches_query_single_category(self):
        # age_cat == '50s'
        exp = ['c']
        query = {
            "condition": "AND",
            "rules": [
                {
                    "id": "age_cat",
                    "operator": "equal",
                    "value": "50s",
                },
            ]
        }
        obs = self.repo.sample_id_matches(query)
        self.assertCountEqual(exp, obs)

    def test_category_sample_id_matches_query_no_category(self):
        # An empty rule list is expected to match every sample.
        exp = ['a', 'b', 'c', 'd', 'e']
        query = {"condition": "AND", "rules": []}
        obs = self.repo.sample_id_matches(query)
        self.assertCountEqual(exp, obs)

    def test_category_sample_id_ill_formed_query_no_condition(self):
        # A group that lacks the "condition" key must be rejected.
        query = {
            "rules": [{
                "id": "age_cat",
                "operator": "equal",
                "value": "50s",
            }]
        }
        with self.assertRaisesRegex(
                ValueError, r'does not appear to be a '
                r'rule or a group'):
            self.repo.sample_id_matches(query)

    def test_category_sample_id_ill_formed_query_bad_rule(self):
        # A rule that lacks the "operator" key must be rejected.
        query = {
            "condition": "AND",
            "rules": [{
                "id": "age_cat",
                "value": "50s",
            }]
        }
        with self.assertRaisesRegex(
                ValueError, r'does not appear to be a '
                r'rule or a group'):
            self.repo.sample_id_matches(query)

    def test_category_sample_id_ill_formed_query_unsupported_condition(self):
        # "XOR" is expected to be rejected as an unsupported condition.
        query = {
            "condition": "XOR",
            "rules": [{
                "id": "age_cat",
                "value": "50s",
                "operator": "equal"
            }]
        }
        with self.assertRaisesRegex(
                ValueError, r'Only conditions in (.*) '
                r'are supported. Got '):
            self.repo.sample_id_matches(query)

    def test_category_sample_id_ill_formed_query_unsupported_operator(self):
        # An unrecognised operator name is expected to be rejected.
        query = {
            "condition":
            "AND",
            "rules": [{
                "id": "age_cat",
                "value": "50s",
                "operator": "something_weird"
            }]
        }
        with self.assertRaisesRegex(
                ValueError, r'Only operators in (.*) '
                r'are supported. Got '):
            self.repo.sample_id_matches(query)