def test_register_unregister_synonym_key(self):
    """
    Register and unregister synonym keys, with and without a bind.

    Checks that registering an already registered key raises
    RecordAlreadyExistsError, that unregistering an absent key raises
    NoRecordFoundError, and that the number of stored keys follows the
    register / unregister calls.
    """
    key = "synonym_key_1"

    # First registration succeeds, the second one must be rejected.
    TaxonomyManager.register_synonym_key(key)
    with self.assertRaises(RecordAlreadyExistsError):
        TaxonomyManager.register_synonym_key(key)
    registered = TaxonomyManager.get_synonym_keys()
    self.assertEqual(len(registered), 1)

    # Remove the key, then try to remove a key that is not registered.
    # NOTE: the absent key is "synonym_key" (not "synonym_key_1"); neither
    # is registered at this point.
    TaxonomyManager.unregister_synonym_key(key)
    with self.assertRaises(NoRecordFoundError):
        TaxonomyManager.unregister_synonym_key("synonym_key")
    remaining = TaxonomyManager.get_synonym_keys()
    self.assertEqual(len(remaining), 0)

    # Test with bind
    bound_key = "test"
    with Connector.get_connection() as connection:
        TaxonomyManager.register_synonym_key(bound_key, bind=connection)
        TaxonomyManager.assert_synonym_key_exists(
            bound_key,
            bind=connection,
        )
        TaxonomyManager.unregister_synonym_key(bound_key, bind=connection)
        TaxonomyManager.assert_synonym_key_does_not_exists(
            bound_key,
            bind=connection,
        )
def _process(self, *args, include_mptt=False, include_synonyms=False,
             flatten=False, **kwargs):
    """
    Return the taxon dataframe.

    Extra positional and keyword arguments are accepted but not used.

    :param include_mptt: If True, include the mptt columns
        (mptt_left, mptt_right, mptt_tree_id, mptt_depth).
    :param include_synonyms: If True, include the stored synonyms for
        each taxon, as one column per registered synonym key (the
        'niamoto' key is skipped).
    :param flatten: If True, flattens the taxonomy hierarchy and include
        it in the resulting dataframe.
    :return: A ``(dataframe, [], {'index_label': 'id'})`` tuple, where
        the dataframe is indexed by the taxon id.
    """
    with Connector.get_connection() as connection:
        synonyms = []
        if include_synonyms:
            # The registered keys are only needed to build the synonym
            # columns, so they are not queried otherwise.
            keys = TaxonomyManager.get_synonym_keys()['name']
            synonyms = [
                meta.taxon.c.synonyms[k].label(k)
                for k in keys if k != 'niamoto'
            ]
        mptt = []
        if include_mptt:
            mptt = [
                meta.taxon.c.mptt_left.label('mptt_left'),
                meta.taxon.c.mptt_right.label('mptt_right'),
                meta.taxon.c.mptt_tree_id.label('mptt_tree_id'),
                meta.taxon.c.mptt_depth.label('mptt_depth'),
            ]
        sel = select([
            meta.taxon.c.id.label('id'),
            meta.taxon.c.full_name.label('full_name'),
            meta.taxon.c.rank_name.label('rank_name'),
            # The rank is cast to a string in the query.
            cast(meta.taxon.c.rank, String).label('rank'),
            meta.taxon.c.parent_id.label('parent_id'),
        ] + synonyms + mptt)
        df = pd.read_sql(sel, connection, index_col='id')
        # Replace None values with nan. The ``pd.np`` alias was deprecated
        # in pandas 1.0 and removed in pandas 2.0, so the builtin float
        # nan is used instead.
        df.fillna(value=float('nan'), inplace=True)
        if flatten:
            df = _flatten(df)
        return df, [], {'index_label': 'id'}
def test_get_synonym_keys(self):
    """
    Check that get_synonym_keys returns an empty result here.
    """
    self.assertEqual(len(TaxonomyManager.get_synonym_keys()), 0)
def test_set_taxonomy(self):
    """
    Set the taxonomy from an empty, a one-row and a three-row dataframe.

    After each call the first returned value is expected to equal the
    number of submitted rows. After the last call, the stored taxa, the
    registered synonym keys and the synonyms per key are checked.
    """
    ranks = niamoto_db_meta.TaxonRankEnum

    # Empty taxonomy: only the column names are given.
    empty_taxonomy = pd.DataFrame(
        columns=['full_name', 'rank_name', 'rank', 'parent_id'],
    )
    count, _ = TaxonomyManager.set_taxonomy(empty_taxonomy)
    self.assertEqual(count, 0)

    # Single taxon, no synonym columns.
    single_record = [
        {
            'id': 0,
            'full_name': 'Family One',
            'rank_name': 'One',
            'rank': ranks.FAMILIA,
            'parent_id': None,
        },
    ]
    single_taxon = pd.DataFrame.from_records(single_record, index='id')
    count, _ = TaxonomyManager.set_taxonomy(single_taxon)
    self.assertEqual(count, 1)

    # Three taxa carrying two extra columns ('gbif' and 'taxref').
    records = [
        {
            'id': 0,
            'full_name': 'Family One',
            'rank_name': 'One',
            'rank': ranks.FAMILIA,
            'parent_id': None,
            'gbif': 5,
            'taxref': 1,
        },
        {
            'id': 1,
            'full_name': 'Genus Two',
            'rank_name': 'Two',
            'rank': ranks.GENUS,
            'parent_id': 0,
            'gbif': 10,
            'taxref': 2,
        },
        {
            'id': 2,
            'full_name': 'Species Three',
            'rank_name': 'Three',
            'rank': ranks.SPECIES,
            'parent_id': None,
            'gbif': 7,
            'taxref': 3,
        },
    ]
    taxonomy = pd.DataFrame.from_records(records, index='id')
    count, _ = TaxonomyManager.set_taxonomy(taxonomy)
    self.assertEqual(count, 3)

    # Stored taxa and registered synonym keys.
    stored = TaxonomyManager.get_raw_taxon_dataframe()
    self.assertEqual(len(stored), 3)
    registered_keys = TaxonomyManager.get_synonym_keys()
    self.assertEqual(len(registered_keys), 2)

    # Synonyms for the identity key, then for the None key.
    identity = TaxonomyManager.get_synonyms_for_key(
        TaxonomyManager.IDENTITY_SYNONYM_KEY
    )
    self.assertEqual(len(identity), 3)
    without_key = TaxonomyManager.get_synonyms_for_key(None)
    self.assertEqual(len(without_key), 0)

    # Each extra column must provide one synonym per taxon.
    for synonym_key in ("gbif", 'taxref'):
        found = TaxonomyManager.get_synonyms_for_key(synonym_key)
        self.assertEqual(len(found), 3)