Esempio n. 1
0
    def to_bel(self) -> BELGraph:
        """Export all stored pathways and their proteins as one BEL graph.

        The namespaces returned by this manager's and the HGNC manager's
        ``upload_bel_namespace()`` are registered on the graph, then every
        protein of every pathway is linked to its pathway with
        ``graph.add_part_of``.

        :return: A BEL graph named "WikiPathways Associations"

        Example Usage:

        >>> manager = Manager()
        >>> graph = manager.to_bel()
        """
        graph = BELGraph(
            name='WikiPathways Associations',
            version='1.0.0',
        )

        wikipathways_namespace = self.upload_bel_namespace()
        graph.namespace_url[
            wikipathways_namespace.keyword] = wikipathways_namespace.url

        # The HGNC manager shares this manager's engine and session
        hgnc_manager = bio2bel_hgnc.Manager(engine=self.engine,
                                            session=self.session)
        hgnc_namespace = hgnc_manager.upload_bel_namespace()
        graph.namespace_url[hgnc_namespace.keyword] = hgnc_namespace.url

        for pathway in tqdm(self.get_all_pathways(),
                            total=self._count_model(Pathway)):
            # The pathway node does not depend on the protein, so it is
            # serialized once per pathway instead of once per protein.
            pathway_bel = pathway.serialize_to_pathway_node()
            for protein in pathway.proteins:
                graph.add_part_of(protein.serialize_to_protein_node(),
                                  pathway_bel)

        return graph
Esempio n. 2
0
    def to_bel(self,
               drug_namespace: Optional[str] = None,
               target_namespace: Optional[str] = None) -> BELGraph:
        """Build a BEL graph from the stored drug-protein interactions.

        :param drug_namespace: Forwarded unchanged to ``add_to_graph`` of
         every interaction
        :param target_namespace: Forwarded unchanged to ``add_to_graph`` of
         every interaction
        :return: A BEL graph named "DrugBank"
        """
        rv = BELGraph(name='DrugBank', version='5.1.4')

        # Register this manager's namespace, then the HGNC one (the HGNC
        # manager shares this manager's engine and session).
        self.add_namespace_to_graph(rv)
        bio2bel_hgnc.Manager(
            engine=self.engine,
            session=self.session,
        ).add_namespace_to_graph(rv)

        interactions: Iterable[DrugProteinInteraction] = tqdm(
            self.list_drug_protein_interactions(),
            total=self.count_drug_protein_interactions(),
            desc='Mapping drug-protein interactions to BEL',
        )
        for interaction in interactions:
            interaction.add_to_graph(
                rv,
                drug_namespace=drug_namespace,
                target_namespace=target_namespace,
            )

        return rv
Esempio n. 3
0
    def populate(cls):
        """Fill the HGNC, miRBase and miRTarBase test databases.

        HGNC is loaded from ``TEST_HGNC_JSON``, the miRBase definitions from
        ``TEST_MIRBASE_JSON``, and finally ``cls.manager`` is populated from
        ``TEST_MIRTARBASE_EXCEL``.

        Contents of the test Excel sheet:

        miRTarBase ID	miRNA	Species (miRNA)	Target Gene	Target Gene (Entrez Gene ID)	Species (Target Gene)	Experiments	Support Type	References (PMID)
        MIRT000002	hsa-miR-20a-5p	Homo sapiens	HIF1A	3091	Homo sapiens	Luciferase reporter assay//Western blot//Northern blot//qRT-PCR	Functional MTI	18632605
        MIRT000002	hsa-miR-20a-5p	Homo sapiens	HIF1A	3091	Homo sapiens	Luciferase reporter assay//qRT-PCR//Western blot	Functional MTI	23911400
        MIRT000002	hsa-miR-20a-5p	Homo sapiens	HIF1A	3091	Homo sapiens	HITS-CLIP	Functional MTI (Weak)	22473208
        MIRT000178	hsa-miR-20a-5p	Homo sapiens	TCEAL1	9338	Homo sapiens	Luciferase reporter assay//Microarray//Northern blot//qRT-PCR//Western blot	Functional MTI	23059786
        MIRT000004	dme-miR-8-3p	Drosophila melanogaster	ush	33225	Drosophila melanogaster	qRT-PCR//Luciferase reporter assay//Western blot	Functional MTI	20005803
        MIRT000005	mmu-miR-124-3p	Mus musculus	Itgb1	16412	Mus musculus	Luciferase reporter assay//Microarray//qRT-PCR	Functional MTI	18042700
        MIRT000005	mmu-miR-124-3p	Mus musculus	Itgb1	16412	Mus musculus	Luciferase reporter assay//qRT-PCR//Western blot//Reporter assay;Microarray	Functional MTI	18619591
        MIRT000006	hsa-miR-146a-5p	Homo sapiens	CXCR4	7852	Homo sapiens	qRT-PCR//Luciferase reporter assay//Western blot	Functional MTI	18568019
        MIRT000006	hsa-miR-146a-5p	Homo sapiens	CXCR4	7852	Homo sapiens	Microarray	Functional MTI (Weak)	20375304
        MIRT000012	hsa-miR-122-5p	Homo sapiens	CYP7A1	1581	Homo sapiens	qRT-PCR//Luciferase reporter assay	Functional MTI	20351063
        """
        # HGNC: create the tables, then insert the genes from the test JSON
        hgnc_manager = bio2bel_hgnc.Manager(connection=cls.connection)
        cls.hgnc_manager = hgnc_manager
        hgnc_manager._create_tables()
        hgnc_manager.insert_hgnc(
            hgnc_dict=hgnc_manager.load_hgnc_json(
                hgnc_file_path=TEST_HGNC_JSON),
            silent=True,
            low_memory=False,
        )

        # miRBase: feed the parsed test JSON to the definitions helper
        cls.mirbase_manager = bio2bel_mirbase.Manager(
            connection=cls.connection)
        with open(TEST_MIRBASE_JSON) as mirbase_file:
            mirbase_entries = json.load(mirbase_file)
        cls.mirbase_manager._populate_definitions_helper(mirbase_entries)

        # miRTarBase itself
        cls.manager.populate(TEST_MIRTARBASE_EXCEL)
Esempio n. 4
0
    def populate(self, url: Union[None, str, Iterable[str]] = None):
        """Populate the database with pathways and their proteins.

        HGNC is populated first if its manager reports that it is empty.
        The session is committed after each pathway.

        :param url: Location of a GMT file, an iterable of such locations,
         or None. A string or None is handed to ``parse_gmt_file`` as-is.
        :raises TypeError: if ``url`` is neither None, a string nor an
         iterable
        """
        hgnc_manager = bio2bel_hgnc.Manager(engine=self.engine,
                                            session=self.session)
        if not hgnc_manager.is_populated():
            hgnc_manager.populate()

        # Strings are iterable too, so they must be tested for first
        if url is None or isinstance(url, str):
            pathways = parse_gmt_file(url=url)
        elif isinstance(url, Iterable):
            pathways = []
            for single_url in url:
                pathways.extend(parse_gmt_file(url=single_url))
        else:
            raise TypeError(f'Invalid type for url: {type(url)} ({url})')

        # Lookup tables used to translate between identifiers
        entrez_to_hgnc_symbol = hgnc_manager.build_entrez_id_symbol_mapping()
        hgnc_symbol_id = hgnc_manager.build_hgnc_symbol_id_mapping()

        proteins_by_entrez_id = {}  # proteins already seen during this run
        unmapped_entrez_ids = set()

        progress = tqdm(pathways, desc='Loading WikiPathways')
        for pathway_name, species, wikipathways_id, gene_set in progress:
            pathway = self.get_or_create_pathway(
                wikipathways_id=wikipathways_id,
                name=pathway_name.strip(),
                species=species,
            )

            for entrez_id in gene_set:
                if entrez_id not in proteins_by_entrez_id:
                    hgnc_symbol = entrez_to_hgnc_symbol.get(entrez_id)

                    if not hgnc_symbol:
                        # Genes without an HGNC symbol are reported and skipped
                        progress.write(
                            f"({species}) ncbigene:{entrez_id} has no HGNC symbol"
                        )
                        unmapped_entrez_ids.add(entrez_id)
                        continue

                    proteins_by_entrez_id[entrez_id] = self.get_or_create_protein(
                        entrez_id, hgnc_symbol, hgnc_symbol_id[hgnc_symbol])

                protein = proteins_by_entrez_id[entrez_id]
                if pathway not in protein.pathways:
                    protein.pathways.append(pathway)

            self.session.commit()

        if unmapped_entrez_ids:
            log.warning("Total of {} missing ENTREZ".format(
                len(unmapped_entrez_ids)))
Esempio n. 5
0
def prerender(graph: BELGraph,
              hgnc_manager=None) -> Mapping[str, Mapping[str, Any]]:
    """Generate the annotations JSON for Ideogram.

    :param graph: A BEL graph; only its ``CentralDogma`` nodes whose
     namespace is ``hgnc`` (case-insensitive) are considered
    :param hgnc_manager: A Bio2BEL HGNC manager. A default one is created
     when omitted.
    :return: A dictionary from HGNC symbol to a dictionary with the keys
     ``name``, ``start`` and ``stop`` (taken from the human RefSeq table)
     and, when HGNC has a location for the gene, ``chr``
    """
    import re

    import bio2bel_hgnc
    from bio2bel_entrez.parser import get_human_refseq_slim_df
    from bio2bel_hgnc.models import HumanGene

    if hgnc_manager is None:
        hgnc_manager = bio2bel_hgnc.Manager()

    hgnc_symbols = {
        node.name
        for node in graph
        if isinstance(node, CentralDogma) and node.namespace.lower() == 'hgnc'
    }

    refseq_df = get_human_refseq_slim_df()

    # Rows are unpacked as (<unused>, symbol, start, stop)
    result = {
        hgnc_symbol: dict(name=hgnc_symbol, start=start, stop=stop)
        for _, hgnc_symbol, start, stop in refseq_df[refseq_df['Symbol'].isin(
            hgnc_symbols)].values
    }

    human_genes = (hgnc_manager.session.query(
        HumanGene.symbol,
        HumanGene.location).filter(HumanGene.symbol.in_(hgnc_symbols)).all())
    for human_gene in human_genes:
        entry = result.get(human_gene.symbol)
        if entry is None:
            continue  # something doesn't have a mapping in HGNC

        location = human_gene.location
        if location is None:
            continue  # no location recorded, so no chromosome can be derived

        # The chromosome is whatever precedes the FIRST arm designator.
        # Splitting on 'q' alone would turn 'Xp11.23-q13' into 'Xp11.23-'.
        entry['chr'] = re.split('[pq]', location, maxsplit=1)[0]

    return result
Esempio n. 6
0
def main():
    """Write the Entrez-to-HGNC identifier and symbol mappings to JSON files.

    The HGNC database is populated first if it is empty. The files are
    written to the current working directory.
    """
    hgnc_manager = bio2bel_hgnc.Manager()
    if not hgnc_manager.is_populated():
        hgnc_manager.populate()

    def _write_json(mapping, path):
        # Sorted keys and indentation keep the output stable and readable
        with open(path, 'w') as file:
            json.dump(mapping, file, indent=2, sort_keys=True)

    _write_json(
        hgnc_manager.build_entrez_id_to_hgnc_id_mapping(),
        'entrez_id_to_hgnc_id.json',
    )
    _write_json(
        hgnc_manager.build_entrez_id_to_hgnc_symbol_mapping(),
        'entrez_id_to_hgnc_symbol.json',
    )
Esempio n. 7
0
    def __init__(self,
                 hgnc_manager: Optional[bio2bel_hgnc.Manager] = None) -> None:
        """Build the lookup tables from HGNC and human RefSeq data.

        :param hgnc_manager: A Bio2BEL HGNC manager. A default one is created
         when omitted; either way it is populated if it reports being empty.
        """
        if hgnc_manager is None:
            logger.info('getting Bio2BEL HGNC manager')
            hgnc_manager = bio2bel_hgnc.Manager()

        if not hgnc_manager.is_populated():
            logger.info('populating HGNC')
            hgnc_manager.populate()

        # For both chromosome maps, the chromosome is the first piece of the
        # location after splitting with CHROMOSOME_SPLIT_RE; genes without a
        # location are left out.
        logger.info('generating hgnc symbol to chromosome mapping')
        self.hgnc_symbol_to_chromosome = {
            str(symbol): CHROMOSOME_SPLIT_RE.split(location)[0]
            for symbol, location in hgnc_manager.session.query(
                HumanGene.symbol, HumanGene.location) if location is not None
        }

        logger.info('generating hgnc id to chromosome mapping')
        self.hgnc_id_to_chromosome = {
            str(hgnc_id): CHROMOSOME_SPLIT_RE.split(location)[0]
            for hgnc_id, location in hgnc_manager.session.query(
                HumanGene.identifier, HumanGene.location)
            if location is not None
        }

        self.hgnc_id_to_positions = {}
        self.hgnc_symbol_to_positions = {}

        logger.info('generating entrez id to hgnc id mapping')
        self.entrez_id_to_hgnc_id = hgnc_manager.build_entrez_id_to_hgnc_id_mapping(
        )

        logger.info('getting human refseq data')
        self.human_refseq_df = get_human_refseq_slim_df()

        logger.info('generating maps with refseq data')
        for entrez_id, symbol, start, end in self.human_refseq_df.values:
            # The mapping is looked up with the Entrez identifier as a string
            hgnc_id = self.entrez_id_to_hgnc_id.get(str(entrez_id))
            if hgnc_id is None:
                logger.debug(
                    'Could not find ncbigene:%s in HGNC. May be withdrawn',
                    entrez_id,
                )
                continue
            self.hgnc_id_to_positions[hgnc_id] = start, end
            self.hgnc_symbol_to_positions[symbol] = start, end

        # These will get populated as graphs are added with update_chromosome_map()
        self.chromosome_to_edge_keys = defaultdict(list)
        self.cross_chromosome_to_edge_keys = defaultdict(list)
Esempio n. 8
0
def _get_mappings():
    """Build the gene symbol to identifier mappings for HGNC, MGI and RGD.

    Each Bio2BEL manager is populated first if it reports being empty.

    :return: A triple of the HGNC, MGI and RGD mappings, in that order
    """
    def _populated(manager):
        # Make sure the manager's database has content before it is queried
        if not manager.is_populated():
            manager.populate()
        return manager

    hgnc_mapping = _populated(
        bio2bel_hgnc.Manager()).build_hgnc_symbol_id_mapping()
    mgi_mapping = _populated(
        bio2bel_mgi.Manager()).build_mgi_gene_symbol_to_mgi_id_mapping()
    rgd_mapping = _populated(
        bio2bel_rgd.Manager()).build_rgd_gene_symbol_to_rgd_id_mapping()

    return hgnc_mapping, mgi_mapping, rgd_mapping
Esempio n. 9
0
    def setUpClass(cls):
        """Populate HGNC and this package's tables, then set up PyBEL.

        All three managers share one engine and session built from
        ``cls.connection``.
        """
        super().setUpClass()

        engine, session = build_engine_session(connection=cls.connection)
        cls.engine, cls.session = engine, session
        shared = {'engine': engine, 'session': session}

        # HGNC manager, filled from the test file
        cls.hgnc_manager = bio2bel_hgnc.Manager(**shared)
        cls.hgnc_manager.create_all()
        cls.hgnc_manager.populate(
            hgnc_file_path=hgnc_test_path,
            use_hcop=False,
        )

        # Manager under test, filled from the test gene sets file
        cls.manager = Manager(**shared)
        gene_sets_url = pathlib.Path(gene_sets_path).as_uri()
        cls.manager.populate(url=gene_sets_url)

        # PyBEL manager
        cls.pybel_manager = pybel.Manager(**shared)
        cls.pybel_manager.create_all()
Esempio n. 10
0
    def get_pathway_graph_by_id(self,
                                wikipathways_id: str) -> Optional[BELGraph]:
        """Build a BEL graph for a single pathway.

        :param wikipathways_id: WikiPathways identifier
        :return: A BEL graph in which every protein of the pathway is linked
         to the pathway node, or None if no pathway has this identifier

        Example Usage:

        >>> manager = Manager()
        >>> manager.get_pathway_graph_by_id('WP61') # Notch signaling pathway
        """
        pathway = self.get_pathway_by_id(wikipathways_id)
        if pathway is None:
            return None

        rv = BELGraph(
            name=f'{pathway.name} ({pathway.species})',
            version='1.0.0',
        )

        # Register this manager's namespace on the graph
        own_namespace = self.upload_bel_namespace()
        rv.namespace_url[own_namespace.keyword] = own_namespace.url

        # Same for HGNC; its manager shares this manager's engine and session
        hgnc_namespace = bio2bel_hgnc.Manager(
            engine=self.engine,
            session=self.session,
        ).upload_bel_namespace()
        rv.namespace_url[hgnc_namespace.keyword] = hgnc_namespace.url

        pathway_node = pathway.serialize_to_pathway_node()
        for protein in pathway.proteins:
            protein_node = protein.serialize_to_protein_node()
            rv.add_part_of(protein_node, pathway_node)

        return rv
Esempio n. 11
0
    def get_drug_to_hgnc_symbols(self,
                                 cache=True,
                                 recalculate=False) -> Dict[str, List[str]]:
        """Get a dictionary of drug names to HGNC gene symbols.

        :param cache: If true, read the result from the JSON cache file when
         it exists and write the cache file after computing
        :param recalculate: If true, compute the result even if a cache file
         exists
        :return: A dictionary from each drug to the list of HGNC symbols of
         its targets; HGNC identifiers without a symbol are skipped with a
         warning
        """
        can_use_cached = (
            cache
            and not recalculate
            and os.path.exists(_dti_symbols_cache_path)
        )
        if can_use_cached:
            log.debug('loading cached DTIs with gene symbols')
            with open(_dti_symbols_cache_path) as cache_file:
                return json.load(cache_file)

        hgnc_manager = bio2bel_hgnc.Manager(
            engine=self.engine,
            session=self.session,
        )
        if not hgnc_manager.is_populated():
            hgnc_manager.populate()

        symbol_by_hgnc_id = hgnc_manager.build_hgnc_id_symbol_mapping()

        symbols_by_drug = defaultdict(list)
        for drug, hgnc_ids in self.get_drug_to_hgnc_ids().items():
            for hgnc_id in hgnc_ids:
                symbol = symbol_by_hgnc_id.get(hgnc_id)
                if symbol is not None:
                    symbols_by_drug[drug].append(symbol)
                else:
                    log.warning('could not map HGNC identifier: %s', hgnc_id)

        if cache:
            with open(_dti_symbols_cache_path, 'w') as cache_file:
                log.info('dumping cached DTIs')
                json.dump(symbols_by_drug, cache_file)

        return dict(symbols_by_drug)
Esempio n. 12
0
def get_graph(
    force: bool = False,
    force_global: bool = False,
    names: Optional[NamesList] = None,
    resources_directory: Optional[str] = None,
) -> BELGraph:
    """Get all resources in a combined BELGraph.

    The per-package graphs are merged, nodes in the ``ncbigene``/``egid``
    namespaces with a known HGNC symbol are relabeled as HGNC nodes, the
    central dogma is enriched, and the result is pickled for later calls.

    :param force: Forwarded to ``get_graph_by_manager`` for each package
    :param force_global: Should the global cache file be overwritten?
    :param names: Pairs of a bio2bel package name and the keyword arguments
     for its BEL export. Defaults to ``DEFAULT_NAMES``.
    :param resources_directory: A non-default place to store the resources
    """
    pickle_path = os.path.join(resources_directory or RESOURCES, CACHE_NAME)
    if os.path.exists(pickle_path) and not force_global:
        logger.info('Getting cached full graph')
        return from_pickle(pickle_path)

    if names is None:
        names = DEFAULT_NAMES

    logger.info('Generating graphs')
    graphs = []
    for name, to_bel_kwargs in names:
        source_graph = get_graph_by_manager(
            name,
            force=force,
            to_bel_kwargs=to_bel_kwargs,
        )
        logger.info(source_graph.summary_str())
        graphs.append(source_graph)

    logger.info('Merging graphs')
    merged = pybel.union(graphs)
    source_names = ", ".join(source.name for source in graphs)
    merged.name = f'Graph from: {source_names}'
    merged.version = '0.0.1'
    logger.info('Finished merging graphs')

    logger.info('Preparing HGNC mappings')
    hgnc_manager = bio2bel_hgnc.Manager()
    hgnc_symbol_to_id = hgnc_manager.build_hgnc_symbol_id_mapping()
    entrez_id_to_hgnc_symbol = hgnc_manager.build_entrez_id_to_hgnc_symbol_mapping()

    logger.info('Generating namespace mapping for nodes')
    entrez_namespaces = {'ncbigene', 'egid'}
    replacements = {}
    for node in merged:
        namespace = node.get('namespace')
        if namespace is None or namespace.lower() not in entrez_namespaces:
            continue
        if node.identifier not in entrez_id_to_hgnc_symbol:
            continue

        # Rebuild the node with the same class, but in the HGNC namespace
        hgnc_symbol = entrez_id_to_hgnc_symbol[node.identifier]
        replacements[node] = node.__class__(
            namespace='hgnc',
            name=hgnc_symbol,
            identifier=hgnc_symbol_to_id[hgnc_symbol],
        )

    logger.info('Relabeling nodes')
    nx.relabel_nodes(merged, replacements, copy=False)

    logger.info('Enriching central dogma')
    enrich_protein_and_rna_origins(merged)

    logger.info('Exporting snp2k pickle')
    to_pickle(merged, pickle_path)
    return merged
Esempio n. 13
0
    def init_app(self, app: flask.Flask) -> None:
        """Initialize a Flask app.

        Registers this object as ``app.extensions['bio2bel']``, then tries to
        import each optional package listed below. For every package that can
        be imported, its ``Manager`` is instantiated and stored on ``self``
        under the listed attribute name, its tables are created where
        requested, and the listed manager methods are handed to
        ``in_place_transformation``. Packages that cannot be imported are
        skipped silently. Finally, ``self.manager_dict`` is updated with all
        attributes of ``self`` whose name ends in ``_manager`` and whose value
        is not None.

        :param app: The Flask application to attach to
        """
        import importlib

        self.app = app
        app.extensions['bio2bel'] = self

        # One row per optional package, processed in this order:
        # (module, attribute on self, log label, pass ``connection`` to the
        #  Manager?, call ``create_all()``?, methods to register with
        #  ``in_place_transformation``)
        manager_specs = (
            ('bio2bel_chebi', 'chebi_manager', 'ChEBI', True, True,
             ('enrich_chemical_hierarchy',)),
            ('bio2bel_hgnc', 'hgnc_manager', 'HGNC', True, True,
             ('enrich_genes_with_families', 'enrich_families_with_genes')),
            ('bio2bel_mirtarbase', 'mirtarbase_manager', 'miRTarBase', True, True,
             ('enrich_mirnas', 'enrich_rnas')),
            ('bio2bel_expasy', 'expasy_manager', 'ExPASy', True, True,
             ('enrich_proteins_with_enzyme_families', 'enrich_enzymes')),
            ('bio2bel_go', 'go_manager', 'GO', True, False,
             ('enrich_bioprocesses',)),
            ('bio2bel_entrez', 'entrez_manager', 'Entrez', True, True, ()),
            ('bio2bel_interpro', 'interpro_manager', 'InterPro', True, True, ()),
            ('bio2bel_ctd', 'ctd_manager', 'CTD', True, True,
             ('enrich_graph_genes',)),
            ('bio2bel_hmdb', 'hmdb_manager', 'HMDB', True, True, ()),
            ('bio2bel_hmdd', 'hmdd_manager', 'HMDD', True, True, ()),
            ('bio2bel_mir2disease', 'mir2disease_manager', 'mir2disease', True, True, ()),
            ('bio2bel_drugbank', 'drugbank_manager', 'DrugBank', True, True, ()),
            ('bio2bel_phosphosite', 'phosphosite_manager', 'PhosphoSitePlus', True, True, ()),
            ('bio2bel_sider', 'sider_manager', 'SIDER', True, True, ()),
            ('bio2bel_mesh', 'mesh_manager', 'MeSH', True, True, ()),
            ('bio2bel_mgi', 'mgi_manager', 'MGI', True, True, ()),
            ('bio2bel_rgd', 'rgd_manager', 'RGD', True, True, ()),
            # CONSO's manager is built without a connection and has no tables
            # created here
            ('conso.manager', 'conso_manager', 'CONSO', False, False, ()),
        )

        for module_name, attribute, label, with_connection, create_tables, enrichers in manager_specs:
            # Only the import is guarded: a failure while building the manager
            # must still propagate.
            try:
                module = importlib.import_module(module_name)
            except ImportError:
                continue  # optional dependency that is not installed

            logger.debug('Using Bio2BEL %s', label)
            if with_connection:
                manager = module.Manager(connection=self.connection)
            else:
                manager = module.Manager()
            setattr(self, attribute, manager)

            if create_tables:
                manager.create_all()

            for enricher in enrichers:
                in_place_transformation(getattr(manager, enricher))

        self.manager_dict.update({
            name: manager
            for name, manager in self.__dict__.items()
            if name.endswith('_manager') and manager is not None
        })
Esempio n. 14
0
    def populate(self, source: Optional[str] = None, update: bool = False) -> None:
        """Populate database with the data from miRTarBase.

        Species, miRNAs, targets and interactions are created once each and
        reused for later rows; every row adds one evidence. Everything is
        committed in a single transaction at the end.

        :param source: path or link to data source needed for :func:`get_data`
        :param update: Should HGNC and miRBase be populated again even if
         they already are?
        """
        hgnc_manager = bio2bel_hgnc.Manager(connection=self.connection)
        if not hgnc_manager.is_populated() or update:
            hgnc_manager.populate()

        mirbase_manager = bio2bel_mirbase.Manager(connection=self.connection)
        if not mirbase_manager.is_populated() or update:
            mirbase_manager.populate()

        started = time.time()
        logger.info('getting data')
        df = get_data(source)
        logger.info('got data in %.2f seconds', time.time() - started)

        # Caches of the models created so far, keyed by their natural key
        mirna_by_name = {}
        target_by_entrez_id = {}
        species_by_name = {}
        interaction_by_key = {}

        emap = _build_entrez_map(hgnc_manager)

        def _get_or_create_species(species_name):
            # Reuse the species if it was already created for an earlier row
            found = species_by_name.get(species_name)
            if found is None:
                found = species_by_name[species_name] = Species(name=species_name)
                self.session.add(found)
            return found

        logger.info('building models')

        started = time.time()
        rows = tqdm(df.values, total=len(df.index))
        for row in rows:
            (mirtarbase_id, mirna_name, mirna_species, gene_name, entrez_id,
             target_species, experiment, support_type, pubmed) = row

            # Normalize the Entrez identifier to the string form of an integer
            entrez_id = str(int(entrez_id))

            key = (mirna_name, entrez_id)
            interaction = interaction_by_key.get(key)

            if interaction is None:
                mirna = mirna_by_name.get(mirna_name)
                if mirna is None:
                    mirna = mirna_by_name[mirna_name] = Mirna(
                        name=mirna_name,
                        species=_get_or_create_species(mirna_species),
                    )
                    self.session.add(mirna)

                target = target_by_entrez_id.get(entrez_id)
                if target is None:
                    target = target_by_entrez_id[entrez_id] = Target(
                        entrez_id=entrez_id,
                        species=_get_or_create_species(target_species),
                        name=gene_name,
                    )

                    # Attach the HGNC symbol and identifier when the Entrez
                    # identifier is present in the Entrez map
                    if entrez_id in emap:
                        hgnc_gene = emap[entrez_id]
                        target.hgnc_symbol = hgnc_gene.symbol
                        target.hgnc_id = str(hgnc_gene.identifier)

                    self.session.add(target)

                interaction = interaction_by_key[key] = Interaction(
                    mirtarbase_id=mirtarbase_id,
                    mirna=mirna,
                    target=target
                )
                self.session.add(interaction)

            # Every row contributes one evidence to its interaction
            evidence = Evidence(
                experiment=experiment,
                support=support_type,
                reference=pubmed,
                interaction=interaction,
            )
            self.session.add(evidence)

        logger.info('built models in %.2f seconds', time.time() - started)

        logger.info('committing models')
        started = time.time()
        self.session.commit()
        logger.info('committed after %.2f seconds', time.time() - started)