Example #1
0
    def __init__(self, graph_type, are_bnodes_skolemized, tax_ids=None):
        """
        Initialise the 'biogrid' ingest.

        :param graph_type: forwarded unchanged to the parent initialiser
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initialiser
        :param tax_ids: taxon ids kept on the instance; when None the
            list [9606, 10090, 7955] is used instead
        """
        # data_rights and file_handle are deliberately not passed
        super().__init__(
            graph_type,
            are_bnodes_skolemized,
            'biogrid',
            ingest_title='Biological General Repository for Interaction Datasets',
            ingest_url='http://thebiogrid.org',
            license_url='https://wiki.thebiogrid.org/doku.php/terms_and_conditions',
        )

        # Default taxa ("our favorite animals"); a longer candidate list was
        # taxids = [9606,10090,10116,7227,7955,6239,8355]
        self.tax_ids = [9606, 10090, 7955] if tax_ids is None else tax_ids

        # test ids are only set when the configuration provides them
        gene_ids_configured = (
            'test_ids' in config.get_config()
            and 'gene' in config.get_config()['test_ids'])
        if gene_ids_configured:
            self.test_ids = config.get_config()['test_ids']['gene']
        else:
            logger.warning("not configured with gene test ids.")

        # data-source specific warning, to be dropped once the issue clears
        logger.warning(
            "several MI experimental codes do not exactly map to ECO; using "
            "approximations.")
Example #2
0
    def __init__(self, graph_type, are_bnodes_skolemized, tax_ids=None):
        """
        Initialise the 'go' (Gene Ontology) ingest.

        :param graph_type: forwarded unchanged to the parent initialiser
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initialiser
        :param tax_ids: taxon ids to filter on; when None the list
            [9606, 10090, 7955] is used instead
        """
        super().__init__(
            graph_type,
            are_bnodes_skolemized,
            'go',
            ingest_title='Gene Ontology',
            ingest_url='http://www.geneontology.org',
            license_url=None,
            data_rights='http://geneontology.org/page/use-and-license'
            # file_handle=None
        )

        # Defaults
        self.tax_ids = tax_ids
        self.test_ids = []
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]
            # log the list actually in effect: the raw argument is None on
            # this branch, so logging it would print "Defaulting to None"
            logger.info("No taxa set.  Defaulting to %s", self.tax_ids)
        else:
            logger.info("Filtering on the following taxa: %s", self.tax_ids)

        # test ids stay empty unless the configuration provides gene ids
        if 'test_ids' not in config.get_config() or \
                'gene' not in config.get_config()['test_ids']:
            logger.warning("not configured with gene test ids.")
        else:
            self.test_ids = config.get_config()['test_ids']['gene']

        # build the id map for mapping uniprot ids to genes ... ONCE
        self.uniprot_entrez_id_map = self.get_uniprot_entrez_id_map()
        self.eco_map = self.get_eco_map(self.map_files['eco_map'])
Example #3
0
    def __init__(self, tax_ids=None, gene_ids=None):
        """
        Initialise the 'ensembl' source.

        :param tax_ids: taxon ids; replaced by [9606, 10090, 7955] when None
        :param gene_ids: stored on the instance before the bindings are
            loaded, then replaced by the configured gene test ids (or an
            empty list when none are configured)
        """
        Source.__init__(self, 'ensembl')

        # keep the raw arguments in place while the bindings are loaded
        self.tax_ids, self.gene_ids = tax_ids, gene_ids
        self.load_bindings()

        self.dataset = Dataset(
            'ensembl',
            'ENSEMBL',
            'http://www.ensembl.org',
            None)

        # Defaults
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]

        # NOTE(review): the gene_ids argument is discarded from here on
        self.gene_ids = []
        gene_ids_configured = (
            'test_ids' in config.get_config()
            and 'gene' in config.get_config()['test_ids'])
        if gene_ids_configured:
            self.gene_ids = config.get_config()['test_ids']['gene']
        else:
            logger.warning("not configured with gene test ids.")

        self.properties = Feature.properties

        # raises the logger's level as a side effect of construction
        logger.setLevel(logging.INFO)
Example #4
0
    def __init__(self,
                 graph_type,
                 are_bnodes_skolemized,
                 method,
                 tax_ids=None):
        """
        Initialise the 'oma' orthology ingest.

        :param graph_type: forwarded unchanged to the parent initialiser
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initialiser
        :param method: accepted for interface compatibility; not used here
            (the ingest name is hard-coded to 'oma')
        :param tax_ids: taxon ids, stored on the instance as given
        """
        super().__init__(
            graph_type,
            are_bnodes_skolemized,
            # method ??? this should be lowercase ingest name/identifier
            'oma',
            # NOTE(review): 'Ortholgous' looks like a typo for 'Orthologous';
            # left untouched because the title is emitted as metadata
            'Ortholgous MAtrix Hierarchical Orthologous Groups',
            'https://omabrowser.org/',
            license_url=None,
            data_rights="https://creativecommons.org/licenses/by-sa/2.5/",
            # file_handle=None
        )

        self.tax_ids = tax_ids
        self._map_orthology_code_to_RO_FOO = {
            'orthologGroup': OrthologyAssoc.ortho_rel['orthologous'],
            'paralogGroup': OrthologyAssoc.ortho_rel['paralogous']
        }

        # the key checked is 'protein', so the message names protein ids
        if 'test_ids' not in config.get_config() \
                or 'protein' not in config.get_config()['test_ids']:
            logger.warning("not configured with protein test ids.")
        else:
            self.test_ids = config.get_config()['test_ids']['protein']
Example #5
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Initialise the 'coriell' ingest and log its standing caveats.

        :param graph_type: forwarded unchanged to the parent initialiser
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initialiser
        """
        # license_url, data_rights and file_handle are not passed
        # website disclaimer 'https://www.coriell.org/1/About-Us/Legal-Notice'
        # wet material https://www.coriell.org/1/NINDS/About/Shared-Usage-Guidelines
        super().__init__(
            graph_type,
            are_bnodes_skolemized,
            'coriell',
            ingest_title='Coriell Institute for Medical Research',
            ingest_url='https://ccr.coriell.org/',
        )

        # data-source specific caveats (to be dropped once the issues clear)
        species_caveat = (
            'We assume that if a species is not provided, that it is a '
            'Human-derived cell line')
        omim_caveat = (  # TODO
            'We map all omim ids as a disease/phenotype entity, but should '
            'be fixed in the future')
        LOG.warning(species_caveat)
        LOG.warning(omim_caveat)

        # missing credentials are only logged; nothing is raised here
        has_ftp_auth = (
            'dbauth' in config.get_config()
            and 'coriell' in config.get_config()['dbauth'])
        if not has_ftp_auth:
            LOG.error("not configured with FTP user/password.")
Example #6
0
    def __init__(self,
                 graph_type,
                 are_bnodes_skolemized,
                 tax_ids=None,
                 gene_ids=None):
        """
        Initialise the 'hgnc' ingest.

        :param graph_type: forwarded unchanged to the parent initialiser
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initialiser
        :param tax_ids: taxon ids, stored on the instance as given
        :param gene_ids: accepted, but replaced by the configured gene test
            ids (or an empty list when none are configured)
        """
        super().__init__(
            graph_type,
            are_bnodes_skolemized,
            'hgnc',
            ingest_title='HGNC',
            ingest_url='https://www.genenames.org/',
            license_url='ftp://ftp.ebi.ac.uk/pub/databases/genenames/README.txt'
            # data_rights=None,
            # file_handle=None
        )

        self.tax_ids = tax_ids
        self.gene_ids = gene_ids

        # NOTE(review): the gene_ids argument is discarded from here on
        self.gene_ids = []
        if 'test_ids' not in config.get_config() \
                or 'gene' not in config.get_config()['test_ids']:
            logger.warning("not configured with gene test ids.")
        else:
            self.gene_ids = config.get_config()['test_ids']['gene']

        # the label had been mangled to 'H**o sapiens', which cannot match
        # a key of the translation table; restore the species name
        self.hs_txid = self.globaltt['Homo sapiens']
Example #7
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Initialise the 'genereviews' ingest and its bookkeeping containers.

        :param graph_type: forwarded unchanged to the parent initialiser
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initialiser
        """
        # file_handle is not passed
        super().__init__(
            graph_type,
            are_bnodes_skolemized,
            'genereviews',
            ingest_title='Gene Reviews',
            ingest_url='http://genereviews.org/',
            license_url=None,
            data_rights='http://www.ncbi.nlm.nih.gov/books/NBK138602/',
        )

        self.dataset.set_citation('GeneReviews:NBK1116')

        self.book_ids = set()
        self.all_books = {}

        disease_ids_configured = (
            'test_ids' in config.get_config()
            and 'disease' in config.get_config()['test_ids'])
        if disease_ids_configured:
            # the configured disease ids are taken as-is (no OMIM filtering
            # happens at this point)
            self.test_ids = config.get_config()['test_ids']['disease']
        else:
            LOG.warning("not configured with disease test ids.")
            self.test_ids = []

        self.omim_replaced = {}  # id_num to SET of id nums
        self.omim_type = {}  # id_num to onto_term
Example #8
0
    def fetch(self, is_dl_forced=False):
        """
        Fetch the DISCO tables and files, then stamp the dataset version.

        The postgres connection details are assembled from fixed host
        values plus the user/password found in the configuration.

        :param is_dl_forced: forwarded unchanged to ``self.get_files``
        """
        disco_user = config.get_config()['user']['disco']
        cxn = {
            'host': 'nif-db.crbs.ucsd.edu',
            'database': 'disco_crawler',
            'port': '5432',
            'user': disco_user,
            'password': config.get_config()['keys'][disco_user],
        }

        access_url = ''.join((
            'jdbc:postgresql://', cxn['host'], ':', cxn['port'], '/',
            cxn['database']))
        self.dataset.setFileAccessUrl(access_url, is_object_literal=True)

        # process the tables
        # self.fetch_from_pgdb(self.tables,cxn,100)  #for testing
        self.fetch_from_pgdb(self.tables, cxn)

        self.get_files(is_dl_forced)

        # FIXME: Everything needed for data provenance?
        # the version is the ctime (as a UTC date) of one fetched table file
        table_path = '/'.join((self.rawdir, 'dvp.pr_nlx_157874_1'))
        table_stat = os.stat(table_path)
        changed_at = datetime.utcfromtimestamp(table_stat[ST_CTIME])
        self.dataset.setVersion(changed_at.strftime("%Y-%m-%d"))
Example #9
0
    def __init__(self):
        """
        Initialise the 'hpoa' source: bindings, dataset, and test ids.
        """
        Source.__init__(self, 'hpoa')

        self.load_bindings()

        self.dataset = Dataset(
            'hpoa',
            'Human Phenotype Ontology',
            'http://www.human-phenotype-ontology.org',
            None,
            'http://www.human-phenotype-ontology.org/contao/index.php/legal-issues.html')

        self.replaced_id_count = 0

        # use the configured disease ids when present, else an empty list
        if 'test_ids' in config.get_config() \
                and 'disease' in config.get_config()['test_ids']:
            self.test_ids = config.get_config()['test_ids']['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_ids = []

        # data-source specific warnings to be removed when issues are cleared
        logger.warning(
            "note that some ECO classes are missing for ICE, PCS, and ITM; "
            "using temporary mappings.")
Example #10
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Set up the 'coriell' ingest, log its known caveats, and check
        that FTP credentials are configured.

        :param graph_type: passed through to the parent initialiser
        :param are_bnodes_skolemized: passed through to the parent
            initialiser
        """
        # website disclaimer 'https://www.coriell.org/1/About-Us/Legal-Notice'
        # wet material https://www.coriell.org/1/NINDS/About/Shared-Usage-Guidelines
        # (license_url, data_rights, file_handle are not supplied)
        super().__init__(
            graph_type, are_bnodes_skolemized, 'coriell',
            ingest_title='Coriell Institute for Medical Research',
            ingest_url='https://ccr.coriell.org/')

        # temporary, data-source specific warnings
        LOG.warning(
            'We assume that if a species is not provided, that it is '
            'a Human-derived cell line')
        LOG.warning(  # TODO
            'We map all omim ids as a disease/phenotype entity, but '
            'should be fixed in the future')

        # a missing configuration entry is logged as an error, not raised
        if not ('dbauth' in config.get_config()
                and 'coriell' in config.get_config()['dbauth']):
            LOG.error("not configured with FTP user/password.")
Example #11
0
    def __init__(self, graph_type, are_bnodes_skolemized, tax_ids=None):
        """
        Initialise the 'go' ingest and describe its dataset.

        :param graph_type: forwarded unchanged to the parent initialiser
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initialiser
        :param tax_ids: taxon ids to filter on; when None the list
            [9606, 10090, 7955] is used instead
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'go')

        # Defaults
        self.tax_ids = tax_ids
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]
            # log the list actually in effect: the raw argument is None on
            # this branch, so logging it would print "Defaulting to None"
            logger.info("No taxa set.  Defaulting to %s", self.tax_ids)
        else:
            logger.info("Filtering on the following taxa: %s", self.tax_ids)

        # update the dataset object with details about this resource
        # NO LICENSE for this resource
        self.dataset = Dataset(
            'go', 'GeneOntology', 'http://www.geneontology.org', None,
            "https://creativecommons.org/licenses/by/4.0/legalcode",
            'http://geneontology.org/page/use-and-license')

        # test ids are only set when the configuration provides gene ids
        if 'test_ids' not in config.get_config() or \
                'gene' not in config.get_config()['test_ids']:
            logger.warning("not configured with gene test ids.")
        else:
            self.test_ids = config.get_config()['test_ids']['gene']
Example #12
0
    def __init__(self,
                 graph_type,
                 are_bnodes_skolemized,
                 tax_ids=None,
                 gene_ids=None):
        """
        Initialise the 'ensembl' ingest and describe its dataset.

        :param graph_type: forwarded unchanged to the parent initialiser
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initialiser
        :param tax_ids: taxon ids; replaced by [9606, 10090, 7955] when None
        :param gene_ids: stored first, then replaced by the configured gene
            test ids (or an empty list when none are configured)
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'ensembl')

        self.tax_ids, self.gene_ids = tax_ids, gene_ids

        self.dataset = Dataset(
            'ensembl', 'ENSEMBL', 'http://uswest.ensembl.org', None)

        # Defaults
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]

        # NOTE(review): the gene_ids argument is discarded from here on
        self.gene_ids = []
        gene_ids_configured = (
            'test_ids' in config.get_config()
            and 'gene' in config.get_config()['test_ids'])
        if gene_ids_configured:
            self.gene_ids = config.get_config()['test_ids']['gene']
        else:
            logger.warning("not configured with gene test ids.")

        self.properties = Feature.properties

        # raises the logger's level as a side effect of construction
        logger.setLevel(logging.INFO)
Example #13
0
    def __init__(self, tax_ids=None):
        """
        Initialise the 'biogrid' source: bindings, dataset, taxa, test ids.

        :param tax_ids: taxon ids; replaced by [9606, 10090, 7955] when None
        """
        super().__init__('biogrid')

        # the raw argument is in place while the bindings are loaded
        self.tax_ids = tax_ids
        self.load_bindings()

        self.dataset = Dataset(
            'biogrid',
            'The BioGrid',
            'http://thebiogrid.org/',
            None,
            'http://wiki.thebiogrid.org/doku.php/terms_and_conditions')

        # Default taxa ("our favorite animals"); a longer candidate list was
        # taxids = [9606,10090,10116,7227,7955,6239,8355]
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]

        # test ids are only set when the configuration provides gene ids
        if 'test_ids' in config.get_config() \
                and 'gene' in config.get_config()['test_ids']:
            self.test_ids = config.get_config()['test_ids']['gene']
        else:
            logger.warning("not configured with gene test ids.")

        # data-source specific warning, to be dropped once the issue clears
        logger.warning(
            "several MI experimental codes do not exactly map to ECO; using "
            "approximations.")
Example #14
0
    def __init__(self, graph_type, are_bnodes_skolemized, tax_ids=None, gene_ids=None):
        """
        Initialise the 'ensembl' ingest.

        :param graph_type: forwarded unchanged to the parent initialiser
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initialiser
        :param tax_ids: taxon ids; replaced by [9606, 10090, 7955] when None
        :param gene_ids: stored first, then replaced by the configured gene
            test ids (or an empty list when none are configured)
        """
        # license_url, data_rights and file_handle are not passed
        super().__init__(
            graph_type,
            are_bnodes_skolemized,
            'ensembl',
            ingest_title='ENSEMBL',
            ingest_url='http://uswest.ensembl.org',
        )

        self.tax_ids, self.gene_ids = tax_ids, gene_ids

        # Defaults
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]

        # NOTE(review): the gene_ids argument is discarded from here on
        self.gene_ids = []
        gene_ids_configured = (
            'test_ids' in config.get_config()
            and 'gene' in config.get_config()['test_ids'])
        if gene_ids_configured:
            self.gene_ids = config.get_config()['test_ids']['gene']
        else:
            logger.warning("not configured with gene test ids.")

        # raises the logger's level as a side effect of construction
        logger.setLevel(logging.INFO)
Example #15
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Initialise the 'kegg' ingest and its lookup tables.

        :param graph_type: forwarded unchanged to the parent initialiser
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initialiser
        """
        # data_rights and file_handle are not passed
        super().__init__(
            graph_type,
            are_bnodes_skolemized,
            'kegg',
            ingest_title='Kyoto Encyclopedia of Genes and Genomes',
            ingest_url='http://www.genome.jp/kegg/',
            license_url='http://www.kegg.jp/kegg/legal.html',
        )

        # append any configured disease ids to the existing
        # self.test_ids['disease'] entry; otherwise, warn
        disease_ids_configured = (
            'test_ids' in config.get_config()
            and 'disease' in config.get_config()['test_ids'])
        if disease_ids_configured:
            self.test_ids['disease'] += \
                config.get_config()['test_ids']['disease']
        else:
            LOG.warning("not configured with disease test ids.")

        self.label_hash = {}
        # to hold the mappings of omim:kegg ids
        self.omim_disease_hash = {}
        # to hold the mappings of kegg:omim ids
        self.kegg_disease_hash = {}
Example #16
0
    def __init__(self, tax_ids=None):
        """
        Initialise the 'biogrid' source: bindings, dataset, taxa, test ids.

        :param tax_ids: taxon ids; replaced by [9606, 10090, 7955] when None
        """
        super().__init__("biogrid")

        self.tax_ids = tax_ids
        self.load_bindings()

        self.dataset = Dataset(
            "biogrid",
            "The BioGrid",
            "http://thebiogrid.org/",
            None,
            "http://wiki.thebiogrid.org/doku.php/terms_and_conditions",
        )

        # Defaults
        # taxids = [9606,10090,10116,7227,7955,6239,8355]  #our favorite animals
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]

        # Logger.warn is a deprecated alias; Logger.warning is the
        # supported spelling
        if "test_ids" not in config.get_config() or "gene" not in config.get_config()["test_ids"]:
            logger.warning("not configured with gene test ids.")
        else:
            self.test_ids = config.get_config()["test_ids"]["gene"]

        # data-source specific warnings (will be removed when issues are cleared)
        logger.warning("several MI experimental codes do not exactly map to ECO; using approximations.")
Example #17
0
    def __init__(self, graph_type, are_bnodes_skolemized,
                 tax_ids=None, gene_ids=None):
        """
        Initialise the 'ncbigene' ingest and describe its dataset.

        :param graph_type: forwarded unchanged to the parent initialiser
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initialiser
        :param tax_ids: taxon ids to filter on; when None the list
            [9606, 10090, 7955] is used instead
        :param gene_ids: stored first, then replaced by the configured gene
            test ids (or an empty list when none are configured)
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'ncbigene')

        self.tax_ids = tax_ids
        self.gene_ids = gene_ids
        self.filter = 'taxids'

        self.dataset = Dataset(
            'ncbigene', 'National Center for Biotechnology Information',
            'http://ncbi.nih.nlm.gov/gene', None,
            'http://www.ncbi.nlm.nih.gov/About/disclaimer.html',
            'https://creativecommons.org/publicdomain/mark/1.0/')

        # Defaults
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]
            # log the list actually in effect: the raw argument is None on
            # this branch, so logging it would print "Defaulting to None"
            logger.info("No taxa set.  Defaulting to %s", self.tax_ids)
        else:
            logger.info("Filtering on the following taxa: %s", self.tax_ids)

        # NOTE(review): the gene_ids argument is discarded from here on
        self.gene_ids = []
        if 'test_ids' not in config.get_config() or \
                'gene' not in config.get_config()['test_ids']:
            logger.warning("not configured with gene test ids.")
        else:
            self.gene_ids = config.get_config()['test_ids']['gene']

        self.properties = Feature.properties

        self.class_or_indiv = {}
Example #18
0
    def __init__(self,
                 graph_type,
                 are_bnodes_skolemized,
                 data_release_version=None):
        """
        Initialise the 'omim' ingest, its test ids, and its regexes.

        :param graph_type: forwarded unchanged to the parent initialiser
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initialiser
        :param data_release_version: forwarded unchanged to the parent
            initialiser
        """
        super().__init__(
            graph_type=graph_type,
            are_bnodes_skolemized=are_bnodes_skolemized,
            data_release_version=data_release_version,
            name='omim',
            ingest_title='Online Mendelian Inheritance in Man',
            ingest_url='http://www.omim.org',
            ingest_logo='source-omim.png',
            # ingest_desc=None,
            license_url=None,
            data_rights='http://omim.org/help/agreement',
            # file_handle=None
        )

        self.omim_ncbigene_idmap = {}

        # Report a missing API key.  This must be `or`: with `and`, a
        # missing 'keys' section raised KeyError on the second operand and
        # a present one short-circuited, so the error was never logged.
        if 'keys' not in config.get_config() or \
                'omim' not in config.get_config()['keys']:
            LOG.error("not configured with API key.")

        if 'disease' in self.all_test_ids:
            # local_id (numeric) portion of omim identifier
            prefix = 'OMIM:'
            self.test_ids = [
                x[len(prefix):] for x in self.all_test_ids['disease']
                if x.startswith(prefix)]
        else:
            # the key checked is 'disease', so the message names disease ids
            LOG.warning("not configured with disease test ids.")
            self.test_ids = []

        self.disorder_regex = re.compile(r'(.*), (\d{6})\s*(?:\((\d+)\))?')
        self.nogene_regex = re.compile(r'(.*)\s+\((\d+)\)')
Example #19
0
    def __init__(self, graph_type, are_bnodes_skolemized, tax_ids=None, gene_ids=None):
        """
        Set up the 'ensembl' ingest together with its dataset description.

        :param graph_type: passed through to the parent initialiser
        :param are_bnodes_skolemized: passed through to the parent
            initialiser
        :param tax_ids: taxon ids; [9606, 10090, 7955] is used when None
        :param gene_ids: stored first, then replaced by the configured gene
            test ids (or an empty list when none are configured)
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'ensembl')

        self.tax_ids, self.gene_ids = tax_ids, gene_ids

        self.dataset = Dataset(
            'ensembl',
            'ENSEMBL',
            'http://uswest.ensembl.org',
            None)

        # fall back to the default taxa
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]

        # NOTE(review): the gene_ids argument is discarded from here on
        self.gene_ids = []
        if 'test_ids' in config.get_config() \
                and 'gene' in config.get_config()['test_ids']:
            self.gene_ids = config.get_config()['test_ids']['gene']
        else:
            logger.warning("not configured with gene test ids.")

        self.properties = Feature.properties

        # raises the logger's level as a side effect of construction
        logger.setLevel(logging.INFO)
Example #20
0
    def fetch(self, is_dl_forced=False):
        """
        Retrieve the DISCO tables and files and set the dataset version.

        Connection details combine fixed host values with the user and
        password read from the configuration.

        :param is_dl_forced: passed through to ``self.get_files``
        """
        user = config.get_config()['user']['disco']
        cxn = {
            'host': 'nif-db.crbs.ucsd.edu',
            'database': 'disco_crawler',
            'port': '5432',
            'user': user,
            'password': config.get_config()['keys'][user],
        }

        jdbc_url = 'jdbc:postgresql://' + cxn['host'] \
            + ':' + cxn['port'] + '/' + cxn['database']
        self.dataset.setFileAccessUrl(jdbc_url, is_object_literal=True)

        # process the tables
        # self.fetch_from_pgdb(self.tables,cxn,100)  #for testing
        self.fetch_from_pgdb(self.tables, cxn)

        self.get_files(is_dl_forced)

        # FIXME: Everything needed for data provenance?
        # version = ctime of one fetched table file, rendered as a UTC date
        raw_table = '/'.join((self.rawdir, 'dvp.pr_nlx_157874_1'))
        ctime = os.stat(raw_table)[ST_CTIME]
        filedate = datetime.utcfromtimestamp(ctime).strftime("%Y-%m-%d")
        self.dataset.setVersion(filedate)
Example #21
0
    def __init__(self, graph_type, are_bnodes_skolemized, tax_ids=None):
        """
        Set up the 'go' ingest together with its dataset description.

        :param graph_type: passed through to the parent initialiser
        :param are_bnodes_skolemized: passed through to the parent
            initialiser
        :param tax_ids: taxon ids to filter on; [9606, 10090, 7955] is
            used when None
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'go')

        # Defaults
        self.tax_ids = tax_ids
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]
            # the argument itself is None here; log the defaults that were
            # just applied rather than "Defaulting to None"
            logger.info("No taxa set.  Defaulting to %s", self.tax_ids)
        else:
            logger.info("Filtering on the following taxa: %s", self.tax_ids)

        # update the dataset object with details about this resource
        # NO LICENSE for this resource
        self.dataset = Dataset(
            'go', 'GeneOntology', 'http://www.geneontology.org', None,
            "https://creativecommons.org/licenses/by/4.0/legalcode",
            'http://geneontology.org/page/use-and-license')

        # test ids are only set when the configuration provides gene ids
        if 'test_ids' not in config.get_config() or \
                'gene' not in config.get_config()['test_ids']:
            logger.warning("not configured with gene test ids.")
        else:
            self.test_ids = config.get_config()['test_ids']['gene']
Example #22
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Initialise the 'omim' ingest, its dataset, and its test ids.

        :param graph_type: forwarded unchanged to the parent initialiser
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initialiser
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'omim')

        self.dataset = Dataset(
            'omim', 'Online Mendelian Inheritance in Man',
            'http://www.omim.org', None,
            'http://omim.org/help/agreement')

        self.omim_ncbigene_idmap = {}

        # Report a missing API key.  This must be `or`: with `and`, a
        # missing 'keys' section raised KeyError on the second operand and
        # a present one short-circuited, so the error was never logged.
        if 'keys' not in config.get_config() or \
                'omim' not in config.get_config()['keys']:
            logger.error("not configured with API key.")

        # check to see if there's any ids configured in the config;
        # otherwise, warn
        if 'test_ids' not in config.get_config() or \
                'disease' not in config.get_config()['test_ids']:
            logger.warning("not configured with disease test ids.")
        else:
            # select only those test ids that are omim's, and extend the
            # existing self.test_ids with them (prefix stripped)
            self.test_ids += [
                obj.replace('OMIM:', '')
                for obj in config.get_config()['test_ids']['disease']
                if obj.startswith('OMIM:')]
Example #23
0
    def __init__(self, graph_type, are_bnodes_skolemized, tax_ids=None):
        """
        Set up the 'biogrid' ingest together with its dataset description.

        :param graph_type: passed through to the parent initialiser
        :param are_bnodes_skolemized: passed through to the parent
            initialiser
        :param tax_ids: taxon ids; [9606, 10090, 7955] is used when None
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'biogrid')

        self.tax_ids = tax_ids

        self.dataset = Dataset(
            'biogrid',
            'The BioGrid',
            'http://thebiogrid.org/',
            None,
            'http://wiki.thebiogrid.org/doku.php/terms_and_conditions')

        # Default taxa ("our favorite animals"); a longer candidate list was
        # taxids = [9606,10090,10116,7227,7955,6239,8355]
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]

        # test ids are only set when the configuration provides gene ids
        gene_ids_configured = (
            'test_ids' in config.get_config()
            and 'gene' in config.get_config()['test_ids'])
        if gene_ids_configured:
            self.test_ids = config.get_config()['test_ids']['gene']
        else:
            logger.warning("not configured with gene test ids.")

        # data-source specific warning, to be dropped once the issue clears
        logger.warning(
            "several MI experimental codes do not exactly map to ECO; using "
            "approximations.")
Example #24
0
    def __init__(self, tax_ids=None, gene_ids=None):
        """
        Initialise the 'ncbigene' source: bindings, dataset, taxa, gene ids.

        :param tax_ids: taxon ids to filter on; when None the list
            [9606, 10090, 7955] is used instead
        :param gene_ids: stored first, then replaced by the configured gene
            test ids (or an empty list when none are configured)
        """
        Source.__init__(self, 'ncbigene')

        self.tax_ids = tax_ids
        self.gene_ids = gene_ids
        self.filter = 'taxids'
        self.load_bindings()

        self.dataset = Dataset('ncbigene', 'National Center for Biotechnology Information',
                               'http://ncbi.nih.nlm.gov/gene', None,
                               'http://www.ncbi.nlm.nih.gov/About/disclaimer.html',
                               'https://creativecommons.org/publicdomain/mark/1.0/')

        # Defaults
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]
            # log the list actually in effect: the raw argument is None on
            # this branch, so logging it would print "Defaulting to None"
            logger.info("No taxa set.  Defaulting to %s", self.tax_ids)
        else:
            logger.info("Filtering on the following taxa: %s", self.tax_ids)

        # NOTE(review): the gene_ids argument is discarded from here on
        self.gene_ids = []
        if 'test_ids' not in config.get_config() or 'gene' not in config.get_config()['test_ids']:
            # Logger.warn is a deprecated alias of Logger.warning
            logger.warning("not configured with gene test ids.")
        else:
            self.gene_ids = config.get_config()['test_ids']['gene']

        self.properties = Feature.properties
Example #25
0
    def __init__(self, graph_type, are_bnodes_skolemized, method, tax_ids=None):
        """
        Initialise an orthology ingest named by ``method``.

        :param graph_type: forwarded unchanged to the parent initialiser
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initialiser
        :param method: forwarded to the parent initialiser as its third
            positional argument
        :param tax_ids: taxon ids, stored on the instance as given
        """
        super().__init__(graph_type, are_bnodes_skolemized, method)
        self.tax_ids = tax_ids
        self._map_orthology_code_to_RO = {
            'orthologGroup': OrthologyAssoc.ortho_rel['orthologous'],
            'paralogGroup': OrthologyAssoc.ortho_rel['paralogous']}

        # the key checked is 'protein', so the message names protein ids
        if 'test_ids' not in config.get_config() \
                or 'protein' not in config.get_config()['test_ids']:
            logger.warning("not configured with protein test ids.")
        else:
            self.test_ids = config.get_config()['test_ids']['protein']
Example #26
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Initialise the 'orphanet' ingest and describe its dataset.

        :param graph_type: forwarded unchanged to the parent initialiser
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initialiser
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'orphanet')

        self.dataset = Dataset(
            'orphanet',
            'Orphanet',
            'http://www.orpha.net',
            None,
            'http://creativecommons.org/licenses/by-nd/3.0/',
            'http://omim.org/help/agreement')

        # warn when no disease test ids are configured; nothing is stored
        # on the instance either way
        disease_ids_configured = (
            'test_ids' in config.get_config()
            and 'disease' in config.get_config()['test_ids'])
        if not disease_ids_configured:
            logger.warning("not configured with disease test ids.")
Example #27
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Set up the 'orphanet' ingest together with its dataset description.

        :param graph_type: passed through to the parent initialiser
        :param are_bnodes_skolemized: passed through to the parent
            initialiser
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'orphanet')

        self.dataset = Dataset(
            'orphanet', 'Orphanet', 'http://www.orpha.net',
            None,
            'http://creativecommons.org/licenses/by-nd/3.0/',
            'http://omim.org/help/agreement')

        # the configuration is only inspected; a warning is logged when
        # disease test ids are absent
        if not ('test_ids' in config.get_config()
                and 'disease' in config.get_config()['test_ids']):
            logger.warning("not configured with disease test ids.")
Example #28
0
    def fetch(self, is_dl_forced=False):
        """
        Fetch the DISCO tables and files, then stamp the dataset version.

        Connection details start from the configured 'dbauth'/'disco'
        entry and are completed with the fixed host, database and port.

        :param is_dl_forced: forwarded unchanged to ``self.get_files``
        """
        # Work on a copy: updating the configured mapping in place would
        # leak host/database/port into the shared configuration object.
        cxn = dict(config.get_config()['dbauth']['disco'])
        cxn.update({
            'host': 'nif-db.crbs.ucsd.edu',
            'database': 'disco_crawler',
            'port': 5432
        })

        access_url = ''.join((
            'jdbc:postgresql://', cxn['host'], ':', str(cxn['port']), '/',
            cxn['database']))
        self.dataset.setFileAccessUrl(access_url, is_object_literal=True)

        # process the tables
        # self.fetch_from_pgdb(self.tables,cxn,100)  #for testing
        self.fetch_from_pgdb(self.tables, cxn)

        self.get_files(is_dl_forced)

        # FIXME: Everything needed for data provenance?
        # the version is the ctime (as a UTC date) of one fetched table file
        st = os.stat('/'.join((self.rawdir, 'dvp.pr_nlx_157874_1')))
        filedate = datetime.utcfromtimestamp(st[ST_CTIME]).strftime("%Y-%m-%d")
        self.dataset.setVersion(filedate)
Example #29
0
    def __init__(self):
        """
        Initialise the 'coriell' source, log its standing caveats, and
        check that FTP credentials are configured.
        """
        Source.__init__(self, 'coriell')

        self.load_bindings()

        self.dataset = Dataset('coriell', 'Coriell', 'http://ccr.coriell.org/', None)

        # data-source specific warnings (will be removed when issues are cleared)
        # Logger.warn is a deprecated alias; Logger.warning is the
        # supported spelling
        logger.warning('We assume that if a species is not provided, that it is a Human-derived cell line')
        logger.warning('We map all omim ids as a disease/phenotype entity, but should be fixed in the future')

        # check if config exists; if it doesn't, log an error (nothing is
        # raised here)
        if 'dbauth' not in config.get_config() or 'coriell' not in config.get_config()['dbauth']:
            logger.error("not configured with FTP user/password.")
Example #30
0
    def __init__(self):
        """
        Initialise the 'orphanet' source: bindings and dataset description.
        """
        Source.__init__(self, 'orphanet')

        self.load_bindings()

        self.dataset = Dataset(
            'orphanet',
            'Orphanet',
            'http://www.orpha.net',
            None,
            'http://creativecommons.org/licenses/by-nd/3.0/',
            'http://omim.org/help/agreement')

        # warn when no disease test ids are configured; nothing is stored
        # on the instance either way
        disease_ids_configured = (
            'test_ids' in config.get_config()
            and 'disease' in config.get_config()['test_ids'])
        if not disease_ids_configured:
            logger.warning("not configured with disease test ids.")
Example #31
0
    def __init__(self):
        """
        Initialise the 'eom' source: namespaces, dataset, credential check.
        """
        Source.__init__(self, 'eom')
        self.namespaces.update(curie_map.get())

        # update the dataset object with details about this resource
        # TODO put this into a conf file?
        self.dataset = Dataset(
            'eom',
            'EOM',
            'http://elementsofmorphology.nih.gov',
            None,
            'http://www.genome.gov/copyright.cfm',
            'https://creativecommons.org/publicdomain/mark/1.0/')

        # missing postgres credentials are only logged; nothing is raised
        has_pg_auth = (
            'dbauth' in config.get_config()
            and 'disco' in config.get_config()['dbauth'])
        if not has_pg_auth:
            logger.error("not configured with PG user/password.")

        # source-specific warnings.  will be cleared when resolved.
Example #32
0
    def __init__(self, tax_ids=None, gene_ids=None):
        """
        Initialise the 'hgnc' source: bindings, dataset, and gene ids.

        :param tax_ids: taxon ids, stored on the instance as given
        :param gene_ids: stored before the bindings are loaded, then
            replaced by the configured gene test ids (or an empty list
            when none are configured)
        """
        Source.__init__(self, 'hgnc')

        self.tax_ids = tax_ids
        self.gene_ids = gene_ids
        self.load_bindings()

        self.dataset = Dataset('hgnc', 'HGNC', 'http://www.genenames.org', None)

        # NOTE(review): the gene_ids argument is discarded from here on
        self.gene_ids = []
        if 'test_ids' not in config.get_config() or 'gene' not in config.get_config()['test_ids']:
            # Logger.warn is a deprecated alias of Logger.warning
            logger.warning("not configured with gene test ids.")
        else:
            self.gene_ids = config.get_config()['test_ids']['gene']

        self.properties = Feature.properties
Example #33
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Initialise the 'eom' ingest and describe its dataset.

        :param graph_type: forwarded unchanged to the parent initialiser
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initialiser
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'eom')

        # update the dataset object with details about this resource
        # TODO put this into a conf file?
        self.dataset = Dataset(
            'eom',
            'EOM',
            'http://elementsofmorphology.nih.gov',
            None,
            'http://www.genome.gov/copyright.cfm',
            'https://creativecommons.org/publicdomain/mark/1.0/')

        # missing postgres credentials are only logged; nothing is raised
        if not ('dbauth' in config.get_config()
                and 'disco' in config.get_config()['dbauth']):
            logger.error("not configured with PG user/password.")

        # source-specific warnings.  will be cleared when resolved.
Example #34
0
    def __init__(self):
        """
        Set up the 'gwascatalog' source: base Source, bindings, dataset
        description and the configured test ids.
        """
        Source.__init__(self, 'gwascatalog')

        self.load_bindings()

        description = \
            'The NHGRI-EBI Catalog of published genome-wide association studies'
        self.dataset = Dataset(
            'gwascatalog',
            'GWAS Catalog',
            'http://www.ebi.ac.uk/gwas/',
            description,
            'http://creativecommons.org/licenses/by/3.0/',
            None)
        # 'http://www.ebi.ac.uk/gwas/docs/about'  # TODO add this

        conf = config.get_config()
        if 'test_ids' in conf and 'gene' in conf['test_ids']:
            # NOTE(review): the whole test_ids mapping is stored, not just
            # the 'gene' entry -- confirm that is what consumers expect
            self.test_ids = conf['test_ids']
        else:
            logger.warning("not configured with gene test ids.")
Example #35
0
    def __init__(self,
                 graph_type,
                 are_bnodes_skolemized,
                 method,
                 tax_ids=None):
        """
        Set up an orthology source.

        :param graph_type: forwarded unchanged to the parent initializer
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initializer
        :param method: forwarded to the parent initializer as its third
            positional argument
        :param tax_ids: stored unchanged on ``self.tax_ids``
        """
        super().__init__(graph_type, are_bnodes_skolemized, method)
        self.tax_ids = tax_ids

        # orthology group codes mapped to the OrthologyAssoc relations
        self._map_orthology_code_to_RO = {
            'orthologGroup': OrthologyAssoc.ortho_rel['orthologous'],
            'paralogGroup': OrthologyAssoc.ortho_rel['paralogous']
        }

        conf = config.get_config()
        if 'test_ids' in conf and 'protein' in conf['test_ids']:
            self.test_ids = conf['test_ids']['protein']
        else:
            # the key checked is 'protein', so the message names it
            # (it used to claim that *gene* test ids were missing)
            logger.warning("not configured with protein test ids.")
Example #36
0
    def __init__(self):
        """
        Set up the 'kegg' source: base Source, dataset description,
        configured disease test ids and the empty lookup tables.
        """
        Source.__init__(self, 'kegg')

        # update the dataset object with details about this resource
        self.dataset = Dataset(
            'kegg', 'KEGG', 'http://www.genome.jp/kegg/', None, None,
            'http://www.kegg.jp/kegg/legal.html')

        # source-specific warnings.  will be cleared when resolved.
        # add any disease ids found in the config; otherwise, warn
        conf = config.get_config()
        if 'test_ids' in conf and 'disease' in conf['test_ids']:
            # self.test_ids is not assigned in this method, so it must
            # already exist with a 'disease' entry when this runs
            self.test_ids['disease'] += conf['test_ids']['disease']
        else:
            # Logger.warn is a deprecated alias; use warning()
            logger.warning("not configured with disease test ids.")

        self.label_hash = {}
        self.omim_disease_hash = {}  # to hold the mappings of omim:kegg ids
        self.kegg_disease_hash = {}  # to hold the mappings of kegg:omim ids
Example #37
0
    def __init__(self):
        """
        Set up the 'orphanet' source: base Source, bindings and dataset
        description.  Warns when no disease test ids are configured.
        """
        Source.__init__(self, "orphanet")

        self.load_bindings()

        self.dataset = Dataset(
            "orphanet",
            "Orphanet",
            "http://www.orpha.net",
            None,
            "http://creativecommons.org/licenses/by-nd/3.0/",
            "http://omim.org/help/agreement",
        )

        # check to see if there's any ids configured in the config;
        # otherwise, warn (the ids themselves are not stored here)
        conf = config.get_config()
        if "test_ids" not in conf or "disease" not in conf["test_ids"]:
            # Logger.warn is a deprecated alias; use warning()
            logger.warning("not configured with disease test ids.")
Example #38
0
    def __init__(self, graph_type, are_bnodes_skolemized, tax_ids=None):
        """
        Set up the 'panther' source.

        :param graph_type: forwarded unchanged to the parent initializer
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initializer
        :param tax_ids: stored unchanged on ``self.tax_ids``; no default
            taxa are applied here, so it may remain None
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'panther')
        self.tax_ids = tax_ids

        self.dataset = Dataset(
            'panther', 'Protein ANalysis THrough Evolutionary Relationships',
            'http://pantherdb.org/', None,
            'http://www.pantherdb.org/terms/disclaimer.jsp')

        # # Defaults
        # if self.tax_ids is None:
        #     self.tax_ids = [9606, 10090, 7955]

        conf = config.get_config()
        if 'test_ids' in conf and 'protein' in conf['test_ids']:
            self.test_ids = conf['test_ids']['protein']
        else:
            # the key checked is 'protein', so the message names it
            # (it used to claim that *gene* test ids were missing)
            logger.warning("not configured with protein test ids.")
Example #39
0
    def __init__(self, graph_type, are_bnodes_skolemized,
                 tax_ids=None, gene_ids=None):
        """
        Set up the 'hgnc' source.

        :param graph_type: forwarded unchanged to the parent initializer
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initializer
        :param tax_ids: stored unchanged on ``self.tax_ids``
        :param gene_ids: gene ids used when the configuration supplies no
            gene test ids; configured test ids still take precedence
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'hgnc')

        self.tax_ids = tax_ids

        self.dataset = Dataset(
            'hgnc', 'HGNC', 'http://www.genenames.org', None)

        # The argument used to be stored and then immediately replaced by
        # an empty list.  Keep it as the fallback instead; an omitted
        # argument still yields an empty list.
        self.gene_ids = gene_ids if gene_ids is not None else []
        conf = config.get_config()
        if 'test_ids' in conf and 'gene' in conf['test_ids']:
            self.gene_ids = conf['test_ids']['gene']
        else:
            logger.warning("not configured with gene test ids.")

        self.properties = Feature.properties
Example #40
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Set up the 'decipher' source.

        ``self.test_ids`` ends up as the configured disease test ids, or
        as an empty list (with a warning) when none are configured.

        :param graph_type: forwarded unchanged to the parent initializer
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initializer
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'decipher')

        dataset_args = (
            'decipher',
            'Development Disorder Genotype – Phenotype Database',
            'https://decipher.sanger.ac.uk/',
            None,
            'https://decipher.sanger.ac.uk/legal',
        )
        self.dataset = Dataset(*dataset_args)

        conf = config.get_config()
        if 'test_ids' in conf and 'disease' in conf['test_ids']:
            self.test_ids = conf['test_ids']['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_ids = []

        # helpers that write into this source's graph
        self.g = self.graph
        self.geno = Genotype(self.g)
        self.model = Model(self.g)
Example #41
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Set up the 'coriell' source.

        Emits the standing data-source warnings and logs an error when no
        'coriell' credentials are configured; nothing is raised here.

        :param graph_type: forwarded unchanged to the parent initializer
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initializer
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'coriell')

        self.dataset = Dataset(
            'coriell', 'Coriell', 'http://ccr.coriell.org/', None)

        # data-source specific warnings
        # (will be removed when issues are cleared)
        logger.warning(
            'We assume that if a species is not provided, that it is a '
            'Human-derived cell line')
        # TODO
        logger.warning(
            'We map all omim ids as a disease/phenotype entity, but should '
            'be fixed in the future')

        # let the user know when the credentials are absent
        conf = config.get_config()
        has_ftp_auth = 'dbauth' in conf and 'coriell' in conf['dbauth']
        if not has_ftp_auth:
            logger.error("not configured with FTP user/password.")
Example #42
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Set up the 'genereviews' source.

        ``self.test_ids`` ends up as the configured disease test ids, or
        as an empty list (with a warning) when none are configured.

        :param graph_type: forwarded unchanged to the parent initializer
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initializer
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'genereviews')

        self.dataset = Dataset(
            'genereviews',
            'Gene Reviews',
            'http://genereviews.org/',
            None,
            'http://www.ncbi.nlm.nih.gov/books/NBK138602/')
        self.dataset.set_citation('GeneReviews:NBK1116')

        self.book_ids = set()
        self.all_books = {}

        conf = config.get_config()
        if 'test_ids' in conf and 'disease' in conf['test_ids']:
            # all configured disease ids are taken as-is;
            # no filtering by prefix is applied here
            self.test_ids = conf['test_ids']['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_ids = list()
Example #43
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Set up the 'orphanet' source.

        Only warns when no disease test ids are configured; the ids are
        not stored by this method.

        :param graph_type: forwarded unchanged to the parent initializer
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initializer
        """
        ingest_details = {
            'ingest_title': 'Orphanet',
            'ingest_url': 'http://www.orpha.net',
            'license_url': 'http://creativecommons.org/licenses/by-nd/3.0/',
            'data_rights': 'http://omim.org/help/agreement',
            # file_handle=None
        }
        super().__init__(
            graph_type, are_bnodes_skolemized, 'orphanet', **ingest_details)

        # warn when the config carries no disease test ids
        # TODO remove
        conf = config.get_config()
        has_disease_ids = 'test_ids' in conf and 'disease' in conf['test_ids']
        if not has_disease_ids:
            logger.warning("not configured with disease test ids.")
Example #44
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Set up the 'genereviews' source.

        :param graph_type: forwarded unchanged to the parent initializer
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initializer
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'genereviews')

        dataset_args = (
            'genereviews',
            'Gene Reviews',
            'http://genereviews.org/',
            None,
            'http://www.ncbi.nlm.nih.gov/books/NBK138602/',
        )
        self.dataset = Dataset(*dataset_args)
        self.dataset.set_citation('GeneReviews:NBK1116')

        self.book_ids = set()
        self.all_books = {}

        # use the configured disease test ids unfiltered,
        # or fall back to an empty list with a warning
        conf = config.get_config()
        configured = conf['test_ids'] if 'test_ids' in conf else {}
        if 'disease' in configured:
            self.test_ids = configured['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_ids = list()
Example #45
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Set up the 'eom' source.

        A missing 'disco' credential entry is reported through the logger
        only; construction continues.

        :param graph_type: forwarded unchanged to the parent initializer
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initializer
        """
        ingest_details = {
            'ingest_title': 'Elements of Morphology',
            'ingest_url': 'http://elementsofmorphology.nih.gov',
            'data_rights': 'http://www.genome.gov/copyright.cfm',
            'license_url':
                'https://creativecommons.org/publicdomain/mark/1.0/',
            # file_handle=None
        }
        super().__init__(
            graph_type, are_bnodes_skolemized, 'eom', **ingest_details)

        # let the user know when the database credentials are absent
        conf = config.get_config()
        if not ('dbauth' in conf and 'disco' in conf['dbauth']):
            logger.error("not configured with PG user/password.")

        # source-specific warnings.  will be cleared when resolved.
Example #46
0
    def __init__(self, tax_ids=None):
        """
        Set up the 'panther' source.

        :param tax_ids: stored unchanged on ``self.tax_ids``; no default
            taxa are applied here, so it may remain None
        """
        super().__init__('panther')
        self.tax_ids = tax_ids
        self.load_bindings()

        self.dataset = Dataset(
            'panther', 'Protein ANalysis THrough Evolutionary Relationships',
            'http://pantherdb.org/', None,
            'http://www.pantherdb.org/terms/disclaimer.jsp')

        # # Defaults
        # if self.tax_ids is None:
        #     self.tax_ids = [9606, 10090, 7955]

        conf = config.get_config()
        if 'test_ids' in conf and 'protein' in conf['test_ids']:
            self.test_ids = conf['test_ids']['protein']
        else:
            # Logger.warn is a deprecated alias; use warning().  The key
            # checked is 'protein', so the message names it as well.
            logger.warning("not configured with protein test ids.")

        # data-source specific warnings
        # (will be removed when issues are cleared)
Example #47
0
    def __init__(
            self, graph_type, are_bnodes_skolemized,
            tax_ids=None, gene_ids=None):
        """
        Set up the 'clinvar' source.

        :param graph_type: forwarded unchanged to the parent initializer
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initializer
        :param tax_ids: stored unchanged on ``self.tax_ids``
        :param gene_ids: stored on ``self.gene_ids``; replaced by the
            configured gene test ids when those exist
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'clinvar')

        self.tax_ids = tax_ids
        self.gene_ids = gene_ids
        self.filter = 'taxids'

        dataset_args = (
            'ClinVar',
            'National Center for Biotechnology Information',
            'http://www.ncbi.nlm.nih.gov/clinvar/',
            None,
            'http://www.ncbi.nlm.nih.gov/About/disclaimer.html',
            'https://creativecommons.org/publicdomain/mark/1.0/',
        )
        self.dataset = Dataset(*dataset_args)

        conf = config.get_config()
        configured = conf['test_ids'] if 'test_ids' in conf else {}

        if 'gene' in configured:
            self.gene_ids = configured['gene']
        else:
            logger.warning("not configured with gene test ids.")

        # NOTE(review): disease_ids is not assigned in this method when
        # the config lacks disease test ids -- confirm consumers cope
        if 'disease' in configured:
            self.disease_ids = configured['disease']
        else:
            logger.warning("not configured with disease test ids.")

        self.properties = Feature.properties
Example #48
0
    def __init__(self):
        """
        Set up the 'ctd' source.

        The gene and disease test id lists come from the configuration;
        each falls back to an empty list (with a warning) when absent.
        """
        Source.__init__(self, 'ctd')
        self.dataset = Dataset(
            'ctd',
            'CTD',
            'http://ctdbase.org',
            None,
            'http://ctdbase.org/about/legal.jsp')

        conf = config.get_config()
        configured = conf['test_ids'] if 'test_ids' in conf else {}

        if 'gene' in configured:
            self.test_geneids = configured['gene']
        else:
            logger.warning("not configured with gene test ids.")
            self.test_geneids = []

        if 'disease' in configured:
            self.test_diseaseids = configured['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_diseaseids = []

        # helpers bound to the curie map and to this source's graph
        self.gu = GraphUtils(curie_map.get())
        self.g = self.graph
        self.geno = Genotype(self.g)
Example #49
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Set up the 'ctd' source.

        The gene and disease test id lists come from the configuration;
        each falls back to an empty list (with a warning) when absent.

        :param graph_type: forwarded unchanged to the parent initializer
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initializer
        """
        ingest_details = {
            'ingest_title': 'Comparative Toxicogenomics Database',
            'ingest_url': 'http://ctdbase.org',
            'license_url': 'http://ctdbase.org/about/legal.jsp',
            # data_rights=None,
            # file_handle=None
        }
        super().__init__(
            graph_type, are_bnodes_skolemized, 'ctd', **ingest_details)

        conf = config.get_config()
        has_test_ids = 'test_ids' in conf

        if has_test_ids and 'gene' in conf['test_ids']:
            self.test_geneids = conf['test_ids']['gene']
        else:
            logger.warning("not configured with gene test ids.")
            self.test_geneids = []

        if has_test_ids and 'disease' in conf['test_ids']:
            self.test_diseaseids = conf['test_ids']['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_diseaseids = []

        # helpers that write into this source's graph
        self.geno = Genotype(self.graph)
        self.pathway = Pathway(self.graph)
Example #50
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Set up the 'ctd' source.

        The gene and disease test id lists come from the configuration;
        each falls back to an empty list (with a warning) when absent.

        :param graph_type: forwarded unchanged to the parent initializer
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initializer
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'ctd')
        dataset_args = (
            'ctd',
            'CTD',
            'http://ctdbase.org',
            None,
            'http://ctdbase.org/about/legal.jsp',
        )
        self.dataset = Dataset(*dataset_args)

        conf = config.get_config()
        configured = conf['test_ids'] if 'test_ids' in conf else {}

        if 'gene' not in configured:
            logger.warning("not configured with gene test ids.")
            self.test_geneids = []
        else:
            self.test_geneids = configured['gene']

        if 'disease' not in configured:
            logger.warning("not configured with disease test ids.")
            self.test_diseaseids = []
        else:
            self.test_diseaseids = configured['disease']

        # graph alias plus helpers that write into the graph
        self.g = self.graph
        self.geno = Genotype(self.graph)
        self.pathway = Pathway(self.graph)
Example #51
0
    def __init__(self, graph_type, are_bnodes_skolemized,
                 tax_ids=None, gene_ids=None):
        """
        Set up the 'clinvar' source.

        :param graph_type: forwarded unchanged to the parent initializer
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initializer
        :param tax_ids: stored unchanged on ``self.tax_ids``
        :param gene_ids: stored on ``self.gene_ids``; replaced by the
            configured gene test ids when those exist
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'clinvar')

        self.tax_ids = tax_ids
        self.gene_ids = gene_ids
        self.filter = 'taxids'

        self.dataset = Dataset(
            'ClinVar',
            'National Center for Biotechnology Information',
            'http://www.ncbi.nlm.nih.gov/clinvar/',
            None,
            'http://www.ncbi.nlm.nih.gov/About/disclaimer.html',
            'https://creativecommons.org/publicdomain/mark/1.0/')

        conf = config.get_config()
        has_test_ids = 'test_ids' in conf

        # configured gene test ids override the gene_ids argument
        if has_test_ids and 'gene' in conf['test_ids']:
            self.gene_ids = conf['test_ids']['gene']
        else:
            logger.warning("not configured with gene test ids.")

        # NOTE(review): disease_ids is not assigned in this method when
        # the config lacks disease test ids -- confirm consumers cope
        if has_test_ids and 'disease' in conf['test_ids']:
            self.disease_ids = conf['test_ids']['disease']
        else:
            logger.warning("not configured with disease test ids.")

        self.properties = Feature.properties
Example #52
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Set up the 'kegg' source: dataset description, configured disease
        test ids and the empty lookup tables.

        :param graph_type: forwarded unchanged to the parent initializer
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initializer
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'kegg')

        # describe this resource on the dataset object
        dataset_args = (
            'kegg',
            'KEGG',
            'http://www.genome.jp/kegg/',
            None,
            None,
            'http://www.kegg.jp/kegg/legal.html',
        )
        self.dataset = Dataset(*dataset_args)

        # append any disease ids found in the config; otherwise, warn
        conf = config.get_config()
        if 'test_ids' in conf and 'disease' in conf['test_ids']:
            # self.test_ids is not assigned in this method, so it must
            # already exist with a 'disease' entry when this runs
            self.test_ids['disease'] += conf['test_ids']['disease']
        else:
            logger.warning("not configured with disease test ids.")

        self.label_hash = {}
        # to hold the mappings of omim:kegg ids
        self.omim_disease_hash = {}
        # to hold the mappings of kegg:omim ids
        self.kegg_disease_hash = {}
Example #53
0
    def __init__(self):
        """
        Set up the 'decipher' source.

        ``self.test_ids`` ends up as the configured disease test ids, or
        as an empty list (with a warning) when none are configured.
        """
        Source.__init__(self, 'decipher')

        self.load_bindings()

        self.dataset = Dataset(
            'decipher',
            'Development Disorder Genotype – Phenotype Database',
            'https://decipher.sanger.ac.uk/',
            None,
            'https://decipher.sanger.ac.uk/legal')

        conf = config.get_config()
        configured = conf['test_ids'] if 'test_ids' in conf else {}
        if 'disease' in configured:
            self.test_ids = configured['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_ids = []

        # helpers bound to the curie map and to this source's graph
        self.gu = GraphUtils(curie_map.get())
        self.g = self.graph
        self.geno = Genotype(self.g)
Example #54
0
    def __init__(self,
                 graph_type,
                 are_bnodes_skolemized,
                 tax_ids=None,
                 gene_ids=None):
        """
        Set up the 'ncbigene' source.

        :param graph_type: forwarded unchanged to the parent initializer
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initializer
        :param tax_ids: taxon ids to filter on; defaults to
            [9606, 10090, 7955] when None
        :param gene_ids: gene ids used when the configuration supplies no
            gene test ids; configured test ids still take precedence
        """
        super().__init__(
            graph_type,
            are_bnodes_skolemized,
            'ncbigene',
            ingest_title='National Center for Biotechnology Information',
            ingest_url='http://ncbi.nih.nlm.gov/gene',
            # ingest_desc=None,
            license_url='https://creativecommons.org/publicdomain/mark/1.0/',
            data_rights='http://www.ncbi.nlm.nih.gov/About/disclaimer.html'
            # file_handle=None
        )

        self.tax_ids = tax_ids
        self.id_filter = 'taxids'  # 'geneids

        # Defaults
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]
            # report the default actually applied; the raw argument is
            # None on this path and used to be what was logged
            logger.info("No taxa set.  Defaulting to %s", self.tax_ids)
        else:
            logger.info(
                "Filtering on the following taxa: %s", self.tax_ids)

        # The gene_ids argument used to be stored and then replaced by an
        # empty list.  Keep it as the fallback instead; an omitted
        # argument still yields an empty list.
        self.gene_ids = gene_ids if gene_ids is not None else []
        conf = config.get_config()
        if 'test_ids' in conf and 'gene' in conf['test_ids']:
            self.gene_ids = conf['test_ids']['gene']
        else:
            logger.warning("not configured with gene test ids.")

        self.class_or_indiv = {}
Example #55
0
    def __init__(self):
        """
        Set up the 'omim' source: base Source, bindings, dataset
        description, API-key check and the configured OMIM test ids.

        A missing API key is only logged; nothing is raised here.
        """
        Source.__init__(self, 'omim')

        self.load_bindings()

        self.dataset = Dataset(
            'omim', 'Online Mendelian Inheritance in Man',
            'http://www.omim.org', None, 'http://omim.org/help/agreement')

        # data-source specific warnings
        # (will be removed when issues are cleared)

        conf = config.get_config()

        # Report a missing API key.  This must be `or`: with `and` a
        # missing 'keys' section raised KeyError on the second operand,
        # and a present one short-circuited before 'omim' was checked.
        if 'keys' not in conf or 'omim' not in conf['keys']:
            logger.error("not configured with API key.")

        # check to see if there's any ids configured in the config;
        # otherwise, warn
        if 'test_ids' not in conf or 'disease' not in conf['test_ids']:
            # Logger.warn is a deprecated alias; use warning()
            logger.warning("not configured with disease test ids.")
        else:
            # keep only the OMIM test ids, with the curie prefix removed;
            # self.test_ids is not assigned in this method, so it must
            # already exist when this runs
            self.test_ids += [
                obj.replace('OMIM:', '')
                for obj in conf['test_ids']['disease']
                if obj.startswith('OMIM:')]
Example #56
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Set up the 'gwascatalog' source.

        :param graph_type: forwarded to the parent initializer; must be
            'rdf_graph'
        :param are_bnodes_skolemized: forwarded unchanged to the parent
            initializer
        :raises ValueError: when graph_type is not 'rdf_graph'
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'gwascatalog')

        if graph_type != 'rdf_graph':
            # the message used to name "UDP", a different source
            raise ValueError("GWAS Catalog requires a rdf_graph")

        self.dataset = Dataset(
            'gwascatalog', 'GWAS Catalog', 'http://www.ebi.ac.uk/gwas/',
            'The NHGRI-EBI Catalog of published genome-wide association studies',
            'http://creativecommons.org/licenses/by/3.0/', None)
        # 'http://www.ebi.ac.uk/gwas/docs/about'  # TODO add this

        conf = config.get_config()
        if 'test_ids' in conf and 'gene' in conf['test_ids']:
            # NOTE(review): the whole test_ids mapping is stored, not just
            # the 'gene' entry -- confirm that is what consumers expect
            self.test_ids = conf['test_ids']
        else:
            logger.warning("not configured with gene test ids.")

        # build a dictionary of genomic location to identifiers,
        # to try to get the equivalences
        self.id_location_map = dict()
Example #57
0
    def __init__(self):
        """
        Set up the 'genereviews' source.

        ``self.test_ids`` ends up as the configured disease test ids, or
        as an empty list (with a warning) when none are configured.
        """
        Source.__init__(self, 'genereviews')

        self.load_bindings()

        dataset_args = (
            'genereviews',
            'Gene Reviews',
            'http://genereviews.org/',
            None,
            'http://www.ncbi.nlm.nih.gov/books/NBK138602/',
        )
        self.dataset = Dataset(*dataset_args)
        self.dataset.set_citation('GeneReviews:NBK1116')

        self.gu = GraphUtils(curie_map.get())

        self.book_ids = set()
        self.all_books = {}

        conf = config.get_config()
        if 'test_ids' in conf and 'disease' in conf['test_ids']:
            # all configured disease ids are taken as-is;
            # no filtering by prefix is applied here
            self.test_ids = conf['test_ids']['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_ids = list()