def __init__(self):
    """Set up the 'hpoa' source.

    Calls ``Source.__init__`` with the name 'hpoa', loads bindings,
    attaches the Dataset record, resets the replaced-id counter and
    reads the disease test ids from the configuration (falling back to
    an empty list, with a warning, when none are configured).
    """
    Source.__init__(self, 'hpoa')
    self.load_bindings()

    self.dataset = Dataset(
        'hpoa',
        'Human Phenotype Ontology',
        'http://www.human-phenotype-ontology.org',
        None,
        'http://www.human-phenotype-ontology.org/contao/index.php/legal-issues.html',
    )

    self.replaced_id_count = 0

    # Use the configured disease test ids when present; otherwise warn
    # and continue with none.
    if 'test_ids' in config.get_config() \
            and 'disease' in config.get_config()['test_ids']:
        self.test_ids = config.get_config()['test_ids']['disease']
    else:
        logger.warning("not configured with disease test ids.")
        self.test_ids = []

    # Source-specific notice; to be dropped once the issue is cleared.
    logger.warning(
        "note that some ECO classes are missing for ICE, PCS, and ITM;"
        " using temporary mappings.")
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the 'mpd' (Mouse Phenome Database) source.

    ``graph_type`` and ``are_bnodes_skolemized`` are forwarded unchanged
    to ``Source.__init__`` along with the ingest title, URL and data
    rights. The constructor then records a citation on ``self.dataset``
    and creates the empty lookup tables used by this source.
    """
    Source.__init__(
        self,
        graph_type,
        are_bnodes_skolemized,
        'mpd',
        ingest_title='Mouse Phenome Database',
        ingest_url='https://phenome.jax.org/',
        data_rights='https://phenome.jax.org/about/termsofuse',
        # license_url and file_handle are not passed here
    )

    # NOTE(review): presumably a cut-off in standard deviations; confirm
    # against the code that reads it.
    self.stdevthreshold = 2

    # TODO add a citation for mpd dataset as a whole
    # (self.dataset is not assigned in this method; presumably it is
    # created by Source.__init__ -- confirm.)
    self.dataset.set_citation('PMID:15619963')

    self.assayhash = dict()
    self.idlabel_hash = dict()
    # mean/zscore of each measure, by strain+sex
    self.score_means_by_measure = dict()
    # mean value for each measure, by strain+sex
    self.strain_scores_by_measure = dict()
def __init__(self, tax_ids=None, gene_ids=None):
    """Set up the 'clinvar' source.

    Args:
        tax_ids: stored unchanged on ``self.tax_ids``.
        gene_ids: stored on ``self.gene_ids``; replaced by the configured
            gene test ids when the configuration provides them.
    """
    Source.__init__(self, 'clinvar')

    self.tax_ids = tax_ids
    self.gene_ids = gene_ids
    self.filter = 'taxids'

    self.load_bindings()

    self.dataset = Dataset(
        'ClinVar',
        'National Center for Biotechnology Information',
        'http://www.ncbi.nlm.nih.gov/clinvar/',
        None,
        'http://www.ncbi.nlm.nih.gov/About/disclaimer.html',
        'https://creativecommons.org/publicdomain/mark/1.0/')

    # Logger.warn is a deprecated alias; Logger.warning is the supported
    # spelling and matches the other sources.
    if 'test_ids' not in config.get_config() \
            or 'gene' not in config.get_config()['test_ids']:
        logger.warning("not configured with gene test ids.")
    else:
        self.gene_ids = config.get_config()['test_ids']['gene']

    # NOTE(review): when no disease test ids are configured,
    # self.disease_ids is never assigned in this method. Confirm that a
    # default exists elsewhere or that no caller reads it in that case.
    if 'test_ids' not in config.get_config() \
            or 'disease' not in config.get_config()['test_ids']:
        logger.warning("not configured with disease test ids.")
    else:
        self.disease_ids = config.get_config()['test_ids']['disease']

    self.properties = Feature.properties
def __init__(self, tax_ids=None):
    """Set up the 'go' source.

    Args:
        tax_ids: taxon ids to filter on. When ``None`` the defaults
            ``[9606, 10090, 7955]`` are used.
    """
    Source.__init__(self, 'go')

    # Defaults
    self.tax_ids = tax_ids
    if self.tax_ids is None:
        self.tax_ids = [9606, 10090, 7955]
        # Log the value actually in effect: the parameter is still None
        # here, so formatting it would print "Defaulting to None".
        logger.info("No taxa set.  Defaulting to %s", self.tax_ids)
    else:
        logger.info("Filtering on the following taxa: %s", self.tax_ids)

    # update the dataset object with details about this resource
    # NOTE(review): an earlier comment here read "NO LICENSE for this
    # resource", yet licence-looking URLs are passed below; confirm which
    # is current.
    self.dataset = Dataset(
        'go', 'GeneOntology', 'http://www.geneontology.org', None,
        "https://creativecommons.org/licenses/by/4.0/legalcode",
        'http://geneontology.org/page/use-and-license')

    # NOTE(review): self.test_ids is only assigned when gene test ids are
    # configured; confirm a default exists elsewhere.
    if 'test_ids' not in config.get_config() or \
            'gene' not in config.get_config()['test_ids']:
        logger.warning("not configured with gene test ids.")
    else:
        self.test_ids = config.get_config()['test_ids']['gene']
def __init__(self, tax_ids=None, gene_ids=None):
    """Set up the 'ncbigene' source.

    Args:
        tax_ids: taxon ids to filter on. When ``None`` the defaults
            ``[9606, 10090, 7955]`` are used.
        gene_ids: stored on ``self.gene_ids`` but then overwritten (see
            the note in the body).
    """
    Source.__init__(self, 'ncbigene')

    self.tax_ids = tax_ids
    self.gene_ids = gene_ids
    self.filter = 'taxids'

    self.load_bindings()

    self.dataset = Dataset(
        'ncbigene',
        'National Center for Biotechnology Information',
        'http://ncbi.nih.nlm.gov/gene',
        None,
        'http://www.ncbi.nlm.nih.gov/About/disclaimer.html',
        'https://creativecommons.org/publicdomain/mark/1.0/')

    # Defaults
    if self.tax_ids is None:
        self.tax_ids = [9606, 10090, 7955]
        # Log the value actually in effect: the parameter is still None
        # here, so formatting it would print "Defaulting to None".
        logger.info("No taxa set.  Defaulting to %s", self.tax_ids)
    else:
        logger.info("Filtering on the following taxa: %s", self.tax_ids)

    # NOTE(review): this discards whatever the caller passed as gene_ids;
    # behaviour kept as-is, confirm it is intended.
    self.gene_ids = []
    if 'test_ids' not in config.get_config() \
            or 'gene' not in config.get_config()['test_ids']:
        # Logger.warn is a deprecated alias of Logger.warning.
        logger.warning("not configured with gene test ids.")
    else:
        self.gene_ids = config.get_config()['test_ids']['gene']

    self.properties = Feature.properties
def __init__(self):
    """Set up the 'ctd' source.

    Attaches the Dataset record, reads gene and disease test ids from
    the configuration (each falls back to an empty list with a warning)
    and creates the graph helper objects.
    """
    Source.__init__(self, 'ctd')

    self.dataset = Dataset(
        'ctd',
        'CTD',
        'http://ctdbase.org',
        None,
        'http://ctdbase.org/about/legal.jsp',
    )

    # gene test ids
    if 'test_ids' in config.get_config() \
            and 'gene' in config.get_config()['test_ids']:
        self.test_geneids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")
        self.test_geneids = []

    # disease test ids
    if 'test_ids' in config.get_config() \
            and 'disease' in config.get_config()['test_ids']:
        self.test_diseaseids = config.get_config()['test_ids']['disease']
    else:
        logger.warning("not configured with disease test ids.")
        self.test_diseaseids = []

    self.gu = GraphUtils(curie_map.get())
    # self.g is an alias for self.graph; the Genotype helper is built on it.
    self.g = self.graph
    self.geno = Genotype(self.g)
def __init__(self):
    """Set up the 'mpd' source.

    Merges the curie map into ``self.namespaces``, attaches the Dataset
    record and its citation, creates the empty lookup tables and builds
    the Genotype / GraphUtils helpers.
    """
    Source.__init__(self, 'mpd')

    # @N, not sure if this step is required
    self.namespaces.update(curie_map.get())

    self.stdevthreshold = 2
    self.nobnodes = True  # FIXME

    # Dataset details for this resource.
    # @N: Note that there is no license as far as I can tell
    self.dataset = Dataset(
        'mpd',
        'MPD',
        'http://phenome.jax.org',
        None,
        None,
    )

    # TODO add a citation for mpd dataset as a whole
    self.dataset.set_citation('PMID:15619963')

    self.assayhash = dict()
    self.idlabel_hash = dict()
    # mean/zscore of each measure, by strain+sex
    self.score_means_by_measure = dict()
    # mean value for each measure, by strain+sex
    self.strain_scores_by_measure = dict()

    self.geno = Genotype(self.graph)
    self.gu = GraphUtils(curie_map.get())
def __init__(self, tax_ids=None, gene_ids=None):
    """Set up the 'ensembl' source.

    Args:
        tax_ids: taxon ids; ``None`` selects ``[9606, 10090, 7955]``.
        gene_ids: stored on ``self.gene_ids`` but then overwritten (see
            the note in the body).

    Also sets the module logger's level to ``logging.INFO``.
    """
    Source.__init__(self, 'ensembl')

    self.tax_ids = tax_ids
    self.gene_ids = gene_ids

    self.load_bindings()

    self.dataset = Dataset(
        'ensembl',
        'ENSEMBL',
        'http://www.ensembl.org',
        None,
    )

    # Defaults
    if self.tax_ids is None:
        self.tax_ids = [9606, 10090, 7955]

    # NOTE(review): this discards whatever the caller passed as gene_ids;
    # confirm it is intended.
    self.gene_ids = []
    if 'test_ids' in config.get_config() \
            and 'gene' in config.get_config()['test_ids']:
        self.gene_ids = config.get_config()['test_ids']['gene']
    else:
        logger.warning("not configured with gene test ids.")

    self.properties = Feature.properties

    logger.setLevel(logging.INFO)
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the 'mpd' (Mouse Phenome Database) source.

    ``graph_type`` and ``are_bnodes_skolemized`` are forwarded unchanged
    to ``Source.__init__`` along with the ingest title, URL and data
    rights. The constructor then records a citation on ``self.dataset``
    and creates the empty lookup tables used by this source.
    """
    Source.__init__(
        self,
        graph_type,
        are_bnodes_skolemized,
        'mpd',
        ingest_title='Mouse Phenome Database',
        ingest_url='https://phenome.jax.org/',
        data_rights='https://phenome.jax.org/about/termsofuse',
        # license_url and file_handle are not passed here
    )

    # NOTE(review): presumably a cut-off in standard deviations; confirm
    # against the code that reads it.
    self.stdevthreshold = 2

    # TODO add a citation for mpd dataset as a whole
    # (self.dataset is not assigned in this method; presumably it is
    # created by Source.__init__ -- confirm.)
    self.dataset.set_citation('PMID:15619963')

    self.assayhash = dict()
    self.idlabel_hash = dict()
    # mean/zscore of each measure, by strain+sex
    self.score_means_by_measure = dict()
    # mean value for each measure, by strain+sex
    self.strain_scores_by_measure = dict()
def __init__(self):
    """Set up the 'mmrrc' source.

    Creates the empty strain and id/label lookup dicts, loads bindings
    and attaches the Dataset record.
    """
    Source.__init__(self, 'mmrrc')

    self.strain_hash = dict()
    self.id_label_hash = dict()

    self.load_bindings()

    self.dataset = Dataset(
        'mmrrc',
        'Mutant Mouse Regional Resource Centers',
        'https://www.mmrrc.org',
        None,
        'https://www.mmrrc.org/about/data_download.php',
    )
def __init__(self):
    """Set up the 'wormbase' source.

    Attaches the Dataset record and initialises ``version_num`` to
    ``None``.
    """
    Source.__init__(self, 'wormbase')

    # Dataset details for this resource.
    # NO LICENSE for this resource
    self.dataset = Dataset(
        'wormbase',
        'WormBase',
        'http://www.wormbase.org',
        None,
        None,
        'http://www.wormbase.org/about/policies#012',
    )

    self.version_num = None
def __init__(self):
    """Set up the 'impc' source and attach its Dataset record."""
    Source.__init__(self, 'impc')

    # Dataset details for this resource.
    self.dataset = Dataset(
        'impc',
        'IMPC',
        'http://www.mousephenotype.org',
        None,
        'https://raw.githubusercontent.com/mpi2/PhenotypeArchive/master/LICENSE',
    )

    # TODO add a citation for impc dataset as a whole
    # :impc cito:citesAsAuthority PMID:24194600
def __init__(self):
    """Set up the 'animalqtldb' source and attach its Dataset record.

    Logs a warning that the raw data from this resource carries no
    licence or rights statement.
    """
    Source.__init__(self, 'animalqtldb')

    # update the dataset object with details about this resource
    self.dataset = Dataset(
        'animalqtldb', 'Animal QTL db',
        'http://www.animalgenome.org/cgi-bin/QTLdb/index', None, None,
        'http://www.animalgenome.org/QTLdb/faq#23')

    # source-specific warnings.  will be cleared when resolved.
    # Logger.warn is a deprecated alias; use Logger.warning.
    logger.warning(
        "No licences or rights exist for the raw data from this resource.")
def __init__(self):
    """Set up the 'eom' source.

    Merges the curie map into ``self.namespaces``, attaches the Dataset
    record and logs an error when the configuration has no
    ``dbauth.disco`` entry. The error is only logged; construction
    continues.
    """
    Source.__init__(self, 'eom')
    self.namespaces.update(curie_map.get())

    # Dataset details for this resource.
    # TODO put this into a conf file?
    self.dataset = Dataset(
        'eom',
        'EOM',
        'http://elementsofmorphology.nih.gov',
        None,
        'http://www.genome.gov/copyright.cfm',
        'https://creativecommons.org/publicdomain/mark/1.0/',
    )

    # Let the user know when the database credentials are not configured.
    if not ('dbauth' in config.get_config()
            and 'disco' in config.get_config()['dbauth']):
        logger.error("not configured with PG user/password.")

    # source-specific warnings.  will be cleared when resolved.
def __init__(self):
    """Set up the 'orphanet' source.

    Loads bindings, attaches the Dataset record and warns when no
    disease test ids are configured.
    """
    Source.__init__(self, 'orphanet')
    self.load_bindings()

    self.dataset = Dataset(
        'orphanet',
        'Orphanet',
        'http://www.orpha.net',
        None,
        'http://creativecommons.org/licenses/by-nd/3.0/',
        'http://omim.org/help/agreement',
    )

    # Only a warning is issued here; the configured ids are not stored.
    if not ('test_ids' in config.get_config()
            and 'disease' in config.get_config()['test_ids']):
        logger.warning("not configured with disease test ids.")
def __init__(self):
    """Set up the 'coriell' source.

    Loads bindings, attaches the Dataset record, logs two standing
    source-specific warnings and logs an error when the configuration
    has no ``dbauth.coriell`` entry. The error is only logged;
    construction continues.
    """
    Source.__init__(self, 'coriell')

    self.load_bindings()

    self.dataset = Dataset(
        'coriell', 'Coriell', 'http://ccr.coriell.org/', None)

    # data-source specific warnings
    # (will be removed when issues are cleared)
    # Logger.warn is a deprecated alias; use Logger.warning.
    logger.warning(
        'We assume that if a species is not provided, '
        'that it is a Human-derived cell line')
    logger.warning(
        'We map all omim ids as a disease/phenotype entity, '
        'but should be fixed in the future')

    # check if config exists; if it doesn't, let the user know
    if 'dbauth' not in config.get_config() \
            or 'coriell' not in config.get_config()['dbauth']:
        logger.error("not configured with FTP user/password.")
def __init__(self, tax_ids=None, gene_ids=None):
    """Set up the 'hgnc' source.

    Args:
        tax_ids: stored unchanged on ``self.tax_ids``.
        gene_ids: stored on ``self.gene_ids`` but then overwritten (see
            the note in the body).
    """
    Source.__init__(self, 'hgnc')

    self.tax_ids = tax_ids
    self.gene_ids = gene_ids

    self.load_bindings()

    self.dataset = Dataset(
        'hgnc', 'HGNC', 'http://www.genenames.org', None)

    # NOTE(review): this discards whatever the caller passed as gene_ids;
    # behaviour kept as-is, confirm it is intended.
    self.gene_ids = []
    if 'test_ids' not in config.get_config() \
            or 'gene' not in config.get_config()['test_ids']:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning("not configured with gene test ids.")
    else:
        self.gene_ids = config.get_config()['test_ids']['gene']

    self.properties = Feature.properties
def __init__(self):
    """Set up the 'gwascatalog' source.

    Loads bindings, attaches the Dataset record and, when gene test ids
    are configured, stores the configured ``test_ids`` mapping on
    ``self.test_ids``.
    """
    Source.__init__(self, 'gwascatalog')
    self.load_bindings()

    self.dataset = Dataset(
        'gwascatalog',
        'GWAS Catalog',
        'http://www.ebi.ac.uk/gwas/',
        'The NHGRI-EBI Catalog of published genome-wide association studies',
        'http://creativecommons.org/licenses/by/3.0/',
        None,
    )
    # 'http://www.ebi.ac.uk/gwas/docs/about'  # TODO add this

    # NOTE(review): the whole test_ids mapping is stored, not just its
    # 'gene' entry; confirm that is intended.
    if 'test_ids' in config.get_config() \
            and 'gene' in config.get_config()['test_ids']:
        self.test_ids = config.get_config()['test_ids']
    else:
        logger.warning("not configured with gene test ids.")
def __init__(self):
    """Set up the 'kegg' source.

    Attaches the Dataset record, appends any configured disease test ids
    to ``self.test_ids['disease']`` and creates the empty label and
    OMIM/KEGG mapping dicts.
    """
    Source.__init__(self, 'kegg')

    # update the dataset object with details about this resource
    self.dataset = Dataset(
        'kegg', 'KEGG', 'http://www.genome.jp/kegg/', None, None,
        'http://www.kegg.jp/kegg/legal.html')

    # check to see if there's any ids configured in the config;
    # otherwise, warn
    if 'test_ids' not in config.get_config() \
            or 'disease' not in config.get_config()['test_ids']:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning("not configured with disease test ids.")
    else:
        # NOTE(review): self.test_ids is not assigned in this method, so
        # it must already exist with a 'disease' entry. If that entry is
        # a list shared across instances, += extends it in place --
        # confirm this is intended.
        self.test_ids['disease'] += \
            config.get_config()['test_ids']['disease']

    self.label_hash = {}
    self.omim_disease_hash = {}  # to hold the mappings of omim:kegg ids
    self.kegg_disease_hash = {}  # to hold the mappings of kegg:omim ids
def __init__(self):
    """Set up the 'omia' source.

    Loads bindings, attaches the Dataset record, creates the per-table
    id lookup dicts and other working collections, installs the built-in
    test ids and builds the GraphUtils / Genotype helpers.
    """
    Source.__init__(self, 'omia')
    self.load_bindings()

    self.dataset = Dataset(
        'omia',
        'Online Mendelian Inheritance in Animals',
        'http://omia.angis.org.au',
        None,
        None,
        'http://sydney.edu.au/disclaimer.shtml',
    )

    # one independent, empty lookup dict per category
    self.id_hash = {
        category: {}
        for category in ('article', 'phene', 'breed', 'taxon', 'gene')
    }
    self.label_hash = dict()
    self.gu = GraphUtils(curie_map.get())

    # omia -> omim phene mappings
    self.omia_omim_map = dict()

    # unique genes that have phenes (for fetching orthology)
    self.annotated_genes = set()

    self.test_ids = {
        'disease': [
            'OMIA:001702', 'OMIA:001867', 'OMIA:000478',
            'OMIA:000201', 'OMIA:000810', 'OMIA:001400',
        ],
        'gene': [
            492297, 434, 492296, 3430235, 200685834,
            394659996, 200685845, 28713538, 291822383,
        ],
        'taxon': [
            9691, 9685, 9606, 9615, 9913, 93934, 37029, 9627, 9825,
        ],
        # filled in during parsing of the breed table,
        # for lookup by breed-associations
        'breed': [],
    }

    # omia ids mapped to any molecular info, used to write a report
    # for curation
    self.stored_omia_mol_gen = dict()

    # self.g is an alias for self.graph; the Genotype helper is built on it.
    self.g = self.graph
    self.geno = Genotype(self.g)
def __init__(self):
    """Set up the "orphanet" source.

    Loads bindings, attaches the Dataset record and warns when no
    disease test ids are configured.
    """
    Source.__init__(self, "orphanet")

    self.load_bindings()

    self.dataset = Dataset(
        "orphanet",
        "Orphanet",
        "http://www.orpha.net",
        None,
        "http://creativecommons.org/licenses/by-nd/3.0/",
        "http://omim.org/help/agreement",
    )

    # check to see if there's any ids configured in the config;
    # otherwise, warn
    if "test_ids" not in config.get_config() \
            or "disease" not in config.get_config()["test_ids"]:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning("not configured with disease test ids.")
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the 'mpd' source.

    ``graph_type`` and ``are_bnodes_skolemized`` are forwarded unchanged
    to ``Source.__init__``. The constructor then attaches the Dataset
    record with its citation and creates the empty lookup tables.
    """
    Source.__init__(self, graph_type, are_bnodes_skolemized, 'mpd')

    # @N, not sure if this step is required
    self.stdevthreshold = 2

    # Dataset details for this resource.
    # @N: Note that there is no license as far as I can tell
    self.dataset = Dataset(
        'mpd',
        'MPD',
        'http://phenome.jax.org',
        None,
        None,
    )

    # TODO add a citation for mpd dataset as a whole
    self.dataset.set_citation('PMID:15619963')

    self.assayhash = dict()
    self.idlabel_hash = dict()
    # mean/zscore of each measure, by strain+sex
    self.score_means_by_measure = dict()
    # mean value for each measure, by strain+sex
    self.strain_scores_by_measure = dict()
def __init__(self, graph_type, are_bnodes_skolemized):
    """Set up the 'mpd' source.

    ``graph_type`` and ``are_bnodes_skolemized`` are forwarded unchanged
    to ``Source.__init__``. The constructor then attaches the Dataset
    record with its citation and creates the empty lookup tables.
    """
    Source.__init__(self, graph_type, are_bnodes_skolemized, 'mpd')

    # @N, not sure if this step is required
    self.stdevthreshold = 2

    # Dataset details for this resource.
    # @N: Note that there is no license as far as I can tell
    self.dataset = Dataset(
        'mpd',
        'MPD',
        'http://phenome.jax.org',
        None,
        None,
    )

    # TODO add a citation for mpd dataset as a whole
    self.dataset.set_citation('PMID:15619963')

    self.assayhash = dict()
    self.idlabel_hash = dict()
    # mean/zscore of each measure, by strain+sex
    self.score_means_by_measure = dict()
    # mean value for each measure, by strain+sex
    self.strain_scores_by_measure = dict()
def __init__(self):
    """Set up the 'decipher' source.

    Loads bindings, attaches the Dataset record, reads the disease test
    ids from the configuration (empty list plus a warning when absent)
    and builds the GraphUtils / Genotype helpers.
    """
    Source.__init__(self, 'decipher')
    self.load_bindings()

    self.dataset = Dataset(
        'decipher',
        'Development Disorder Genotype – Phenotype Database',
        'https://decipher.sanger.ac.uk/',
        None,
        'https://decipher.sanger.ac.uk/legal',
    )

    if 'test_ids' in config.get_config() \
            and 'disease' in config.get_config()['test_ids']:
        self.test_ids = config.get_config()['test_ids']['disease']
    else:
        logger.warning("not configured with disease test ids.")
        self.test_ids = []

    self.gu = GraphUtils(curie_map.get())
    # self.g is an alias for self.graph; the Genotype helper is built on it.
    self.g = self.graph
    self.geno = Genotype(self.g)
def __init__(self):
    """Set up the 'omim' source.

    Loads bindings, attaches the Dataset record, logs an error when no
    OMIM API key is configured and extends ``self.test_ids`` with the
    configured disease test ids that carry the ``OMIM:`` prefix (prefix
    removed).
    """
    Source.__init__(self, 'omim')

    self.load_bindings()

    self.dataset = Dataset(
        'omim', 'Online Mendelian Inheritance in Man',
        'http://www.omim.org', None, 'http://omim.org/help/agreement')

    # Let the user know when the API key is not configured. This must be
    # `or`: with `and`, a config lacking 'keys' raised KeyError on the
    # second lookup, and a config with 'keys' but no 'omim' entry was
    # never reported.
    if 'keys' not in config.get_config() \
            or 'omim' not in config.get_config()['keys']:
        logger.error("not configured with API key.")

    # check to see if there's any ids configured in the config;
    # otherwise, warn
    if 'test_ids' not in config.get_config() \
            or 'disease' not in config.get_config()['test_ids']:
        # Logger.warn is a deprecated alias; use Logger.warning.
        logger.warning("not configured with disease test ids.")
    else:
        # select only those test ids that are omim's.
        # NOTE(review): self.test_ids is not assigned in this method, so
        # it must already exist. If it is a list shared across
        # instances, += extends it in place -- confirm this is intended.
        self.test_ids += [
            obj.replace('OMIM:', '')
            for obj in config.get_config()['test_ids']['disease']
            if obj.startswith('OMIM:')]
def __init__(self):
    """Set up the 'genereviews' source.

    Loads bindings, attaches the Dataset record with its citation,
    creates the book-tracking collections and reads the disease test ids
    from the configuration (empty list plus a warning when absent).
    """
    Source.__init__(self, 'genereviews')
    self.load_bindings()

    self.dataset = Dataset(
        'genereviews',
        'Gene Reviews',
        'http://genereviews.org/',
        None,
        'http://www.ncbi.nlm.nih.gov/books/NBK138602/',
    )
    self.dataset.set_citation('GeneReviews:NBK1116')

    self.gu = GraphUtils(curie_map.get())

    self.book_ids = set()
    self.all_books = dict()

    if 'test_ids' in config.get_config() \
            and 'disease' in config.get_config()['test_ids']:
        # All configured disease test ids are taken as-is; no filtering
        # by prefix happens here.
        self.test_ids = config.get_config()['test_ids']['disease']
    else:
        logger.warning("not configured with disease test ids.")
        self.test_ids = []