コード例 #1
0
    def __init__(self):
        """Initialise the 'hpoa' source.

        Loads the bindings, creates the dataset record, resets the
        replaced-id counter and picks up the disease test ids from the
        configuration (an empty list, plus a warning, when none are
        configured).
        """
        Source.__init__(self, 'hpoa')

        self.load_bindings()

        # NOTE(review): the meaning of each positional Dataset argument is
        # defined by Dataset itself; the values are passed through unchanged.
        self.dataset = Dataset(
            'hpoa',
            'Human Phenotype Ontology',
            'http://www.human-phenotype-ontology.org',
            None,
            'http://www.human-phenotype-ontology.org/contao/index.php/legal-issues.html')

        self.replaced_id_count = 0

        if 'test_ids' in config.get_config() \
                and 'disease' in config.get_config()['test_ids']:
            self.test_ids = config.get_config()['test_ids']['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_ids = []

        # data-source specific warning; remove once the issue is cleared
        logger.warning(
            "note that some ECO classes are missing for ICE, PCS, and ITM;"
            " using temporary mappings.")
コード例 #2
0
ファイル: MPD.py プロジェクト: TomConlin/dipper
    def __init__(self, graph_type, are_bnodes_skolemized):
        """Initialise the 'mpd' (Mouse Phenome Database) source.

        :param graph_type: passed through unchanged to ``Source.__init__``
        :param are_bnodes_skolemized: passed through unchanged to
            ``Source.__init__``
        """
        # license_url and file_handle are intentionally not passed here
        Source.__init__(
            self, graph_type, are_bnodes_skolemized, 'mpd',
            ingest_title='Mouse Phenome Database',
            ingest_url='https://phenome.jax.org/',
            data_rights='https://phenome.jax.org/about/termsofuse')

        # TODO add a citation for mpd dataset as a whole
        self.dataset.set_citation('PMID:15619963')

        # @N, not sure if this step is required
        self.stdevthreshold = 2

        # lookup tables, all empty at construction time
        self.idlabel_hash = {}
        self.assayhash = {}
        # mean value for each measure, by strain+sex
        self.strain_scores_by_measure = {}
        # mean/zscore of each measure, by strain+sex
        self.score_means_by_measure = {}
コード例 #3
0
ファイル: ClinVar.py プロジェクト: d3borah/dipper
    def __init__(self, tax_ids=None, gene_ids=None):
        """Initialise the 'clinvar' source.

        :param tax_ids: stored as ``self.tax_ids`` without modification
        :param gene_ids: stored as ``self.gene_ids``; replaced by the
            configured gene test ids when the configuration provides them
        """
        Source.__init__(self, 'clinvar')

        self.tax_ids = tax_ids
        self.gene_ids = gene_ids
        self.filter = 'taxids'
        self.load_bindings()

        self.dataset = Dataset(
            'ClinVar', 'National Center for Biotechnology Information',
            'http://www.ncbi.nlm.nih.gov/clinvar/', None,
            'http://www.ncbi.nlm.nih.gov/About/disclaimer.html',
            'https://creativecommons.org/publicdomain/mark/1.0/')

        # Logger.warn is a deprecated alias (removed in Python 3.13);
        # use Logger.warning instead.
        if 'test_ids' not in config.get_config() \
                or 'gene' not in config.get_config()['test_ids']:
            logger.warning("not configured with gene test ids.")
        else:
            self.gene_ids = config.get_config()['test_ids']['gene']

        # NOTE(review): disease_ids is only assigned when the configuration
        # provides disease test ids -- confirm that a default exists
        # elsewhere before reading it.
        if 'test_ids' not in config.get_config() \
                or 'disease' not in config.get_config()['test_ids']:
            logger.warning("not configured with disease test ids.")
        else:
            self.disease_ids = config.get_config()['test_ids']['disease']

        self.properties = Feature.properties

        return
コード例 #4
0
ファイル: GeneOntology.py プロジェクト: JervenBolleman/dipper
    def __init__(self, tax_ids=None):
        """Initialise the 'go' (Gene Ontology) source.

        :param tax_ids: taxa to filter on; when ``None`` the defaults
            ``[9606, 10090, 7955]`` are used
        """
        Source.__init__(self, 'go')

        # Defaults
        self.tax_ids = tax_ids
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]
            # Log the value actually in effect; the argument itself is None
            # on this branch, so logging it would print "Defaulting to None".
            logger.info("No taxa set.  Defaulting to %s", self.tax_ids)
        else:
            logger.info(
                "Filtering on the following taxa: %s", self.tax_ids)

        # update the dataset object with details about this resource
        # NOTE(review): the last two arguments look like license / usage
        # URLs -- confirm their order against Dataset's signature.
        self.dataset = Dataset(
            'go', 'GeneOntology', 'http://www.geneontology.org', None,
            "https://creativecommons.org/licenses/by/4.0/legalcode",
            'http://geneontology.org/page/use-and-license')

        # NOTE(review): test_ids is only assigned when gene test ids are
        # configured -- confirm a default exists elsewhere.
        if 'test_ids' not in config.get_config() or \
                'gene' not in config.get_config()['test_ids']:
            logger.warning("not configured with gene test ids.")
        else:
            self.test_ids = config.get_config()['test_ids']['gene']

        return
コード例 #5
0
ファイル: NCBIGene.py プロジェクト: d3borah/dipper
    def __init__(self, tax_ids=None, gene_ids=None):
        """Initialise the 'ncbigene' source.

        :param tax_ids: taxa to filter on; when ``None`` the defaults
            ``[9606, 10090, 7955]`` are used
        :param gene_ids: stored on the instance before ``load_bindings()``
            runs, then replaced by the configured gene test ids (or an
            empty list when none are configured)
        """
        Source.__init__(self, 'ncbigene')

        self.tax_ids = tax_ids
        self.gene_ids = gene_ids
        self.filter = 'taxids'
        self.load_bindings()

        self.dataset = Dataset(
            'ncbigene', 'National Center for Biotechnology Information',
            'http://ncbi.nih.nlm.gov/gene', None,
            'http://www.ncbi.nlm.nih.gov/About/disclaimer.html',
            'https://creativecommons.org/publicdomain/mark/1.0/')

        # Defaults
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]
            # Log the value actually in effect; the argument itself is None
            # on this branch, so logging it would print "Defaulting to None".
            logger.info("No taxa set.  Defaulting to %s", self.tax_ids)
        else:
            logger.info(
                "Filtering on the following taxa: %s", self.tax_ids)

        # NOTE(review): the gene_ids argument is discarded here in favour of
        # the configured test ids -- confirm this is intended.
        self.gene_ids = []
        if 'test_ids' not in config.get_config() \
                or 'gene' not in config.get_config()['test_ids']:
            # Logger.warn is a deprecated alias (removed in Python 3.13)
            logger.warning("not configured with gene test ids.")
        else:
            self.gene_ids = config.get_config()['test_ids']['gene']

        self.properties = Feature.properties

        return
コード例 #6
0
ファイル: CTD.py プロジェクト: JervenBolleman/dipper
    def __init__(self):
        """Initialise the 'ctd' source.

        Creates the dataset record, reads the gene and disease test ids
        from the configuration (each falls back to an empty list with a
        warning) and builds the graph helper objects.
        """
        Source.__init__(self, 'ctd')
        self.dataset = Dataset(
            'ctd',
            'CTD',
            'http://ctdbase.org',
            None,
            'http://ctdbase.org/about/legal.jsp')

        # gene test ids
        if 'test_ids' in config.get_config() \
                and 'gene' in config.get_config()['test_ids']:
            self.test_geneids = config.get_config()['test_ids']['gene']
        else:
            logger.warning("not configured with gene test ids.")
            self.test_geneids = []

        # disease test ids
        if 'test_ids' in config.get_config() \
                and 'disease' in config.get_config()['test_ids']:
            self.test_diseaseids = config.get_config()['test_ids']['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_diseaseids = []

        # helpers built on the curie map and this source's graph
        self.gu = GraphUtils(curie_map.get())
        self.g = self.graph
        self.geno = Genotype(self.g)
コード例 #7
0
ファイル: MPD.py プロジェクト: JervenBolleman/dipper
    def __init__(self):
        """Initialise the 'mpd' source.

        Merges the curie map into the namespaces, creates the dataset
        record with its citation, and sets up empty lookup tables plus the
        genotype and graph-utility helpers.
        """
        Source.__init__(self, 'mpd')

        # @N, not sure if this step is required
        self.namespaces.update(curie_map.get())

        self.nobnodes = True  # FIXME
        self.stdevthreshold = 2

        # dataset details for this resource
        # @N: Note that there is no license as far as I can tell
        self.dataset = Dataset(
            'mpd',
            'MPD',
            'http://phenome.jax.org',
            None,
            None)

        # TODO add a citation for mpd dataset as a whole
        self.dataset.set_citation('PMID:15619963')

        # lookup tables, all empty at construction time
        self.idlabel_hash = {}
        self.assayhash = {}
        # mean value for each measure, by strain+sex
        self.strain_scores_by_measure = {}
        # mean/zscore of each measure, by strain+sex
        self.score_means_by_measure = {}

        self.geno = Genotype(self.graph)
        self.gu = GraphUtils(curie_map.get())
コード例 #8
0
ファイル: Ensembl.py プロジェクト: JervenBolleman/dipper
    def __init__(self, tax_ids=None, gene_ids=None):
        """Initialise the 'ensembl' source.

        :param tax_ids: taxa to use; ``None`` selects the defaults
            ``[9606, 10090, 7955]``
        :param gene_ids: stored on the instance before ``load_bindings()``
            runs, then replaced by the configured gene test ids (or an
            empty list when none are configured)
        """
        Source.__init__(self, 'ensembl')

        self.tax_ids = tax_ids
        self.gene_ids = gene_ids
        self.load_bindings()

        self.dataset = Dataset(
            'ensembl',
            'ENSEMBL',
            'http://www.ensembl.org',
            None)

        # fall back to the default taxa when none were supplied
        if self.tax_ids is None:
            self.tax_ids = [9606, 10090, 7955]

        # start from an empty list, then use the configured ids if present
        self.gene_ids = []
        if 'test_ids' in config.get_config() \
                and 'gene' in config.get_config()['test_ids']:
            self.gene_ids = config.get_config()['test_ids']['gene']
        else:
            logger.warning("not configured with gene test ids.")

        self.properties = Feature.properties

        # sets the logger's level as a side effect of construction
        logger.setLevel(logging.INFO)
コード例 #9
0
ファイル: MPD.py プロジェクト: alexgarciac/dipper
    def __init__(self, graph_type, are_bnodes_skolemized):
        """Initialise the 'mpd' (Mouse Phenome Database) source.

        :param graph_type: passed through unchanged to ``Source.__init__``
        :param are_bnodes_skolemized: passed through unchanged to
            ``Source.__init__``
        """
        # license_url and file_handle are intentionally not passed here
        Source.__init__(
            self, graph_type, are_bnodes_skolemized, 'mpd',
            ingest_title='Mouse Phenome Database',
            ingest_url='https://phenome.jax.org/',
            data_rights='https://phenome.jax.org/about/termsofuse')

        # TODO add a citation for mpd dataset as a whole
        self.dataset.set_citation('PMID:15619963')

        # @N, not sure if this step is required
        self.stdevthreshold = 2

        # lookup tables, all empty at construction time
        self.idlabel_hash = {}
        self.assayhash = {}
        # mean value for each measure, by strain+sex
        self.strain_scores_by_measure = {}
        # mean/zscore of each measure, by strain+sex
        self.score_means_by_measure = {}
コード例 #10
0
ファイル: MMRRC.py プロジェクト: JervenBolleman/dipper
    def __init__(self):
        """Initialise the 'mmrrc' source.

        Sets up the empty strain and id/label lookup tables, loads the
        bindings and creates the dataset record.
        """
        Source.__init__(self, 'mmrrc')

        # lookup tables, empty at construction time
        self.id_label_hash = {}
        self.strain_hash = {}

        self.load_bindings()

        self.dataset = Dataset(
            'mmrrc',
            'Mutant Mouse Regional Resource Centers',
            'https://www.mmrrc.org',
            None,
            'https://www.mmrrc.org/about/data_download.php')
コード例 #11
0
ファイル: WormBase.py プロジェクト: JervenBolleman/dipper
    def __init__(self):
        """Initialise the 'wormbase' source.

        Creates the dataset record and leaves ``version_num`` unset
        (``None``).
        """
        Source.__init__(self, 'wormbase')

        # dataset details for this resource
        # NO LICENSE for this resource
        self.dataset = Dataset(
            'wormbase',
            'WormBase',
            'http://www.wormbase.org',
            None,
            None,
            'http://www.wormbase.org/about/policies#012')

        self.version_num = None
コード例 #12
0
ファイル: IMPC.py プロジェクト: d3borah/dipper
    def __init__(self):
        """Initialise the 'impc' source and create its dataset record."""
        Source.__init__(self, 'impc')

        # dataset details for this resource
        self.dataset = Dataset(
            'impc',
            'IMPC',
            'http://www.mousephenotype.org',
            None,
            'https://raw.githubusercontent.com/mpi2/PhenotypeArchive/master/LICENSE')

        # TODO add a citation for impc dataset as a whole
        # :impc cito:citesAsAuthority PMID:24194600
コード例 #13
0
ファイル: AnimalQTLdb.py プロジェクト: d3borah/dipper
    def __init__(self):
        """Initialise the 'animalqtldb' source.

        Creates the dataset record and logs a warning that the raw data
        carries no licence or rights statement.
        """
        Source.__init__(self, 'animalqtldb')

        # update the dataset object with details about this resource
        self.dataset = Dataset(
            'animalqtldb', 'Animal QTL db',
            'http://www.animalgenome.org/cgi-bin/QTLdb/index', None, None,
            'http://www.animalgenome.org/QTLdb/faq#23')

        # source-specific warnings.  will be cleared when resolved.
        # Logger.warn is a deprecated alias (removed in Python 3.13).
        logger.warning(
            "No licences or rights exist for the raw data from this resource.")

        return
コード例 #14
0
ファイル: EOM.py プロジェクト: d3borah/dipper
    def __init__(self):
        """Initialise the 'eom' source.

        Merges the curie map into the namespaces, creates the dataset
        record and logs an error when no 'disco' entry is configured under
        'dbauth'.  The missing configuration is only logged; nothing is
        raised here.
        """
        Source.__init__(self, 'eom')
        self.namespaces.update(curie_map.get())

        # dataset details for this resource
        # TODO put this into a conf file?
        self.dataset = Dataset(
            'eom',
            'EOM',
            'http://elementsofmorphology.nih.gov',
            None,
            'http://www.genome.gov/copyright.cfm',
            'https://creativecommons.org/publicdomain/mark/1.0/')

        # let the user know when the database credentials are not configured
        has_dbauth = (
            'dbauth' in config.get_config()
            and 'disco' in config.get_config()['dbauth'])
        if not has_dbauth:
            logger.error("not configured with PG user/password.")
コード例 #15
0
ファイル: Orphanet.py プロジェクト: JervenBolleman/dipper
    def __init__(self):
        """Initialise the 'orphanet' source.

        Loads the bindings, creates the dataset record and warns when no
        disease test ids are configured.
        """
        Source.__init__(self, 'orphanet')

        self.load_bindings()

        self.dataset = Dataset(
            'orphanet',
            'Orphanet',
            'http://www.orpha.net',
            None,
            'http://creativecommons.org/licenses/by-nd/3.0/',
            'http://omim.org/help/agreement')

        # warn when the configuration carries no disease test ids
        has_disease_ids = (
            'test_ids' in config.get_config()
            and 'disease' in config.get_config()['test_ids'])
        if not has_disease_ids:
            logger.warning("not configured with disease test ids.")
コード例 #16
0
ファイル: Coriell.py プロジェクト: d3borah/dipper
    def __init__(self):
        """Initialise the 'coriell' source.

        Loads the bindings, creates the dataset record, logs the known
        data-source caveats and logs an error when no 'coriell' entry is
        configured under 'dbauth'.  The missing configuration is only
        logged; nothing is raised here.
        """
        Source.__init__(self, 'coriell')

        self.load_bindings()

        self.dataset = Dataset(
            'coriell', 'Coriell', 'http://ccr.coriell.org/', None)

        # data-source specific warnings (will be removed when issues are
        # cleared).  Logger.warn is a deprecated alias (removed in
        # Python 3.13), so Logger.warning is used.
        logger.warning(
            'We assume that if a species is not provided, '
            'that it is a Human-derived cell line')
        logger.warning(
            'We map all omim ids as a disease/phenotype entity, '
            'but should be fixed in the future')

        # check if config exists; if it doesn't, let the user know
        if 'dbauth' not in config.get_config() \
                or 'coriell' not in config.get_config()['dbauth']:
            logger.error("not configured with FTP user/password.")

        return
コード例 #17
0
ファイル: HGNC.py プロジェクト: d3borah/dipper
    def __init__(self, tax_ids=None, gene_ids=None):
        """Initialise the 'hgnc' source.

        :param tax_ids: stored as ``self.tax_ids`` without modification
        :param gene_ids: stored on the instance before ``load_bindings()``
            runs, then replaced by the configured gene test ids (or an
            empty list when none are configured)
        """
        Source.__init__(self, 'hgnc')

        self.tax_ids = tax_ids
        self.gene_ids = gene_ids
        self.load_bindings()

        self.dataset = Dataset(
            'hgnc', 'HGNC', 'http://www.genenames.org', None)

        # NOTE(review): the gene_ids argument is discarded here in favour of
        # the configured test ids -- confirm this is intended.
        self.gene_ids = []
        if 'test_ids' not in config.get_config() \
                or 'gene' not in config.get_config()['test_ids']:
            # Logger.warn is a deprecated alias (removed in Python 3.13)
            logger.warning("not configured with gene test ids.")
        else:
            self.gene_ids = config.get_config()['test_ids']['gene']

        self.properties = Feature.properties

        return
コード例 #18
0
ファイル: GWASCatalog.py プロジェクト: JervenBolleman/dipper
    def __init__(self):
        """Initialise the 'gwascatalog' source.

        Loads the bindings, creates the dataset record and, when gene test
        ids are configured, stores the whole configured ``test_ids``
        mapping on the instance.
        """
        Source.__init__(self, 'gwascatalog')

        self.load_bindings()

        self.dataset = Dataset(
            'gwascatalog',
            'GWAS Catalog',
            'http://www.ebi.ac.uk/gwas/',
            'The NHGRI-EBI Catalog of published genome-wide association studies',
            'http://creativecommons.org/licenses/by/3.0/',
            None)
        # 'http://www.ebi.ac.uk/gwas/docs/about'  # TODO add this

        # the check is for 'gene', but the complete test_ids entry is kept
        if 'test_ids' in config.get_config() \
                and 'gene' in config.get_config()['test_ids']:
            self.test_ids = config.get_config()['test_ids']
        else:
            logger.warning("not configured with gene test ids.")
コード例 #19
0
ファイル: KEGG.py プロジェクト: d3borah/dipper
    def __init__(self):
        """Initialise the 'kegg' source.

        Creates the dataset record, extends the disease test ids with any
        configured ones and sets up the empty label / id mapping tables.
        """
        Source.__init__(self, 'kegg')

        # update the dataset object with details about this resource
        self.dataset = Dataset(
            'kegg', 'KEGG', 'http://www.genome.jp/kegg/', None, None,
            'http://www.kegg.jp/kegg/legal.html')

        # source-specific warnings.  will be cleared when resolved.
        # check to see if there's any ids configured in the config;
        # otherwise, warn
        if 'test_ids' not in config.get_config() \
                or 'disease' not in config.get_config()['test_ids']:
            # Logger.warn is a deprecated alias (removed in Python 3.13)
            logger.warning("not configured with disease test ids.")
        else:
            # NOTE(review): relies on self.test_ids['disease'] already
            # existing (defined outside this method) and extends it in
            # place -- confirm it is not shared between instances.
            self.test_ids['disease'] += \
                config.get_config()['test_ids']['disease']

        self.label_hash = {}
        self.omim_disease_hash = {}  # to hold the mappings of omim:kegg ids
        self.kegg_disease_hash = {}  # to hold the mappings of kegg:omim ids

        return
コード例 #20
0
ファイル: OMIA.py プロジェクト: JervenBolleman/dipper
    def __init__(self):
        """Initialise the 'omia' source.

        Loads the bindings, creates the dataset record, and sets up the
        lookup tables, the hard-coded test ids and the graph helpers.
        """
        Source.__init__(self, 'omia')

        self.load_bindings()

        self.dataset = Dataset(
            'omia',
            'Online Mendelian Inheritance in Animals',
            'http://omia.angis.org.au',
            None,
            None,
            'http://sydney.edu.au/disclaimer.shtml')

        # one empty id table per record type
        self.id_hash = {
            'article': {},
            'phene': {},
            'breed': {},
            'taxon': {},
            'gene': {},
        }
        self.label_hash = {}
        self.gu = GraphUtils(curie_map.get())
        # omia to omim phene mappings
        self.omia_omim_map = {}
        # unique genes that have phenes (for fetching orthology)
        self.annotated_genes = set()

        disease_ids = [
            'OMIA:001702', 'OMIA:001867', 'OMIA:000478', 'OMIA:000201',
            'OMIA:000810', 'OMIA:001400']
        gene_ids = [
            492297, 434, 492296, 3430235, 200685834, 394659996, 200685845,
            28713538, 291822383]
        taxon_ids = [9691, 9685, 9606, 9615, 9913, 93934, 37029, 9627, 9825]
        self.test_ids = {
            'disease': disease_ids,
            'gene': gene_ids,
            'taxon': taxon_ids,
            # to be filled in during parsing of breed table
            # for lookup by breed-associations
            'breed': [],
        }

        # map of omia ids and any molecular info,
        # used to write a report for curation
        self.stored_omia_mol_gen = {}

        self.g = self.graph
        self.geno = Genotype(self.g)
コード例 #21
0
ファイル: Orphanet.py プロジェクト: d3borah/dipper
    def __init__(self):
        """Initialise the 'orphanet' source.

        Loads the bindings, creates the dataset record and warns when no
        disease test ids are configured.
        """
        Source.__init__(self, "orphanet")

        self.load_bindings()

        self.dataset = Dataset(
            "orphanet",
            "Orphanet",
            "http://www.orpha.net",
            None,
            "http://creativecommons.org/licenses/by-nd/3.0/",
            "http://omim.org/help/agreement",
        )

        # check to see if there's any ids configured in the config;
        # otherwise, warn
        if "test_ids" not in config.get_config() \
                or "disease" not in config.get_config()["test_ids"]:
            # Logger.warn is a deprecated alias (removed in Python 3.13)
            logger.warning("not configured with disease test ids.")

        return
コード例 #22
0
ファイル: MPD.py プロジェクト: putmantime/dipper
    def __init__(self, graph_type, are_bnodes_skolemized):
        """Initialise the 'mpd' source.

        :param graph_type: passed through unchanged to ``Source.__init__``
        :param are_bnodes_skolemized: passed through unchanged to
            ``Source.__init__``
        """
        Source.__init__(self, graph_type, are_bnodes_skolemized, 'mpd')

        # @N, not sure if this step is required
        self.stdevthreshold = 2

        # dataset details for this resource
        # @N: Note that there is no license as far as I can tell
        self.dataset = Dataset(
            'mpd',
            'MPD',
            'http://phenome.jax.org',
            None,
            None)

        # TODO add a citation for mpd dataset as a whole
        self.dataset.set_citation('PMID:15619963')

        # lookup tables, all empty at construction time
        self.idlabel_hash = {}
        self.assayhash = {}
        # mean value for each measure, by strain+sex
        self.strain_scores_by_measure = {}
        # mean/zscore of each measure, by strain+sex
        self.score_means_by_measure = {}
コード例 #23
0
ファイル: MPD.py プロジェクト: DoctorBud/dipper
    def __init__(self, graph_type, are_bnodes_skolemized):
        """Initialise the 'mpd' source.

        :param graph_type: passed through unchanged to ``Source.__init__``
        :param are_bnodes_skolemized: passed through unchanged to
            ``Source.__init__``
        """
        Source.__init__(self, graph_type, are_bnodes_skolemized, 'mpd')

        # @N, not sure if this step is required
        self.stdevthreshold = 2

        # dataset details for this resource
        # @N: Note that there is no license as far as I can tell
        self.dataset = Dataset(
            'mpd',
            'MPD',
            'http://phenome.jax.org',
            None,
            None)

        # TODO add a citation for mpd dataset as a whole
        self.dataset.set_citation('PMID:15619963')

        # lookup tables, all empty at construction time
        self.idlabel_hash = {}
        self.assayhash = {}
        # mean value for each measure, by strain+sex
        self.strain_scores_by_measure = {}
        # mean/zscore of each measure, by strain+sex
        self.score_means_by_measure = {}
コード例 #24
0
ファイル: Decipher.py プロジェクト: JervenBolleman/dipper
    def __init__(self):
        """Initialise the 'decipher' source.

        Loads the bindings, creates the dataset record, reads the disease
        test ids from the configuration (an empty list, plus a warning,
        when none are configured) and builds the graph helper objects.
        """
        Source.__init__(self, 'decipher')

        self.load_bindings()

        self.dataset = Dataset(
            'decipher',
            'Development Disorder Genotype – Phenotype Database',
            'https://decipher.sanger.ac.uk/',
            None,
            'https://decipher.sanger.ac.uk/legal')

        if 'test_ids' in config.get_config() \
                and 'disease' in config.get_config()['test_ids']:
            self.test_ids = config.get_config()['test_ids']['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_ids = []

        # helpers built on the curie map and this source's graph
        self.gu = GraphUtils(curie_map.get())
        self.g = self.graph
        self.geno = Genotype(self.g)
コード例 #25
0
ファイル: OMIM.py プロジェクト: d3borah/dipper
    def __init__(self):
        """Initialise the 'omim' source.

        Loads the bindings, creates the dataset record, logs an error when
        no OMIM API key is configured, and appends the configured OMIM
        disease test ids (with the ``OMIM:`` prefix removed) to
        ``self.test_ids``.
        """
        Source.__init__(self, 'omim')

        self.load_bindings()

        self.dataset = Dataset(
            'omim', 'Online Mendelian Inheritance in Man',
            'http://www.omim.org', None, 'http://omim.org/help/agreement')

        # check if config exists; if it doesn't, let the user know.
        # This must be `or`: with `and`, a missing 'keys' section raised
        # KeyError on the second operand, and a present 'keys' section
        # short-circuited so a missing 'omim' entry was never reported.
        if 'keys' not in config.get_config() \
                or 'omim' not in config.get_config()['keys']:
            logger.error("not configured with API key.")

        # check to see if there's any ids configured in the config;
        # otherwise, warn
        if 'test_ids' not in config.get_config() \
                or 'disease' not in config.get_config()['test_ids']:
            # Logger.warn is a deprecated alias (removed in Python 3.13)
            logger.warning("not configured with disease test ids.")
        else:
            # select only those test ids that are omim's.
            # NOTE(review): relies on self.test_ids already existing
            # (defined outside this method) -- confirm it is not a list
            # shared between instances, since += extends it in place.
            self.test_ids += [
                obj.replace('OMIM:', '')
                for obj in config.get_config()['test_ids']['disease']
                if obj.startswith('OMIM:')]

        return
コード例 #26
0
ファイル: GeneReviews.py プロジェクト: JervenBolleman/dipper
    def __init__(self):
        """Initialise the 'genereviews' source.

        Loads the bindings, creates the dataset record with its citation,
        sets up the empty book containers and reads the disease test ids
        from the configuration (an empty list, plus a warning, when none
        are configured).
        """
        Source.__init__(self, 'genereviews')

        self.load_bindings()

        self.dataset = Dataset(
            'genereviews',
            'Gene Reviews',
            'http://genereviews.org/',
            None,
            'http://www.ncbi.nlm.nih.gov/books/NBK138602/')
        self.dataset.set_citation('GeneReviews:NBK1116')

        self.gu = GraphUtils(curie_map.get())

        # book containers, empty at construction time
        self.book_ids = set()
        self.all_books = {}

        if 'test_ids' in config.get_config() \
                and 'disease' in config.get_config()['test_ids']:
            # every configured disease test id is taken as-is (no filtering)
            self.test_ids = config.get_config()['test_ids']['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_ids = []