예제 #1
0
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Set up the CTD source: dataset metadata, test identifiers and the
        graph helper objects.

        :param graph_type: passed through unchanged to the parent constructor
        :param are_bnodes_skolemized: passed through unchanged to the
            parent constructor
        """
        super().__init__(graph_type, are_bnodes_skolemized, 'ctd')
        self.dataset = Dataset(
            'ctd', 'CTD', 'http://ctdbase.org', None,
            'http://ctdbase.org/about/legal.jsp')

        # Gene test ids live under config['test_ids']['gene'];
        # warn and fall back to an empty list when they are missing.
        if 'test_ids' in config.get_config() \
                and 'gene' in config.get_config()['test_ids']:
            self.test_geneids = config.get_config()['test_ids']['gene']
        else:
            logger.warning("not configured with gene test ids.")
            self.test_geneids = []

        # Same lookup and fallback for the disease test ids.
        if 'test_ids' in config.get_config() \
                and 'disease' in config.get_config()['test_ids']:
            self.test_diseaseids = config.get_config()['test_ids']['disease']
        else:
            logger.warning("not configured with disease test ids.")
            self.test_diseaseids = []

        # Short alias for the graph plus the two helpers bound to it.
        self.g = self.graph
        self.geno = Genotype(self.graph)
        self.pathway = Pathway(self.graph)
예제 #2
0
파일: CTD.py 프로젝트: sgml/dipper
    def __init__(self, graph_type, are_bnodes_skolemized):
        """
        Set up the CTD source with its ingest metadata, pick out the gene
        and disease test identifiers, and build the graph helper objects.

        :param graph_type: passed through unchanged to the parent constructor
        :param are_bnodes_skolemized: passed through unchanged to the
            parent constructor
        """
        super().__init__(
            graph_type,
            are_bnodes_skolemized,
            'ctd',
            ingest_title='Comparative Toxicogenomics Database',
            ingest_url='http://ctdbase.org',
            license_url=None,
            data_rights='http://ctdbase.org/about/legal.jsp'
            # file_handle=None
        )

        # self.all_test_ids is read here but not assigned in this method;
        # presumably the parent constructor provides it -- TODO confirm.
        if 'gene' in self.all_test_ids:
            self.test_geneids = self.all_test_ids['gene']
        else:
            LOG.warning("not configured with gene test ids.")
            self.test_geneids = []

        if 'disease' in self.all_test_ids:
            self.test_diseaseids = self.all_test_ids['disease']
        else:
            LOG.warning("not configured with disease test ids.")
            self.test_diseaseids = []

        self.geno = Genotype(self.graph)
        self.pathway = Pathway(self.graph)
예제 #3
0
파일: KEGG.py 프로젝트: moon3stars/dipper
    def _process_pathway_ko(self, limit):
        """
        Link each KEGG ortholog group (gene) to its canonical pathway.

        Reads the tab-separated 'pathway_ko' raw file, whose rows unpack
        as (ko_id, pathway_id), prefixes both identifiers with 'KEGG-' and
        records the pairing through Pathway.addGeneToPathway.

        In test mode only rows whose pathway id is listed in
        self.test_ids['pathway'] are used, and the row limit is ignored.

        :param limit: row cap applied outside test mode; None means no cap
        :return: None
        :raises ValueError: if a row does not have exactly two columns
        """
        LOG.info("Processing KEGG pathways to kegg ortholog classes")
        graph = self.testgraph if self.test_mode else self.graph

        # The helper only depends on the target graph, so build it once
        # instead of once per row (same approach as _process_pathways).
        pathway_model = Pathway(graph)

        raw = '/'.join((self.rawdir, self.files['pathway_ko']['file']))
        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for line_counter, row in enumerate(filereader, start=1):
                (ko_id, pathway_id) = row

                # The test-id filter is applied to the raw, unprefixed id.
                if self.test_mode and pathway_id not in self.test_ids['pathway']:
                    continue

                pathway_model.addGeneToPathway(
                    'KEGG-' + ko_id, 'KEGG-' + pathway_id)

                # NOTE(review): the check runs after the row is handled and
                # uses '>', so up to limit + 1 rows are processed.
                # Left unchanged to keep the existing limit semantics.
                if not self.test_mode and limit is not None and line_counter > limit:
                    break
예제 #4
0
파일: CTD.py 프로젝트: justaddcoffee/dipper
    def parse(self, limit=None):
        """
        Override Source.parse()

        Parses the CTD chemical-disease association file and, when the
        batch-query publications file is present on disk, the curated
        chemical-disease data as well.

        :param limit: (int, optional) limit the number of rows processed
        :return: None
        """
        if limit is not None:
            LOG.info("Only parsing first %d rows", limit)

        LOG.info("Parsing files...")

        # A test-only run implies test mode.
        if self.test_only:
            self.test_mode = True

        self.geno = Genotype(self.graph)
        self.pathway = Pathway(self.graph)

        self._parse_ctd_file(limit, 'chemical_disease_associations')

        # The 'gene_pathway' and 'gene_disease' files are not parsed here.

        batch_file = '/'.join(
            (self.rawdir, self.api_fetch['publications']['file']))
        if not os.path.exists(batch_file):
            LOG.error('Batch Query file "%s" does not exist', batch_file)
        else:
            self._parse_curated_chem_disease(batch_file, limit)
        LOG.info("Done parsing files.")
예제 #5
0
    def _add_component_pathway_association(self, gene_curie, pathway_curie,
                                           pathway_label, eco_curie):
        """
        Record a gene as a component of a pathway and add the matching
        'involved in' association, with evidence, to the graph.

        :param gene_curie: identifier used as the association subject
        :param pathway_curie: identifier used as the association object
        :param pathway_label: label handed to Pathway.addPathway
        :param eco_curie: evidence identifier attached to the association
        :return: None
        """
        # Declare the pathway first, then attach the gene to it.
        pathway_model = Pathway(self.graph)
        pathway_model.addPathway(pathway_curie, pathway_label)
        pathway_model.addComponentToPathway(gene_curie, pathway_curie)

        # gene --(involved in)--> pathway
        assoc = Assoc(self.graph, self.name)
        assoc.sub = gene_curie
        assoc.rel = self.globaltt['involved in']
        assoc.obj = pathway_curie
        # The id is set once subject/relation/object are in place and
        # before the evidence is attached, as in the original ordering.
        assoc.set_association_id()
        assoc.add_evidence(eco_curie)
        assoc.add_association_to_graph()
예제 #6
0
    def _process_pathways(self, limit=None):
        """
        Add the KEGG pathway identifiers together with their depictions.

        Reads the tab-separated 'pathway' raw file, whose rows unpack as
        (pathway_id, pathway_name). Each pathway id is stripped, prefixed
        with 'KEGG-' and registered through Pathway.addPathway. The KEGG
        pathway image, which maps 1:1 with the identifier, is attached
        through Model.addDepiction.

        In test mode only rows whose pathway id is listed in
        self.test_ids['pathway'] are used, and the row limit is ignored.

        :param limit: row cap applied outside test mode; None means no cap
        :return: None

        """

        logger.info("Processing pathways")
        g = self.testgraph if self.testMode else self.graph
        model = Model(g)
        path = Pathway(g)
        raw = '/'.join((self.rawdir, self.files['pathway']['file']))
        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            rows = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for line_counter, row in enumerate(rows, start=1):
                (pathway_id, pathway_name) = row

                # The test-id filter is applied to the raw, unprefixed id.
                in_test_set = pathway_id in self.test_ids['pathway'] \
                    if self.testMode else True
                if not in_test_set:
                    continue

                pathway_id = 'KEGG-' + pathway_id.strip()
                path.addPathway(pathway_id, pathway_name)

                # The image file is named after the identifier with its
                # 'KEGG-path:' part removed.
                image_filename = pathway_id.replace('KEGG-path:', '') + '.png'
                image_url = \
                    'http://www.genome.jp/kegg/pathway/map/' + image_filename
                model.addDepiction(pathway_id, image_url)

                if self.testMode or limit is None:
                    continue
                if line_counter > limit:
                    break

        logger.info("Done with pathways")
예제 #7
0
    def _add_component_pathway_association(
            self, eco_map, component, component_prefix, pathway_id,
            pathway_prefix, pathway_label, go_ecode):
        """
        Build the curies for a component and a pathway, record the
        component as part of the pathway, and add the matching
        'involved in' association, with evidence, to the graph.

        :param eco_map: mapping indexed by go_ecode to get the evidence curie
        :param component: local component id; surrounding whitespace is
            stripped before use
        :param component_prefix: prefix joined to the component id with ':'
        :param pathway_id: local pathway id
        :param pathway_prefix: prefix joined to the pathway id with ':'
        :param pathway_label: label handed to Pathway.addPathway
        :param go_ecode: key looked up in eco_map
        :return: None
        """
        pathway_model = Pathway(self.graph)

        pathway_curie = "{prefix}:{local}".format(
            prefix=pathway_prefix, local=pathway_id)
        gene_curie = "{prefix}:{local}".format(
            prefix=component_prefix, local=component.strip())
        # Looked up before anything is written, so an unknown evidence
        # code fails ahead of the graph calls below.
        eco_curie = eco_map[go_ecode]

        pathway_model.addPathway(pathway_curie, pathway_label)
        pathway_model.addComponentToPathway(gene_curie, pathway_curie)

        # gene --(involved in)--> pathway
        assoc = Assoc(self.graph, self.name)
        assoc.sub = gene_curie
        assoc.rel = self.globaltt['involved in']
        assoc.obj = pathway_curie
        assoc.set_association_id()
        assoc.add_evidence(eco_curie)
        assoc.add_association_to_graph()
예제 #8
0
    def parse(self, limit=None):
        """
        Override Source.parse()

        Parses the CTD chemical-disease interaction, gene-pathway and
        gene-disease files, followed by the curated chemical-disease data.

        :param limit: (int, optional) limit the number of rows processed
        :return: None
        """
        if limit is not None:
            logger.info("Only parsing first %d rows", limit)

        logger.info("Parsing files...")

        # A test-only run implies test mode.
        if self.testOnly:
            self.testMode = True

        # Everything below writes to the test graph while in test mode.
        self.g = self.testgraph if self.testMode else self.graph
        self.geno = Genotype(self.g)
        self.pathway = Pathway(self.g)

        # Same files, same order as before.
        for file_key in ('chemical_disease_interactions',
                         'gene_pathway',
                         'gene_disease'):
            self._parse_ctd_file(limit, self.files[file_key]['file'])
        self._parse_curated_chem_disease(limit)

        logger.info("Done parsing files.")
예제 #9
0
    def parse(self, limit=None):
        """
        Override Source.parse()
        Parses version and interaction information from CTD
        Args:
        :param limit (int, optional) limit the number of rows processed
        Returns:
        :return None
        """
        if limit is not None:
            LOG.info("Only parsing first %d rows", limit)

        LOG.info("Parsing files...")

        if self.test_only:
            self.test_mode = True

        self.geno = Genotype(self.graph)
        self.pathway = Pathway(self.graph)

        src_key = 'chemical_disease_associations'
        self._parse_ctd_file(limit, src_key)