예제 #1
0
    def parse(self, limit=None):
        """
        Add gene-to-phenotype associations from the 'g2p_clean' file
        to self.graph.

        The zp mapping file is loaded into a helper ZFIN instance first.
        Every tab-separated row of the g2p file is then unpacked into its
        26 fields, and the two superterm/subterm pairs, the quality and the
        modifier are handed to ZFIN._map_sextuple_to_phenotype().  When that
        call returns a truthy id, a G2PAssoc between "ZFIN:<gene id>" and
        that id is added to the graph with evidence 'ECO:0000059' and,
        when the row carries a publication id, that publication as source.

        :param limit: currently ignored; every row in the file is read
        :return: None
        :raises ValueError: if a row does not have exactly 26 fields
            (raised by the tuple unpacking)

        """
        zfin_parser = ZFIN(self.graph_type, self.are_bnodes_skized)
        model = Model(self.graph)
        zp_file = '/'.join((self.rawdir, self.files['zpmap']['file']))
        g2p_file = '/'.join((self.rawdir, self.files['g2p_clean']['file']))
        zfin_parser.zp_map = zfin_parser._load_zp_mappings(zp_file)

        with open(g2p_file, 'r', encoding="utf8") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in filereader:

                # The unpacking names every column and doubles as a width
                # check.  Each target name must be unique: a repeated name
                # would silently keep only the later column's value.
                (internal_id, symbol, gene_id, subterm1_id, subterm1_label,
                 pc_rel_id, pc_rel_label, superterm1_id, superterm1_label,
                 quality_id, quality_name, modifier, subterm2_id,
                 subterm2_label, pc_rel2_id, pc_rel2_label, superterm2_id,
                 superterm2_label, fish_id, fish_label, start_stage, end_stage,
                 environment, pub_id, figure_id, unknown_field) = row

                zp_id = zfin_parser._map_sextuple_to_phenotype(
                    superterm1_id, subterm1_id, quality_id, superterm2_id,
                    subterm2_id, modifier)

                gene_curie = "ZFIN:{0}".format(gene_id)
                # NOTE(review): makeLeader() runs for every row, including
                # rows that end up without a phenotype mapping -- confirm
                # that this is intended.
                model.makeLeader(gene_curie)

                if not zp_id:
                    continue

                assoc = G2PAssoc(self.graph, self.name, gene_curie, zp_id)
                if pub_id:
                    # only build the publication curie when there is an id
                    pub_curie = "ZFIN:{0}".format(pub_id)
                    reference = Reference(self.graph, pub_curie,
                                          Reference.ref_types['document'])
                    reference.addRefToGraph()
                    assoc.add_source(pub_curie)

                assoc.add_evidence('ECO:0000059')
                assoc.add_association_to_graph()
예제 #2
0
    def parse(self, limit=None):
        """
        Add gene-to-phenotype associations from the 'g2p_clean' file
        to self.graph.

        The zp mapping file is loaded into a helper ZFIN instance first.
        Every tab-separated row of the g2p file is then unpacked into its
        26 fields, and the two superterm/subterm pairs, the quality and the
        modifier are handed to ZFIN._map_sextuple_to_phenotype().  When that
        call returns a truthy id, a G2PAssoc between "ZFIN:<gene id>" and
        that id is added to the graph with evidence 'ECO:0000059' and,
        when the row carries a publication id, that publication as source.

        :param limit: currently ignored; every row in the file is read
        :return: None
        :raises ValueError: if a row does not have exactly 26 fields
            (raised by the tuple unpacking)

        """
        zfin_parser = ZFIN(self.graph_type, self.are_bnodes_skized)
        model = Model(self.graph)
        zp_file = '/'.join((self.rawdir, self.files['zpmap']['file']))
        g2p_file = '/'.join((self.rawdir, self.files['g2p_clean']['file']))
        zfin_parser.zp_map = zfin_parser._load_zp_mappings(zp_file)

        with open(g2p_file, 'r', encoding="utf8") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in filereader:

                # The unpacking names every column and doubles as a width
                # check.  Each target name must be unique: a repeated name
                # would silently keep only the later column's value.
                (internal_id, symbol, gene_id, subterm1_id, subterm1_label,
                 pc_rel_id, pc_rel_label, superterm1_id, superterm1_label,
                 quality_id, quality_name, modifier, subterm2_id,
                 subterm2_label, pc_rel2_id, pc_rel2_label, superterm2_id,
                 superterm2_label, fish_id, fish_label, start_stage, end_stage,
                 environment, pub_id, figure_id, unknown_field) = row

                zp_id = zfin_parser._map_sextuple_to_phenotype(
                    superterm1_id, subterm1_id, quality_id, superterm2_id,
                    subterm2_id, modifier)

                gene_curie = "ZFIN:{0}".format(gene_id)
                # NOTE(review): makeLeader() runs for every row, including
                # rows that end up without a phenotype mapping -- confirm
                # that this is intended.
                model.makeLeader(gene_curie)

                if not zp_id:
                    continue

                assoc = G2PAssoc(self.graph, self.name, gene_curie, zp_id)
                if pub_id:
                    # only build the publication curie when there is an id
                    pub_curie = "ZFIN:{0}".format(pub_id)
                    reference = Reference(self.graph, pub_curie,
                                          Reference.ref_types['document'])
                    reference.addRefToGraph()
                    assoc.add_source(pub_curie)

                assoc.add_evidence('ECO:0000059')
                assoc.add_association_to_graph()
예제 #3
0
파일: test_zfin.py 프로젝트: sgml/dipper
class ZFINTestCase(SourceTestCase):
    """
    Tests for the ZFIN source: loading of the zp mapping file, zp key
    construction, phenotype-to-zp-id lookup and genotype labels after parse().

    Assertions are deliberately not wrapped in try/except: AssertionError is
    an Exception subclass, so catching Exception and only logging it would
    make a test pass no matter what the code under test returned.
    """

    def setUp(self):
        """
        Create a ZFIN('rdf_graph', True) source in test-only mode and run
        the inherited _setDirToSource() on it before every test.
        """
        self.source = ZFIN('rdf_graph', True)
        self.source.settestonly(True)
        self._setDirToSource()

    def tearDown(self):
        """Drop the reference to the source after every test."""
        self.source = None

    def test_mapping_of_phenotypes_to_zp_ids(self):
        """
        test that code correctly uses zp_map to map phenotypes to zp ids
        :return:

        """
        mapping_file = "./tests/resources/zfin/zp-mapping-test-map.txt"
        pheno_file = "./tests/resources/zfin/zp-mapping-test-phenotype.txt"

        self.source.zp_map = self.source._load_zp_mappings(mapping_file)
        # read through a context manager so the handle is always closed
        with open(pheno_file) as pheno_handle:
            pheno_dat = pheno_handle.read().split('\t')

        # the fixture is expected to hold exactly 23 tab-separated fields
        (fish_num, fish_name, start_stage_id, start_stage_name, end_stage_id,
         end_stage_name, subterm1_id, subterm1_name, postcomp1_rel_id,
         postcomp1_rel_name, superterm1_id, superterm1_name, quality_id,
         quality_name, modifier, subterm2_id, subterm2_name, postcomp2_rel_id,
         postcomp2_rel_name, superterm2_id, superterm2_name, pub_id, env_id) = \
            pheno_dat

        self.assertEqual(
            self.source._map_octuple_to_phenotype(
                subterm1_id, postcomp1_rel_id, superterm1_id, quality_id,
                subterm2_id, postcomp2_rel_id, superterm2_id, "abnormal"),
            'ZP:0022140')

    def test_load_zp_mappings(self):
        """
        test correct loading of zp mappings file and construction of zp_map

        """
        zp_map = self.source._load_zp_mappings(
            "./tests/resources/zfin/zp-mapping-test.txt")
        self.assertIsInstance(
            zp_map, dict, "_load_zp_mappings() didn't return dict!")
        self.assertEqual(
            len(zp_map), 1,
            "_load_zp_mappings() didn't return exactly one thing!")
        self.assertDictEqual(
            zp_map, {
                'MONARCH:b308a8f1c67793a56d16': {
                    'post_composed_relationship_id_1': 'BFO:0000050',
                    'post_composed_relationship_id_2': 'BFO:0000050',
                    'quality_id': 'PATO:0001453',
                    'subterm1_id': 'ZFA:0009114',
                    'subterm2_id': 'GO:0005927',
                    'superterm1_id': 'ZFA:0001056',
                    'superterm2_id': 'ZFA:0001056',
                    'zp_id': 'ZP:0002959',
                    'modifier': 'PATO:0000460'
                }
            },
            "_load_zp_mappings() didn't return what I expected!")

    def test_make_zpkey(self):
        """
        test that _make_zpkey returns correct id

        """
        dummy_args = [str(num) for num in range(1, 9)]  # 1 - 8 as strings
        expected_key = self.source.make_id("_".join(dummy_args))
        self.assertEqual(
            self.source._make_zpkey(*dummy_args), expected_key)
        # Unpack the eight values so this call has the same shape as the one
        # above; passing the list itself would hand over a single argument.
        self.assertEqual(
            self.source._make_zpkey(*(['0'] * 8)),
            self.source._make_zpkey(*([''] * 8)),
            "_make_zpkey() doesn't seem to be replacing empty " +
            "strings with zeros before making key," +
            "this might cause zp_map lookup issues")

    def test_genotype_labels(self):
        """
        test that genotype label is set correctly after parse()

        """
        # NOTE(review): this path is relative in a different way than the
        # "./tests/resources/..." paths used by the other tests -- presumably
        # it is resolved against the source's raw directory; confirm.
        test_resource_dir = "../../tests/resources/zfin/"
        self.source.files['fish_components']['file'] = test_resource_dir + \
            "genotype-label-test-fish_components_fish.txt"
        self.source.files['backgrounds']['file'] = test_resource_dir + \
            "genotype-label-test-genotype_backgrounds.txt"
        self.source.files['geno']['file'] = test_resource_dir + \
            "genotype-label-test-genotype_features.txt"

        self.source.parse()

        this_iri = URIRef("http://zfin.org/ZDB-GENO-070228-3")
        expect_genotype_label = "shha<sup>tbx392/tbx392</sup> (AB)"
        self.assertEqual(str(self.source.testgraph.label(this_iri, None)),
                         expect_genotype_label)
예제 #4
0
    def parse(self, limit=None):
        """
        Add gene-to-phenotype associations from the 'g2p_clean' file
        to self.graph.

        Columns are located by the names listed in
        self.files['g2p_clean']['columns'].  Rows whose 'Phenotype Tag' is
        not 'abnormal' are skipped with a warning.  For the remaining rows
        the eight phenotype terms are handed to
        ZFIN._map_octuple_to_phenotype(); when that call returns a truthy
        id, a G2PAssoc between "ZFIN:<gene id>" and that id is added to the
        graph with the 'experimental phenotypic evidence' term from
        self.globaltt and, when the row carries a publication id, that
        publication as source.

        Rows whose width differs from the declared column count are logged.
        Rows too short to hold every column read here are then skipped
        rather than raising IndexError.

        :param limit: currently ignored; every row in the file is read
        :return: None
        :raises ValueError: if one of the required column names is missing
            from the declared column list

        """
        zfin_parser = ZFIN(self.graph_type, self.are_bnodes_skized)
        model = Model(self.graph)

        src_key = 'zpmap'  # keep same-as zfin.files[key]
        zfin_parser.zp_map = zfin_parser._load_zp_mappings(src_key)

        src_key = 'g2p_clean'
        raw = '/'.join((self.rawdir, self.files[src_key]['file']))
        LOG.info("Processing clean Geno to Pheno from file: %s", raw)
        col = self.files[src_key]['columns']
        collen = len(col)

        # Resolve the column positions once instead of searching the
        # column list again for every field of every row.
        idx = {
            name: col.index(name) for name in (
                'Gene ID',
                'Affected Structure or Process 1 subterm ID',
                'Post-composed Relationship ID',
                'Affected Structure or Process 1 superterm ID',
                'Phenotype Keyword ID',
                'Phenotype Tag',
                'Affected Structure or Process 2 subterm ID',
                'Post-composed Relationship (rel) ID',
                'Affected Structure or Process 2 superterm ID',
                'Publication ID',
            )
        }
        # shortest row that can still be indexed for every field above
        min_len = max(idx.values()) + 1

        with open(raw, 'r', encoding="utf8") as csvfile:
            reader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in reader:
                if len(row) != collen:
                    LOG.warning('Row: %i has unexpected format',
                                reader.line_num)
                    if len(row) < min_len:
                        # indexing below would fail; nothing usable here
                        continue

                modifier = row[idx['Phenotype Tag']].strip()
                if modifier != 'abnormal':
                    LOG.warning(
                        "skipping phenotype with modifier %s != abnormal ",
                        modifier)
                    continue

                # Every id is stripped: several of them become part of the
                # lookup key, and stray padding on any one of them would
                # change that key.
                gene_id = row[idx['Gene ID']].strip()
                subterm1_id = row[
                    idx['Affected Structure or Process 1 subterm ID']].strip()
                pc_rel_id = row[idx['Post-composed Relationship ID']].strip()
                superterm1_id = row[
                    idx['Affected Structure or Process 1 superterm ID']
                ].strip()
                quality_id = row[idx['Phenotype Keyword ID']].strip()
                subterm2_id = row[
                    idx['Affected Structure or Process 2 subterm ID']].strip()
                pc_rel2_id = row[
                    idx['Post-composed Relationship (rel) ID']].strip()
                superterm2_id = row[
                    idx['Affected Structure or Process 2 superterm ID']
                ].strip()
                pub_id = row[idx['Publication ID']].strip()

                zp_id = zfin_parser._map_octuple_to_phenotype(
                    subterm1_id, pc_rel_id, superterm1_id, quality_id,
                    subterm2_id, pc_rel2_id, superterm2_id, modifier)

                gene_curie = "ZFIN:{0}".format(gene_id)
                # NOTE(review): makeLeader() runs for every 'abnormal' row,
                # including rows that end up without a phenotype mapping --
                # confirm that this is intended.
                model.makeLeader(gene_curie)

                if not zp_id:
                    continue

                assoc = G2PAssoc(self.graph, self.name, gene_curie, zp_id)
                if pub_id:
                    # only build the publication curie when there is an id
                    pub_curie = "ZFIN:{0}".format(pub_id)
                    reference = Reference(self.graph, pub_curie,
                                          self.globaltt['document'])
                    reference.addRefToGraph()
                    assoc.add_source(pub_curie)

                assoc.add_evidence(
                    self.globaltt['experimental phenotypic evidence'])
                assoc.add_association_to_graph()