def parse(self, limit=None):
    """Add gene-to-phenotype associations from the clean G2P file to the graph.

    The ZP mapping file is loaded through a ``ZFIN`` helper instance, then
    every row of the tab-separated ``g2p_clean`` file is mapped to a
    phenotype id via ``_map_sextuple_to_phenotype``.  Rows that map to a
    phenotype yield a ``G2PAssoc`` carrying evidence ``ECO:0000059`` and,
    when a publication id is present, that publication as its source.

    :param limit: accepted but not consulted in this method; every row of
        the file is read.
    :raises ValueError: from the tuple unpack when a row does not hold
        exactly 26 fields.
    """
    zfin_parser = ZFIN(self.graph_type, self.are_bnodes_skized)
    model = Model(self.graph)

    zp_path = '/'.join((self.rawdir, self.files['zpmap']['file']))
    g2p_path = '/'.join((self.rawdir, self.files['g2p_clean']['file']))
    zfin_parser.zp_map = zfin_parser._load_zp_mappings(zp_path)

    with open(g2p_path, 'r', encoding="utf8") as handle:
        for record in csv.reader(handle, delimiter='\t', quotechar='\"'):
            # Positional unpack of all 26 fields; only the ones used below
            # are named, the rest are discarded into ``_``.
            (_, _, gene_id,
             subterm1_id, _,
             _, _,
             superterm1_id, _,
             quality_id, _, modifier,
             subterm2_id, _,
             _, _,
             superterm2_id, _,
             _, _, _, _, _,
             pub_id, _, _) = record

            zp_id = zfin_parser._map_sextuple_to_phenotype(
                superterm1_id, subterm1_id, quality_id,
                superterm2_id, subterm2_id, modifier)

            # The gene is registered with the model for every row, whether
            # or not a phenotype id was found.
            gene_curie = "ZFIN:{0}".format(gene_id)
            model.makeLeader(gene_curie)

            if not zp_id:
                continue

            assoc = G2PAssoc(self.graph, self.name, gene_curie, zp_id)
            if pub_id:
                pub_curie = "ZFIN:{0}".format(pub_id)
                Reference(
                    self.graph, pub_curie,
                    Reference.ref_types['document']).addRefToGraph()
                assoc.add_source(pub_curie)
            assoc.add_evidence('ECO:0000059')
            assoc.add_association_to_graph()
def parse(self, limit=None):
    """Add gene-to-phenotype associations from the clean G2P file to the graph.

    The ZP mappings are loaded through a ``ZFIN`` helper instance, then the
    tab-separated ``g2p_clean`` file under ``self.rawdir`` is read row by
    row.  Columns are located by the names listed in
    ``self.files['g2p_clean']['columns']``.  Rows whose phenotype tag is not
    ``abnormal`` are skipped with a warning.  The remaining rows are mapped
    with ``_map_octuple_to_phenotype``; when that yields a phenotype id a
    ``G2PAssoc`` is added to the graph with the
    ``'experimental phenotypic evidence'`` term from ``self.globaltt`` and,
    when a publication id is present, that publication as its source.

    :param limit: accepted but not consulted in this method; every row of
        the file is read.
    :raises ValueError: if one of the required column names is missing from
        the configured column list.
    """
    zfin_parser = ZFIN(self.graph_type, self.are_bnodes_skized)
    model = Model(self.graph)

    src_key = 'zpmap'  # keep same-as zfin.files[key]
    zfin_parser.zp_map = zfin_parser._load_zp_mappings(src_key)

    src_key = 'g2p_clean'
    raw = '/'.join((self.rawdir, self.files[src_key]['file']))
    LOG.info("Processing clean Geno to Pheno from file: %s", raw)
    col = self.files[src_key]['columns']
    collen = len(col)

    # The column layout is fixed for the whole file, so resolve each
    # position once instead of searching the list again for every row.
    i_gene = col.index('Gene ID')
    i_subterm1 = col.index('Affected Structure or Process 1 subterm ID')
    i_pc_rel = col.index('Post-composed Relationship ID')
    i_superterm1 = col.index('Affected Structure or Process 1 superterm ID')
    i_quality = col.index('Phenotype Keyword ID')
    i_modifier = col.index('Phenotype Tag')
    i_subterm2 = col.index('Affected Structure or Process 2 subterm ID')
    i_pc_rel2 = col.index('Post-composed Relationship (rel) ID')
    i_superterm2 = col.index('Affected Structure or Process 2 superterm ID')
    i_pub = col.index('Publication ID')

    # Shortest row that still contains every column read below.
    min_width = 1 + max(
        i_gene, i_subterm1, i_pc_rel, i_superterm1, i_quality, i_modifier,
        i_subterm2, i_pc_rel2, i_superterm2, i_pub)

    with open(raw, 'r', encoding="utf8") as csvfile:
        reader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in reader:
            if len(row) != collen:
                LOG.warning('Row: %i has unexpected format', reader.line_num)
                if len(row) < min_width:
                    # Blank or truncated line: indexing it would raise
                    # IndexError and abort the whole parse.  Over-long rows
                    # are still processed.
                    continue

            modifier = row[i_modifier].strip()
            if modifier != 'abnormal':
                LOG.warning(
                    "skipping phenotype with modifier %s != abnormal ",
                    modifier)
                continue

            # NOTE(review): gene_id, subterm1_id and pc_rel2_id are taken
            # verbatim while the other fields are stripped -- kept as-is;
            # confirm whether that asymmetry is intentional.
            gene_id = row[i_gene]
            subterm1_id = row[i_subterm1]
            pc_rel_id = row[i_pc_rel].strip()
            superterm1_id = row[i_superterm1].strip()
            quality_id = row[i_quality].strip()
            subterm2_id = row[i_subterm2].strip()
            pc_rel2_id = row[i_pc_rel2]
            superterm2_id = row[i_superterm2].strip()
            pub_id = row[i_pub].strip()

            zp_id = zfin_parser._map_octuple_to_phenotype(
                subterm1_id, pc_rel_id, superterm1_id, quality_id,
                subterm2_id, pc_rel2_id, superterm2_id, modifier)

            # The gene is registered with the model for every abnormal row,
            # whether or not a phenotype id was found.
            gene_curie = "ZFIN:{0}".format(gene_id)
            model.makeLeader(gene_curie)
            pub_curie = "ZFIN:{0}".format(pub_id)

            if zp_id:
                assoc = G2PAssoc(self.graph, self.name, gene_curie, zp_id)
                if pub_id:
                    reference = Reference(
                        self.graph, pub_curie, self.globaltt['document'])
                    reference.addRefToGraph()
                    assoc.add_source(pub_curie)
                assoc.add_evidence(
                    self.globaltt['experimental phenotypic evidence'])
                assoc.add_association_to_graph()