def integrator(self, variant, out_splts):
    """Fill ``variant`` from a group of annotation rows and return it.

    Variant-level fields are taken from the first row of ``out_splts``;
    every row then contributes one ``Isoform`` that is appended to
    ``variant.isoforms``.

    variant   -- object exposing ``fields`` (item-assignable) and
                 ``isoforms`` (supports ``append``).
    out_splts -- sequence of rows, each indexable by the positions stored in
                 ``indexOf``. Presumably these are the split output lines
                 that describe different isoforms of one variant -- confirm
                 against the caller.

    NOTE(review): ``indexOf`` is referenced as a bare name, so it has to be
    resolvable outside this method; it is not defined here.
    """
    # Variant-level columns are read from the first row only.
    for column in self.cols:
        variant.fields[column] = self.nullify(out_splts[0][indexOf[column]])

    # Build one isoform per row.
    for row in out_splts:
        isoform = Isoform()

        protein_pos = row[indexOf["proteinPosition"]]
        if protein_pos not in ('', 'NA'):
            # The value is split on '/' and its first two parts are parsed
            # as integers: codon position, then codon total.
            pieces = protein_pos.split('/')
            codon_pos, codon_total = int(pieces[0]), int(pieces[1])
            isoform.fields["codon_pos"] = codon_pos
            isoform.fields["codon_total"] = codon_total

        for key in ("accession", "functionGVS", "polyPhen"):
            isoform.fields[key] = self.nullify(row[indexOf[key]])

        # Reference/mutant amino acids are only recorded when the column
        # holds exactly two comma-separated values.
        amino_acids = row[indexOf["aminoAcids"]].split(',')
        if len(amino_acids) == 2:
            isoform.fields["ref_aa"], isoform.fields["mut_aa"] = amino_acids

        variant.isoforms.append(isoform)
        # The gene field is set after the append; the list holds the same
        # object, so the stored isoform still receives it.
        isoform.fields["gene"] = self.nullify(row[indexOf["geneList"]])

    return variant
def integrator(self, variant, out_splts):
    """Fill ``variant`` from a group of annotation rows and return it.

    Variant-level fields are taken from the first row of ``out_splts``;
    every row then contributes one ``Isoform`` that is appended to
    ``variant.isoforms``. Codon position and total default to ``-1`` when
    the protein-position column is empty or ``'NA'``.

    variant   -- object exposing ``fields`` (item-assignable) and
                 ``isoforms`` (supports ``append``).
    out_splts -- sequence of rows, each indexable by the positions stored in
                 ``indexOf``. Presumably these are the split output lines
                 that describe different isoforms of one variant -- confirm
                 against the caller.

    NOTE(review): ``indexOf`` is referenced as a bare name, so it has to be
    resolvable outside this method; it is not defined here.
    """
    # Variant-level columns are read from the first row only. The paired
    # db column is not used, but the zip is kept so that iteration still
    # stops at the shorter of the two sequences.
    for column, _db_column in zip(self.cols, self.db_cols):
        variant.fields[column] = self.nullify(out_splts[0][indexOf[column]])

    # Build one isoform per row.
    for row in out_splts:
        protein_pos = row[indexOf["proteinPosition"]]
        codon_pos, codon_total = -1, -1
        if protein_pos not in ('', 'NA'):
            # The value is split on '/' and its first two parts are parsed
            # as integers: codon position, then codon total.
            pieces = protein_pos.split('/')
            codon_pos, codon_total = int(pieces[0]), int(pieces[1])

        isoform = Isoform()
        # Source column name -> isoform field name, in assignment order.
        renamed = (
            ("accession", "accession"),
            ("functionGVS", "ss_functionGVS"),
            ("polyPhen", "ss_polyPhen"),
        )
        for source_key, field_key in renamed:
            isoform.fields[field_key] = self.nullify(row[indexOf[source_key]])
        isoform.fields["codon_pos"] = codon_pos
        isoform.fields["codon_total"] = codon_total

        # Reference/mutant amino acids are only recorded when the column
        # holds exactly two comma-separated values.
        amino_acids = row[indexOf["aminoAcids"]].split(',')
        if len(amino_acids) == 2:
            isoform.fields["ref_aa"], isoform.fields["mut_aa"] = amino_acids

        variant.isoforms.append(isoform)

    return variant
def varListIntegrator(self, variant, out_splt):
    """Attach isoform information from one annotation row to ``variant``.

    If ``variant.fields`` has no ``"ref_aa"`` entry yet, the reference and
    mutant amino acids are filled in from the ``old_aa/new_aa`` column (only
    when it splits into exactly two parts on ``'/'``). An ``Isoform`` is
    then built from the row and appended to ``variant.isoforms`` -- once per
    matching gene id when the ``Genes`` lookup returns several rows.

    variant  -- object exposing ``fields`` (supports ``in`` and item
                assignment) and ``isoforms`` (supports ``append``).
    out_splt -- one row, indexable by the positions in ``self.indexOf``.

    Returns the collected warning messages joined with ``'| \t |'``; the
    result is an empty string when nothing was reported.
    """
    messages = []

    if "ref_aa" not in variant.fields:
        aas = out_splt[self.indexOf["old_aa/new_aa"]].split('/')
        if len(aas) == 2:
            variant.fields["ref_aa"] = aas[0]
            variant.fields["mut_aa"] = aas[1]

    iso = Isoform()
    for key in ("transcript_id", "exon_rank", "effect"):
        iso.fields[key] = out_splt[self.indexOf[key]]

    # Empty columns leave the corresponding isoform field unset.
    codon_pos = out_splt[self.indexOf["codon_num(cds)"]]
    if codon_pos:
        iso.fields["codon_pos"] = int(codon_pos)

    cds_size = out_splt[self.indexOf["cds_size"]]
    if cds_size != '':
        # Floor division keeps the codon count an integer on Python 3 as
        # well; plain '/' would produce a float there.
        iso.fields["codon_total"] = int(cds_size) // 3

    gene_key = "gene_id"
    ucsc_id = out_splt[self.indexOf[gene_key]]
    if ucsc_id == '':
        # No gene reference in the row: store the -1 placeholder.
        iso.fields[gene_key] = -1
        variant.isoforms.append(iso)
        return '| \t |'.join(messages)

    # NOTE(review): the query is assembled by string interpolation, so a
    # quote inside ucsc_id breaks (or injects into) the statement. Switch to
    # a parameterized query if self.conn.query supports one -- its API is
    # not visible from here.
    query = "select id from Genes where ucsc_id = '%s'" % ucsc_id
    gene_rows = self.conn.query(query)

    if len(gene_rows) > 0:
        # One isoform copy per matching gene id. The ids are converted up
        # front so that a bad row fails before anything is appended.
        for gene_id in [int(row[0]) for row in gene_rows]:
            clone = iso.clone()
            clone.fields[gene_key] = gene_id
            variant.isoforms.append(clone)
    else:
        # Unknown ucsc_id: create a placeholder gene and report it.
        gid = importer.makeEmptyGene(self.conn, 'ucsc_id', ucsc_id)
        iso.fields[gene_key] = gid
        messages.append(
            'No gene id for ucsc_id=|%s|, create an empty gene, id: %d'
            % (ucsc_id, gid))
        variant.isoforms.append(iso)

    return '| \t |'.join(messages)