def gene2go(pfam2goFn, fn, tblout, eTh=1e-05, source='broad'):
    """Write the GO terms reachable from each protein's PFAM hits.

    For every protein returned by the selected PFAM loader, each PFAM id is
    truncated at its first '.' and looked up in the PFAM-to-GO table. Proteins
    with at least one GO term get one line in the file ``fn + '.go'``:
    the protein id, a tab, then the GO terms joined by ';'.

    Args:
        pfam2goFn: argument handed to ``load_pfam2go``.
        fn: prefix of the output file; '.go' is appended.
        tblout: argument handed to the selected PFAM loader.
        eTh: threshold forwarded unchanged to the PFAM loader
            (presumably an E-value cut-off -- confirm against the loaders).
        source: "broad" selects ``load_broad_pfam``, "tblout" selects
            ``load_pfam_tblout``.

    Raises:
        SystemExit: if ``source`` is neither "broad" nor "tblout".
    """
    # Reject a bad source up front, before any input file is parsed.
    if source not in ("broad", "tblout"):
        sys.exit("Specify valid source: tblout or broad!")

    pfam2go = load_pfam2go(pfam2goFn)
    if source == "broad":
        prot2pfam = load_broad_pfam(tblout, eTh)
    else:
        prot2pfam = load_pfam_tblout(tblout, eTh)

    # The context manager closes the output even if a write fails.
    with open(fn + '.go', 'w') as out:
        for prot in prot2pfam:
            terms = []
            for pfam in prot2pfam[prot]:
                # Drop the version suffix, e.g. "PF00001.1" -> "PF00001".
                pfamShort = pfam.split('.')[0]
                if pfamShort not in pfam2go:
                    continue
                terms.extend("%s" % go for go in pfam2go[pfamShort])
            # Proteins without any GO term are not reported.
            if terms:
                out.write("%s\t%s\n" % (prot, ";".join(terms)))
def gene2go(pfam2goFn, fn, tblout, eTh=1e-05, source='broad'):
    """Write the GO terms reachable from each protein's PFAM hits.

    For every protein returned by the selected PFAM loader, each PFAM id is
    truncated at its first '.' and looked up in the PFAM-to-GO table. Proteins
    with at least one GO term get one line in the file ``fn + '.go'``:
    the protein id, a tab, then the GO terms joined by ';'.

    Args:
        pfam2goFn: argument handed to ``load_pfam2go``.
        fn: prefix of the output file; '.go' is appended.
        tblout: argument handed to the selected PFAM loader.
        eTh: threshold forwarded unchanged to the PFAM loader
            (presumably an E-value cut-off -- confirm against the loaders).
        source: "broad" selects ``load_broad_pfam``, "tblout" selects
            ``load_pfam_tblout``.

    Raises:
        SystemExit: if ``source`` is neither "broad" nor "tblout".
    """
    # Reject a bad source up front, before any input file is parsed.
    if source not in ("broad", "tblout"):
        sys.exit("Specify valid source: tblout or broad!")

    pfam2go = load_pfam2go(pfam2goFn)
    if source == "broad":
        prot2pfam = load_broad_pfam(tblout, eTh)
    else:
        prot2pfam = load_pfam_tblout(tblout, eTh)

    # The context manager closes the output even if a write fails.
    with open(fn + '.go', 'w') as out:
        for prot in prot2pfam:
            terms = []
            for pfam in prot2pfam[prot]:
                # Drop the version suffix, e.g. "PF00001.1" -> "PF00001".
                pfamShort = pfam.split('.')[0]
                if pfamShort not in pfam2go:
                    continue
                terms.extend("%s" % go for go in pfam2go[pfamShort])
            # Proteins without any GO term are not reported.
            if terms:
                out.write("%s\t%s\n" % (prot, ";".join(terms)))
def report(files, pfam, annotation, tab, pTh, verbose):
    """Print transcripts that pass the P-value threshold in any input file.

    Each file in ``files`` is parsed with ``load_cuffdiff``. A transcript is
    reported on stdout (tab-separated) when its P-value is <= ``pTh`` in at
    least one file; the row holds the transcript id, gene id, four value
    columns per file, a PFAM column, a tab-annotation column and any extra
    annotation fields. For every input file, the gene ids passing the
    threshold in that file are also written to ``<file name>.<pTh>.ids``.

    Args:
        files: iterable of objects with a ``name`` attribute, each handed to
            ``load_cuffdiff``.
        pfam: argument for ``load_pfam_tblout``; skipped when falsy.
        annotation: path of an annotation file; '.gff' and '.gtf' select the
            matching loader, anything else goes to ``load_annotation``.
            Skipped when falsy.
        tab: path of a file read with ``load_annotation``; skipped when falsy.
        pTh: P-value threshold (inclusive).
        verbose: accepted for interface compatibility; not used here.

    Raises:
        KeyError: if a transcript of the last input file is missing from
            another input file.
    """
    # load optional annotation sources
    geneid2pfam, geneid2annotation, geneid2tab = {}, {}, {}
    if pfam:
        geneid2pfam = load_pfam_tblout(pfam)
        sys.stderr.write(" PFAMs for %s entries loaded.\n" % len(geneid2pfam))
    if tab:
        with open(tab) as handle:
            geneid2tab = load_annotation(handle)
        sys.stderr.write(" Tab annotation for %s entries loaded.\n" % len(geneid2tab))
    if annotation:
        with open(annotation) as handle:
            if annotation.endswith('.gff'):
                geneid2annotation = load_gff_annotation(handle)
            elif annotation.endswith('.gtf'):
                geneid2annotation = load_gtf_annotation(handle)
            else:
                geneid2annotation = load_annotation(handle, True)
        sys.stderr.write(" Annotations for %s entries loaded.\n" % len(geneid2annotation))

    # load all cuffdiff files, remembering the order they were given in
    fn2data = {}
    fnames = []
    for f in files:
        fnames.append(f.name)
        fn2data[f.name] = load_cuffdiff(f)

    # write output header
    header = "#transcript id\tgene id"
    for fn in fnames:
        header += "\tcontrol\t%s\tlog2(FC)\tP-value" % fn
    header += "\tannotation\n"
    sys.stdout.write(header)

    # Nothing to compare: stop here instead of failing on an undefined name.
    if not fnames:
        return

    # The transcript list is taken from the last input file.
    reference = fn2data[fnames[-1]]

    outfiles = []
    try:
        # one gene-id list per input file
        for fn in fnames:
            outfiles.append(open("%s.%s.ids" % (fn, pTh), "w"))

        for transid in sorted(reference):
            lineData = []
            pFilter = False
            # Look up every file before writing anything for this transcript.
            exprData = [fn2data[fn][transid] for fn in fnames]
            for exprTuple, out in zip(exprData, outfiles):
                geneid, locus, v1, v2, log_fc, p = exprTuple
                if not lineData:
                    lineData = [transid, geneid]
                lineData += [str(v1), str(v2), str(log_fc), str(p)]
                # check p value
                if p <= pTh:
                    pFilter = True
                    out.write(geneid + "\n")
            if not pFilter:
                continue

            # PFAM column: either a plain list of names or a mapping whose
            # values carry the description at index 1.
            annList = []
            if transid in geneid2pfam:
                hits = geneid2pfam[transid]
                if isinstance(hits, list):
                    annList.append(";".join(hits))
                else:
                    for pfamId, data in hits.items():
                        annList.append("%s [%s]" % (data[1], pfamId))
            lineData.append("; ".join(annList))

            # tab annotation column
            annList = []
            if geneid2tab and transid in geneid2tab:
                annList.append(";".join(geneid2tab[transid]))
            lineData.append("; ".join(annList))

            # extra annotation fields (originally labelled "Arabidopsis annotation")
            if transid in geneid2annotation:
                for ann in geneid2annotation[transid]:
                    lineData.append(ann)

            # output info
            sys.stdout.write("\t".join(lineData) + "\n")
    finally:
        # Close the id files even if a lookup or write failed.
        for out in outfiles:
            out.close()
def report(files, pfam, annotation, tab, pTh, verbose):
    """Print transcripts that pass the P-value threshold in any input file.

    Each file in ``files`` is parsed with ``load_cuffdiff``. A transcript is
    reported on stdout (tab-separated) when its P-value is <= ``pTh`` in at
    least one file; the row holds the transcript id, gene id, four value
    columns per file, a PFAM column, a tab-annotation column and any extra
    annotation fields. For every input file, the gene ids passing the
    threshold in that file are also written to ``<file name>.<pTh>.ids``.

    Args:
        files: iterable of objects with a ``name`` attribute, each handed to
            ``load_cuffdiff``.
        pfam: argument for ``load_pfam_tblout``; skipped when falsy.
        annotation: path of an annotation file; '.gff' and '.gtf' select the
            matching loader, anything else goes to ``load_annotation``.
            Skipped when falsy.
        tab: path of a file read with ``load_annotation``; skipped when falsy.
        pTh: P-value threshold (inclusive).
        verbose: accepted for interface compatibility; not used here.

    Raises:
        KeyError: if a transcript of the last input file is missing from
            another input file.
    """
    # load optional annotation sources
    geneid2pfam, geneid2annotation, geneid2tab = {}, {}, {}
    if pfam:
        geneid2pfam = load_pfam_tblout(pfam)
        sys.stderr.write(" PFAMs for %s entries loaded.\n" % len(geneid2pfam))
    if tab:
        with open(tab) as handle:
            geneid2tab = load_annotation(handle)
        sys.stderr.write(" Tab annotation for %s entries loaded.\n" % len(geneid2tab))
    if annotation:
        with open(annotation) as handle:
            if annotation.endswith('.gff'):
                geneid2annotation = load_gff_annotation(handle)
            elif annotation.endswith('.gtf'):
                geneid2annotation = load_gtf_annotation(handle)
            else:
                geneid2annotation = load_annotation(handle, True)
        sys.stderr.write(" Annotations for %s entries loaded.\n" % len(geneid2annotation))

    # load all cuffdiff files, remembering the order they were given in
    fn2data = {}
    fnames = []
    for f in files:
        fnames.append(f.name)
        fn2data[f.name] = load_cuffdiff(f)

    # write output header
    header = "#transcript id\tgene id"
    for fn in fnames:
        header += "\tcontrol\t%s\tlog2(FC)\tP-value" % fn
    header += "\tannotation\n"
    sys.stdout.write(header)

    # Nothing to compare: stop here instead of failing on an undefined name.
    if not fnames:
        return

    # The transcript list is taken from the last input file.
    reference = fn2data[fnames[-1]]

    outfiles = []
    try:
        # one gene-id list per input file
        for fn in fnames:
            outfiles.append(open("%s.%s.ids" % (fn, pTh), "w"))

        for transid in sorted(reference):
            lineData = []
            pFilter = False
            # Look up every file before writing anything for this transcript.
            exprData = [fn2data[fn][transid] for fn in fnames]
            for exprTuple, out in zip(exprData, outfiles):
                geneid, locus, v1, v2, log_fc, p = exprTuple
                if not lineData:
                    lineData = [transid, geneid]
                lineData += [str(v1), str(v2), str(log_fc), str(p)]
                # check p value
                if p <= pTh:
                    pFilter = True
                    out.write(geneid + "\n")
            if not pFilter:
                continue

            # PFAM column: either a plain list of names or a mapping whose
            # values carry the description at index 1.
            annList = []
            if transid in geneid2pfam:
                hits = geneid2pfam[transid]
                if isinstance(hits, list):
                    annList.append(";".join(hits))
                else:
                    for pfamId, data in hits.items():
                        annList.append("%s [%s]" % (data[1], pfamId))
            lineData.append("; ".join(annList))

            # tab annotation column
            annList = []
            if geneid2tab and transid in geneid2tab:
                annList.append(";".join(geneid2tab[transid]))
            lineData.append("; ".join(annList))

            # extra annotation fields (originally labelled "Arabidopsis annotation")
            if transid in geneid2annotation:
                for ann in geneid2annotation[transid]:
                    lineData.append(ann)

            # output info
            sys.stdout.write("\t".join(lineData) + "\n")
    finally:
        # Close the id files even if a lookup or write failed.
        for out in outfiles:
            out.close()