Esempio n. 1
0
def gene2go( pfam2goFn,fn,tblout,eTh=1e-05,source='broad'):
  """Write the GO terms of every protein to the file `fn + '.go'`.

  Each protein's PFAM matches are translated to GO terms through the
  PFAM-to-GO mapping; proteins without any GO term are skipped. Every
  output line has the form `protein<TAB>go1;go2;...`.

  Parameters:
    pfam2goFn -- passed to load_pfam2go(); the result is used as a
                 mapping from version-less PFAM id to an iterable of
                 GO terms.
    fn        -- output path prefix; '.go' is appended.
    tblout    -- passed to the PFAM loader selected by `source`.
    eTh       -- passed through to the PFAM loader (presumably an
                 E-value threshold -- confirm against the loaders).
    source    -- "broad" uses load_broad_pfam(), "tblout" uses
                 load_pfam_tblout(); any other value terminates the
                 program through sys.exit().
  """
  pfam2go = load_pfam2go( pfam2goFn )

  if   source=="broad":
    prot2pfam = load_broad_pfam( tblout,eTh )
  elif source=="tblout":
    prot2pfam = load_pfam_tblout( tblout,eTh )
  else:
    sys.exit("Specify valid source: tblout or broad!")

  # the context manager closes the output file even if a lookup or a
  # write raises half-way through
  with open( fn+'.go','w') as out:
    for prot in prot2pfam:
      terms = []
      for pfam in prot2pfam[prot]:
        # the mapping is keyed without the version suffix (text after '.')
        pfamShort = pfam.split('.')[0]
        if pfamShort not in pfam2go:
          continue
        for go in pfam2go[pfamShort]:
          terms.append( '%s' % go )

      # report only proteins with at least one GO term
      if terms:
        line = '%s\t%s' % ( prot,';'.join(terms) )
        # strip(';') is kept so the output stays identical to the
        # previous implementation for ids that carry a stray ';'
        out.write( line.strip(';')+"\n" )
Esempio n. 2
0
def gene2go(pfam2goFn, fn, tblout, eTh=1e-05, source='broad'):
    """Write the GO terms of every protein to the file `fn + '.go'`.

    Each protein's PFAM matches are translated to GO terms through the
    PFAM-to-GO mapping; proteins without any GO term are skipped. Every
    output line has the form `protein<TAB>go1;go2;...`.

    Parameters:
        pfam2goFn -- passed to load_pfam2go(); the result is used as a
                     mapping from version-less PFAM id to an iterable
                     of GO terms.
        fn        -- output path prefix; '.go' is appended.
        tblout    -- passed to the PFAM loader selected by `source`.
        eTh       -- passed through to the PFAM loader (presumably an
                     E-value threshold -- confirm against the loaders).
        source    -- "broad" uses load_broad_pfam(), "tblout" uses
                     load_pfam_tblout(); any other value terminates the
                     program through sys.exit().
    """
    pfam2go = load_pfam2go(pfam2goFn)

    if source == "broad":
        prot2pfam = load_broad_pfam(tblout, eTh)
    elif source == "tblout":
        prot2pfam = load_pfam_tblout(tblout, eTh)
    else:
        sys.exit("Specify valid source: tblout or broad!")

    # The context manager closes the output file even if a lookup or a
    # write raises half-way through.
    with open(fn + '.go', 'w') as out:
        for prot in prot2pfam:
            terms = []
            for pfam in prot2pfam[prot]:
                # The mapping is keyed without the version suffix
                # (the text after the first '.').
                pfamShort = pfam.split('.')[0]
                if pfamShort not in pfam2go:
                    continue
                for go in pfam2go[pfamShort]:
                    terms.append('%s' % go)

            # Report only proteins with at least one GO term.
            if terms:
                line = '%s\t%s' % (prot, ';'.join(terms))
                # strip(';') is kept so the output stays identical to the
                # previous implementation for ids that carry a stray ';'.
                out.write(line.strip(';') + "\n")
Esempio n. 3
0
def report(files, pfam, annotation, tab, pTh, verbose):
    """Print a merged, annotated table for several cuffdiff result files.

    For every transcript of the LAST file in `files` (in sorted order) the
    expression values from all files are collected. A row is written to
    stdout only if the p-value in at least one file is <= `pTh`. In
    addition, for each input file the gene ids passing the threshold are
    written to `<file name>.<pTh>.ids`.

    Note that the header announces a single "annotation" column, while
    each row carries a PFAM column, a tab-annotation column and then one
    column per entry of the gff/gtf/plain annotation.

    Parameters:
        files      -- iterable of open file objects (their `.name` is
                      used); each is parsed with load_cuffdiff(). Every
                      transcript of the last file is looked up in the
                      data of all the other files as well.
        pfam       -- if truthy, passed to load_pfam_tblout().
        annotation -- if truthy, path of an annotation file; '.gff' and
                      '.gtf' suffixes select the matching loader,
                      anything else goes through load_annotation(.., True).
        tab        -- if truthy, path of a file for load_annotation().
        pTh        -- p-value threshold; also part of the .ids file names.
        verbose    -- accepted for interface compatibility; unused here.
    """
    # Load the optional annotation sources; progress goes to stderr.
    geneid2pfam, geneid2annotation, geneid2tab = {}, {}, {}
    if pfam:
        geneid2pfam = load_pfam_tblout(pfam)
        sys.stderr.write(" PFAMs for %s entries loaded.\n" % len(geneid2pfam))
    if tab:
        # Close the input handle as soon as the loader is done with it.
        with open(tab) as handle:
            geneid2tab = load_annotation(handle)
        sys.stderr.write(" Tab annotation for %s entries loaded.\n" %
                         len(geneid2tab))
    if annotation:
        with open(annotation) as handle:
            if annotation.endswith('.gff'):
                geneid2annotation = load_gff_annotation(handle)
            elif annotation.endswith('.gtf'):
                geneid2annotation = load_gtf_annotation(handle)
            else:
                geneid2annotation = load_annotation(handle, True)
        sys.stderr.write(" Annotations for %s entries loaded.\n" %
                         len(geneid2annotation))

    # Load all cuffdiff files, remembering the order they were given in.
    fn2data = {}
    fnames = []
    for f in files:
        fnames.append(f.name)
        fn2data[f.name] = load_cuffdiff(f)

    # Write the header: four columns per input file.
    header = "#transcript id\tgene id"
    for fn in fnames:
        header += "\tcontrol\t%s\tlog2(FC)\tP-value" % fn
    header += "\tannotation\n"
    sys.stdout.write(header)

    # Without input files there is nothing to iterate over; previously
    # this case crashed on an unbound loop variable.
    if not fnames:
        return

    outfiles = []
    try:
        # One id list per input file; opened inside the try so that files
        # opened so far are still closed if a later open() fails.
        for fn in fnames:
            outfiles.append(open("%s.%s.ids" % (fn, pTh), "w"))

        # The transcript set of the last file drives the iteration (this
        # is what the leaked loop variable used to select implicitly).
        for transid in sorted(fn2data[fnames[-1]]):
            lineData = []
            pFilter = False
            exprData = [fn2data[fn][transid] for fn in fnames]
            for exprTuple, out in zip(exprData, outfiles):
                geneid, locus, v1, v2, log_fc, p = exprTuple
                if not lineData:
                    lineData = [transid, geneid]
                lineData += [str(v1), str(v2), str(log_fc), str(p)]
                # Significant in this file: keep the row, record the gene.
                if p <= pTh:
                    pFilter = True
                    out.write(geneid + "\n")

            if not pFilter:
                continue

            # PFAM column: either a plain list of names, or a mapping
            # whose values carry the text to show at index 1.
            annList = []
            if transid in geneid2pfam:
                pfamHits = geneid2pfam[transid]
                if isinstance(pfamHits, list):
                    annList.append(";".join(pfamHits))
                else:
                    # .items() works on both Python 2 and Python 3
                    for pfamId, data in pfamHits.items():
                        annList.append("%s [%s]" % (data[1], pfamId))
            lineData.append("; ".join(annList))

            # Tab-annotation column (left empty when nothing is known).
            annList = []
            if geneid2tab and transid in geneid2tab:
                annList.append(";".join(geneid2tab[transid]))
            lineData.append("; ".join(annList))

            # Remaining annotation fields, one column each.
            if transid in geneid2annotation:
                for ann in geneid2annotation[transid]:
                    lineData.append(ann)

            sys.stdout.write("\t".join(lineData) + "\n")
    finally:
        # Always release the id files, also when a row fails to format.
        for out in outfiles:
            out.close()
Esempio n. 4
0
def report(files, pfam, annotation, tab, pTh, verbose):
    """Print a merged, annotated table for several cuffdiff result files.

    For every transcript of the LAST file in `files` (in sorted order) the
    expression values from all files are collected. A row is written to
    stdout only if the p-value in at least one file is <= `pTh`. In
    addition, for each input file the gene ids passing the threshold are
    written to `<file name>.<pTh>.ids`.

    Note that the header announces a single "annotation" column, while
    each row carries a PFAM column, a tab-annotation column and then one
    column per entry of the gff/gtf/plain annotation.

    Parameters:
        files      -- iterable of open file objects (their `.name` is
                      used); each is parsed with load_cuffdiff(). Every
                      transcript of the last file is looked up in the
                      data of all the other files as well.
        pfam       -- if truthy, passed to load_pfam_tblout().
        annotation -- if truthy, path of an annotation file; '.gff' and
                      '.gtf' suffixes select the matching loader,
                      anything else goes through load_annotation(.., True).
        tab        -- if truthy, path of a file for load_annotation().
        pTh        -- p-value threshold; also part of the .ids file names.
        verbose    -- accepted for interface compatibility; unused here.
    """
    #load pfam annotation (progress messages go to stderr)
    geneid2pfam, geneid2annotation, geneid2tab = {}, {}, {}
    if pfam:
        geneid2pfam = load_pfam_tblout(pfam)
        sys.stderr.write(" PFAMs for %s entries loaded.\n" % len(geneid2pfam))
    if tab:
        #close the input handle as soon as the loader is done with it
        with open(tab) as handle:
            geneid2tab = load_annotation(handle)
        sys.stderr.write(" Tab annotation for %s entries loaded.\n" % len(geneid2tab))
    if annotation:
        with open(annotation) as handle:
            if annotation.endswith('.gff'):
                geneid2annotation = load_gff_annotation(handle)
            elif annotation.endswith('.gtf'):
                geneid2annotation = load_gtf_annotation(handle)
            else:
                geneid2annotation = load_annotation(handle, True)
        sys.stderr.write(" Annotations for %s entries loaded.\n" % len(geneid2annotation))

    #load all cuffdiff files, remembering the order they were given in
    fn2data = {}
    fnames = []
    for f in files:
        fnames.append(f.name)
        fn2data[f.name] = load_cuffdiff(f)

    #write output header: four columns per input file
    header = "#transcript id\tgene id"
    for fn in fnames:
        header += "\tcontrol\t%s\tlog2(FC)\tP-value" % fn
    header += "\tannotation\n"
    sys.stdout.write(header)

    #without input files there is nothing to iterate over; previously
    #this case crashed on an unbound loop variable
    if not fnames:
        return

    outfiles = []
    try:
        #open outfiles for ids; done inside the try so that files opened
        #so far are still closed if a later open() fails
        for fn in fnames:
            outfiles.append(open("%s.%s.ids" % (fn, pTh), "w"))

        #process all genes; the transcript set of the last file drives
        #the iteration (formerly selected through a leaked loop variable)
        for transid in sorted(fn2data[fnames[-1]]):
            lineData = []
            pFilter = False
            exprData = [fn2data[fn][transid] for fn in fnames]
            for exprTuple, out in zip(exprData, outfiles):
                geneid, locus, v1, v2, log_fc, p = exprTuple
                if not lineData:
                    lineData = [transid, geneid]
                lineData += [str(v1), str(v2), str(log_fc), str(p)]
                #check p value: keep the row and record the gene id
                if p <= pTh:
                    pFilter = True
                    out.write(geneid + "\n")

            if not pFilter:
                continue

            #add PFAM annotation: either a plain list of names, or a
            #mapping whose values carry the text to show at index 1
            annList = []
            if transid in geneid2pfam:
                pfamHits = geneid2pfam[transid]
                if isinstance(pfamHits, list):
                    annList.append(";".join(pfamHits))
                else:
                    #.items() works on both Python 2 and Python 3
                    for pfamId, data in pfamHits.items():
                        annList.append("%s [%s]" % (data[1], pfamId))
            lineData.append("; ".join(annList))

            #add tab annotation (column left empty when nothing is known)
            annList = []
            if geneid2tab and transid in geneid2tab:
                annList.append(";".join(geneid2tab[transid]))
            lineData.append("; ".join(annList))

            #add remaining annotation fields, one column each
            if transid in geneid2annotation:
                for ann in geneid2annotation[transid]:
                    lineData.append(ann)

            #output info
            sys.stdout.write("\t".join(lineData) + "\n")
    finally:
        #always release the id files, also when a row fails to format
        for out in outfiles:
            out.close()