Example #1
0
 def write_html_report(self):
     """
     Render the HTML report template and write it to ``self.opts.htmlout``.

     The template file is the class attribute ``HTML_REPORT_TEMPLATE`` inside
     ``self.tool_folder``.  It is rendered with these parameters:

     * ``program_name``   -- ``str(self.program_name)``
     * ``timestamp``      -- ``str(timenow())``
     * ``file_info``      -- one ``(name, getFileString(name, outdir))`` pair
       per non-hidden entry of ``self.opts.outdir``, oldest first by
       modification time; empty when that folder does not exist
     * ``log_data``       -- contents of ``self.tlogname`` with ``<BR />``
       inserted after every newline
     * ``command_string`` -- ``str(self.cl)``
     """
     out_folder = self.opts.outdir
     my_template = Template(
         filename=os.path.join(self.tool_folder, self.__class__.HTML_REPORT_TEMPLATE),
         strict_undefined=True)

     if os.path.exists(os.path.abspath(out_folder)):
         files = [os.path.join(out_folder, x) for x in os.listdir(out_folder)
                  if not x.startswith('.')]
         files.sort(key=os.path.getmtime)
         names = [os.path.split(f)[-1] for f in files]
         file_info = [(name, getFileString(name, out_folder)) for name in names]
     else:
         file_info = []

     # Read the log inside a context manager so the handle is always closed.
     with open(self.tlogname) as log_file:
         log_data = log_file.read().replace('\n', '\n<BR />')

     template_parameters = {
         'program_name': str(self.program_name),
         'timestamp': str(timenow()),
         'file_info': file_info,
         'log_data': log_data,
         'command_string': str(self.cl),
         }

     # The output handle is released even if rendering raises.
     with open(self.opts.htmlout, 'w') as f:
         f.write(my_template.render(**template_parameters))
    def write_html_report(self):
        """
        Render the HTML report template and write it to ``self.opts.htmlout``.

        The template file is the class attribute ``HTML_REPORT_TEMPLATE``
        inside ``self.tool_folder``.  It is rendered with these parameters:

        * ``program_name``   -- ``str(self.program_name)``
        * ``timestamp``      -- ``str(timenow())``
        * ``file_info``      -- one ``(name, getFileString(name, outdir))``
          pair per non-hidden entry of ``self.opts.outdir``, oldest first by
          modification time; empty when that folder does not exist
        * ``log_data``       -- contents of ``self.tlogname`` with ``<BR />``
          inserted after every newline
        * ``command_string`` -- ``str(self.cl)``
        """
        out_folder = self.opts.outdir
        my_template = Template(filename=os.path.join(
            self.tool_folder, self.__class__.HTML_REPORT_TEMPLATE),
                               strict_undefined=True)

        if os.path.exists(os.path.abspath(out_folder)):
            files = [
                os.path.join(out_folder, x) for x in os.listdir(out_folder)
                if not x.startswith('.')
            ]
            files.sort(key=os.path.getmtime)
            names = [os.path.split(f)[-1] for f in files]
            file_info = [
                (name, getFileString(name, out_folder)) for name in names
            ]
        else:
            file_info = []

        # Read the log inside a context manager so the handle is always closed.
        with open(self.tlogname) as log_file:
            log_data = log_file.read().replace('\n', '\n<BR />')

        template_parameters = {
            'program_name': str(self.program_name),
            'timestamp': str(timenow()),
            'file_info': file_info,
            'log_data': log_data,
            'command_string': str(self.cl)
        }

        # The output handle is released even if rendering raises.
        with open(self.opts.htmlout, 'w') as f:
            f.write(my_template.render(**template_parameters))
 def makehtml(self):
     """
     Write the HTML report for this run to ``self.opts.htmlout``.

     The page holds, in order: the Galaxy html prefix and attribution, a
     table linking every file found in ``self.opts.outdir`` (hidden files
     and entries named 'None' excluded, oldest first by modification time),
     the contents of the log file ``self.tlogname``, a note naming Picard
     and the command line ``self.cl``, and the html postfix.
     """
     with open(self.tlogname,'r') as logf:
         logdat = logf.readlines()
     res = []
     res.append(galhtmlprefix % progname)
     res.append(galhtmlattr % (progname,timenow()))
     res.append('<b>Your job produced the following outputs - check here for a record of what was done and any unexpected events</b><hr/>')
     try:
         flist = os.listdir(self.opts.outdir)
     except OSError: # missing or unreadable output directory - list nothing
         flist = []
     if len(flist) > 0: # show what's left
         flist = [x for x in flist if not (x.startswith('.') or x == 'None')]
         # order by (modification time, name)
         tlist = [(os.path.getmtime(os.path.join(self.opts.outdir,x)),x) for x in flist]
         tlist.sort()
         flist = [x[1] for x in tlist]
         res.append('<div><b>Output files.</b><hr/>\n')
         res.append('<table>\n')
         for f in flist:
             fn = os.path.split(f)[-1]
             fs = getFileString(fn,self.opts.outdir)
             res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fs))
         res.append('</table></div>\n')
     res.append('<b>Log of activity</b><hr/>\n')
     res.append('\n%s' % '<br/>'.join(logdat))
     res.append('<hr/>Note: The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n')
     res.append('generated all outputs reported here. These third party tools were')
     res.append('orchestrated by the Galaxy rgEstLibComplexity wrapper and this command line from the Galaxy form:<br/>\n%s' % (self.cl))
     res.append(galhtmlpostfix)
     # context manager guarantees the report handle is closed
     with open(self.opts.htmlout,'w') as f:
         f.write('\n'.join(res))
Example #4
0
 def __init__(self, opts=None, cl=[], tidy=True):
     """
     Set up an rgGATKRecal run, then execute it and write the html report.

     opts - options object; ``title``, ``outdir`` and ``GATK_CVflags`` are
            read here
     cl   - command line tokens, stored as one space separated string
     tidy - stored as ``self.tidy``

     Opens the run log in ``opts.outdir``, locates Rscript (falling back
     to a hard-coded path when ``whereis`` finds nothing), tries to create
     the ``pdfplots`` sub-directory and finally calls ``self.runGATK()``
     and ``self.writehtml()``.
     """
     self.ourname = 'rgGATKRecal'
     self.opts = opts
     self.tidy = tidy
     self.cl = ' '.join(cl)  # ready for the htmlfile output
     self.delme = []
     # every punctuation/whitespace character in the title becomes '_'
     killme = string.punctuation + string.whitespace
     trantab = string.maketrans(killme, '_' * len(killme))
     self.title = self.opts.title.translate(trantab)
     self.tlogname = os.path.join(
         self.opts.outdir, '%s_rg%s_Log.txt' % (self.title, self.ourname))
     self.tlog = open(self.tlogname, 'w')
     self.outtxt = '%s_%s_Out.txt' % (self.title, self.ourname)
     self.GATK_CVFlags = opts.GATK_CVflags
     self.Rscriptpath = whereis('Rscript')
     self.info = '%s on %s at %s' % (self.ourname, self.title, timenow())
     if self.Rscriptpath is None:  # GATK wants the explicit path to Rscript which comes with R now
         p = os.environ.get('PATH', '')
         # the original referenced an undefined name here and raised
         # NameError instead of logging; name the program we looked for
         self.tlog.write('### Cannot find %s on %s\n' % ('Rscript', p))
         self.Rscriptpath = '/share/shared/lx26-amd64/bin/Rscript'
     self.pdfoutdir = os.path.join(self.opts.outdir, 'pdfplots')
     self.preplotprefix = 'rgPreRecal_'
     self.postplotprefix = 'rgPostRecal_'
     try:
         os.makedirs(self.pdfoutdir)
     except OSError:  # includes the directory already existing
         self.tlog.write('## unable to create pdf output dir %s' %
                         self.pdfoutdir)
     self.delme.append(self.pdfoutdir)
     self.runGATK()
     self.writehtml()
 def __init__(self, opts=None, cl=[], tidy=True):
     """
     Set up an rgGATKRecal run, then execute it and write the html report.

     opts - options object; ``title``, ``outdir`` and ``GATK_CVflags`` are
            read here
     cl   - command line tokens, stored as one space separated string
     tidy - stored as ``self.tidy``

     Opens the run log in ``opts.outdir``, locates Rscript (falling back
     to a hard-coded path when ``whereis`` finds nothing), tries to create
     the ``pdfplots`` sub-directory and finally calls ``self.runGATK()``
     and ``self.writehtml()``.
     """
     self.ourname = "rgGATKRecal"
     self.opts = opts
     self.tidy = tidy
     self.cl = " ".join(cl)  # ready for the htmlfile output
     self.delme = []
     # every punctuation/whitespace character in the title becomes "_"
     killme = string.punctuation + string.whitespace
     trantab = string.maketrans(killme, "_" * len(killme))
     self.title = self.opts.title.translate(trantab)
     self.tlogname = os.path.join(self.opts.outdir, "%s_rg%s_Log.txt" % (self.title, self.ourname))
     self.tlog = open(self.tlogname, "w")
     self.outtxt = "%s_%s_Out.txt" % (self.title, self.ourname)
     self.GATK_CVFlags = opts.GATK_CVflags
     self.Rscriptpath = whereis("Rscript")
     self.info = "%s on %s at %s" % (self.ourname, self.title, timenow())
     if self.Rscriptpath is None:  # GATK wants the explicit path to Rscript which comes with R now
         p = os.environ.get("PATH", "")
         # the original referenced an undefined name here and raised
         # NameError instead of logging; name the program we looked for
         self.tlog.write("### Cannot find %s on %s\n" % ("Rscript", p))
         self.Rscriptpath = "/share/shared/lx26-amd64/bin/Rscript"
     self.pdfoutdir = os.path.join(self.opts.outdir, "pdfplots")
     self.preplotprefix = "rgPreRecal_"
     self.postplotprefix = "rgPostRecal_"
     try:
         os.makedirs(self.pdfoutdir)
     except OSError:  # includes the directory already existing
         self.tlog.write("## unable to create pdf output dir %s" % self.pdfoutdir)
     self.delme.append(self.pdfoutdir)
     self.runGATK()
     self.writehtml()
 def makehtml(self):
     """
     Write the HTML report for this run to ``self.opts.htmlout``.

     The page holds, in order: the Galaxy html prefix and attribution, a
     thumbnail (the ``.jpg`` sharing the root name of ``self.isPDF``)
     linking to ``self.isPDF``, a table linking every file found in
     ``self.opts.outdir`` (hidden files and entries named 'None' excluded,
     oldest first by modification time), the contents of the log file
     ``self.tlogname``, a note naming Picard and the command line
     ``self.cl``, and the html postfix.
     """
     with open(self.tlogname, 'r') as logf:
         logdat = logf.readlines()
     res = []
     res.append(galhtmlprefix % progname)
     res.append(galhtmlattr % (progname, timenow()))
     res.append(
         '<b>Your job produced the following outputs - check here for a record of what was done and any unexpected events</b><hr/>'
     )
     imghref = '%s.jpg' % os.path.splitext(self.isPDF)[0]  # removes .pdf
     res.append('<table cellpadding="10"><tr><td>\n')
     res.append(
         '<a href="%s"><img src="%s" alt="%s" hspace="10" align="middle"></a>\n'
         % (self.isPDF, imghref, imghref))
     # close the cell, then the row (the original emitted '</tr><td>')
     res.append('</td></tr></table>\n')
     try:
         flist = os.listdir(self.opts.outdir)
     except OSError:  # missing or unreadable output directory - list nothing
         flist = []
     if len(flist) > 0:  # we should clean everything up - picard doesn't tell us what it did in cleansam unfortunately
         flist = [
             x for x in flist if not (x.startswith('.') or x == 'None')
         ]
         # order by (modification time, name)
         tlist = [(os.path.getmtime(os.path.join(self.opts.outdir, x)), x)
                  for x in flist]
         tlist.sort()
         flist = [x[1] for x in tlist]
         res.append('<div><b>Output files.</b><hr/>\n')
         res.append('<table>\n')
         for f in flist:
             fn = os.path.split(f)[-1]
             fs = getFileString(fn, self.opts.outdir)
             res.append('<tr><td><a href="%s">%s</a></td></tr>\n' %
                        (fn, fs))
         res.append('</table></div>\n')
     res.append('<b>Log of activity</b><hr/>\n')
     res.append('\n%s' % '<br/>'.join(logdat))
     res.append(
         '<hr/>Note: The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n'
     )
     res.append(
         'generated all outputs reported here. These third party tools were'
     )
     res.append(
         'orchestrated by the Galaxy rgInsertSize wrapper and this command line from the Galaxy form:<br/>\n%s'
         % (self.cl))
     res.append(galhtmlpostfix)
     # context manager guarantees the report handle is closed
     with open(self.opts.htmlout, 'w') as f:
         f.write('\n'.join(res))
 def writehtml(self):
     """
     Write the HTML report for this run to ``self.opts.htmlout``.

     For every ``.pdf`` found in ``self.opts.outdir`` a jpg thumbnail is
     requested through ``self.run`` (a ``mogrify`` command) and the
     thumbnails are laid out two per table row; an unpaired last thumbnail
     gets a row of its own.  A table linking all output files (hidden
     files and entries named 'None' excluded, oldest first by modification
     time) follows, then the contents of the log file ``self.tlogname``
     and a note with the command line ``self.cl``.
     """
     with open(self.tlogname,'r') as logf:
         logdat = logf.readlines()
     res = []
     res.append(galhtmlprefix % progname)
     res.append(galhtmlattr % (progname,timenow()))
     try:
         flist = os.listdir(self.opts.outdir)
     except OSError: # missing or unreadable output directory - list nothing
         flist = []
     if len(flist) > 0: # show what's left
         flist = [x for x in flist if not (x.startswith('.') or x == 'None')]
         pdfs = [x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf']
         # order by (modification time, name)
         tlist = [(os.path.getmtime(os.path.join(self.opts.outdir,x)),x) for x in flist]
         tlist.sort()
         flist = [x[1] for x in tlist]
         if len(pdfs) > 0:
             cells = []
             pdfs.sort()
             res.append('<div><table cellpadding="5" cellspacing="10">\n')
             for p in pdfs:
                 pfname = os.path.split(p)[-1]
                 pfroot = os.path.splitext(pfname)[0]
                 imghref = '%s.jpg' % pfroot # thumbnail name from mogrify
                 cl = ['mogrify', '-resize x300 -write %s %s' % (imghref,pfname),]
                 self.run(cl)
                 s = '<a href="%s"><img src="%s" alt="%s" hspace="10" align="middle"></a>' % (pfname,imghref,pfname)
                 cells.append('<td>%s</br>%s</td>' % (pfroot,s))
             ncells = len(cells)
             # complete pairs: (0,1), (2,3), ...
             for i in range(0, ncells - 1, 2):
                 res.append('<tr>%s%s</tr>\n' % (cells[i],cells[i+1]))
             # Only an odd count leaves an unpaired last cell.  The original
             # tested for an even count, which repeated the last thumbnail
             # when every cell was already paired and dropped it otherwise.
             if ncells % 2 == 1: # last one
                 res.append('<tr colspan="2">%s</tr>\n' % (cells[-1]))
             res.append('</table></div>\n')
         res.append('<div><b>Output files.</b><hr/>\n')
         res.append('<table>\n')
         for f in flist:
             fn = os.path.split(f)[-1]
             fs = getFileString(fn,self.opts.outdir)
             res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fs))
         res.append('</table></div>\n')
     res.append('<b>Your job produced the following log of activity - check here for a record of what was done and any unexpected events</b><hr/>')
     res.append('\n%s' % '<br/>'.join(logdat))
     res.append('<hr/>Note: The freely available <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Main_Page">GATK</a> \n')
     # stray pasted text ('reportexampleBAM.bam ed') removed from this sentence
     res.append('did all the work reported here. GATK is an independent non-Galaxy community resource, whose third party tools were')
     res.append('orchestrated by the Galaxy rgGATKRecalibrate wrapper and this command line from the Galaxy form:<br/>\n%s' % (self.cl))
     res.append(galhtmlpostfix)
     # context manager guarantees the report handle is closed
     with open(self.opts.htmlout,'w') as f:
         f.write('\n'.join(res))
Example #8
0
 def writehtml(self):
     """
     Write the HTML report for this run to ``self.opts.htmlout``.

     For every ``.pdf`` found in ``self.opts.outdir`` a jpg thumbnail is
     requested through ``self.run`` (a ``mogrify`` command) and the
     thumbnails are laid out two per table row; an unpaired last thumbnail
     gets a row of its own.  A table linking all output files (hidden
     files and entries named 'None' excluded, oldest first by modification
     time) follows, then the contents of the log file ``self.tlogname``
     and a note with the command line ``self.cl``.
     """
     with open(self.tlogname,'r') as logf:
         logdat = logf.readlines()
     res = []
     res.append(galhtmlprefix % progname)
     res.append(galhtmlattr % (progname,timenow()))
     try:
         flist = os.listdir(self.opts.outdir)
     except OSError: # missing or unreadable output directory - list nothing
         flist = []
     if len(flist) > 0: # show what's left
         flist = [x for x in flist if not (x.startswith('.') or x == 'None')]
         pdfs = [x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf']
         # order by (modification time, name)
         tlist = [(os.path.getmtime(os.path.join(self.opts.outdir,x)),x) for x in flist]
         tlist.sort()
         flist = [x[1] for x in tlist]
         if len(pdfs) > 0:
             cells = []
             pdfs.sort()
             res.append('<div><table cellpadding="5" cellspacing="10">\n')
             for p in pdfs:
                 pfname = os.path.split(p)[-1]
                 pfroot = os.path.splitext(pfname)[0]
                 imghref = '%s.jpg' % pfroot # thumbnail name from mogrify
                 cl = ['mogrify', '-resize x300 -write %s %s' % (imghref,pfname),]
                 self.run(cl)
                 s = '<a href="%s"><img src="%s" title="%s" hspace="10" align="middle"></a>' % (pfname,imghref,pfname)
                 cells.append('<td>%s</br>%s</td>' % (pfroot,s))
             ncells = len(cells)
             # complete pairs: (0,1), (2,3), ...
             for i in range(0, ncells - 1, 2):
                 res.append('<tr>%s%s</tr>\n' % (cells[i],cells[i+1]))
             # Only an odd count leaves an unpaired last cell.  The original
             # tested for an even count, which repeated the last thumbnail
             # when every cell was already paired and dropped it otherwise.
             if ncells % 2 == 1: # last one
                 res.append('<tr colspan="2">%s</tr>\n' % (cells[-1]))
             res.append('</table></div>\n')
         res.append('<div><b>Output files.</b><hr/>\n')
         res.append('<table>\n')
         for f in flist:
             fn = os.path.split(f)[-1]
             fs = getFileString(fn,self.opts.outdir)
             res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fs))
         res.append('</table></div>\n')
     res.append('<b>Your job produced the following log of activity - check here for a record of what was done and any unexpected events</b><hr/>')
     res.append('\n%s' % '<br/>'.join(logdat))
     res.append('<hr/>Note: The freely available <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Main_Page">GATK</a> \n')
     # stray pasted text ('reportexampleBAM.bam ed') removed from this sentence
     res.append('did all the work reported here. GATK is an independent non-Galaxy community resource, whose third party tools were')
     res.append('orchestrated by the Galaxy rgGATKRecalibrate wrapper and this command line from the Galaxy form:<br/>\n%s' % (self.cl))
     res.append(galhtmlpostfix)
     # context manager guarantees the report handle is closed
     with open(self.opts.htmlout,'w') as f:
         f.write('\n'.join(res))
Example #9
0
 def __init__(self,opts=None,cl=[],tidy=True):
     """
     Set up an rgSortBam run.

     opts - options object; ``title`` and ``tmpdir`` are read here
     cl   - command line tokens, stored as one space separated string
     tidy - stored as ``self.tidy``

     A uniquely named log file is created in ``opts.tmpdir`` and left open
     for writing as ``self.tlog``; its path is kept in ``self.tlogname``.
     """
     import os # local import: only needed to wrap the descriptor from mkstemp
     self.ourname = 'rgSortBam'
     self.opts = opts
     self.tidy = tidy
     self.cl = ' '.join(cl) # ready for the htmlfile output
     self.delme = []
     # every punctuation/whitespace character in the title becomes '_'
     killme = string.punctuation + string.whitespace
     trantab = string.maketrans(killme,'_'*len(killme))
     self.title = self.opts.title.translate(trantab)
     fd,self.tlogname = tempfile.mkstemp(dir=self.opts.tmpdir,suffix='rgSortBam.log')
     # mkstemp hands back an already open descriptor; wrap it rather than
     # opening the path a second time, which left the descriptor open forever
     self.tlog = os.fdopen(fd,'w')
     self.info = '%s on %s at %s' % (self.ourname,self.opts.title,timenow())
 def __init__(self,opts=None,cl=[],tidy=False):
     """
     Set up an rgPicardInsertSize run.

     opts - options object; ``title`` and ``outdir`` are read here
     cl   - command line tokens, stored as one space separated string
     tidy - stored as ``self.tidy``

     The log file ``rgInsertSizeMetrics.txt`` is opened for writing in
     ``opts.outdir`` and kept as ``self.tlog``.
     """
     self.ourname = 'rgPicardInsertSize'
     self.opts, self.tidy = opts, tidy
     # the command line is kept as one string, ready for the htmlfile output
     self.cl = ' '.join(cl)
     self.delme = []
     # every punctuation/whitespace character in the title becomes '_'
     unsafe = string.punctuation + string.whitespace
     self.title = self.opts.title.translate(string.maketrans(unsafe,'_'*len(unsafe)))
     self.tlogname = os.path.join(self.opts.outdir,'rgInsertSizeMetrics.txt')
     self.tlog = open(self.tlogname,'w')
     self.isPDF = 'InsertSizeHist.pdf'
     stamp = (self.ourname,self.title,timenow())
     self.info = '%s on %s at %s' % stamp
Example #11
0
 def writehtml(self):
     """
     Write the HTML report for this run to ``self.opts.htmlout``.

     The page holds, in order: the Galaxy html prefix and attribution, a
     note crediting GATK, whatever ``self.writeImages`` adds for the
     ``.pdf`` files found in ``self.opts.outdir``, a table linking all
     output files (hidden files and entries named 'None' excluded, oldest
     first by modification time), the contents of the log file
     ``self.tlogname`` and the html postfix.
     """
     with open(self.tlogname, 'r') as logf:
         logdat = logf.readlines()
     res = []
     res.append(galhtmlprefix % progname)
     res.append(galhtmlattr % (progname, timenow()))
     # NOTE(review): this <font> tag is not closed anywhere in this method -
     # confirm whether galhtmlpostfix is expected to close it
     res.append(
         '<font size="-2">Note: The freely available <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Main_Page">GATK</a>'
     )
     res.append(
         'did all the calculations arranged here in your Galaxy history')
     try:
         flist = os.listdir(self.opts.outdir)
     except OSError:  # missing or unreadable output directory - list nothing
         flist = []
     if len(flist) > 0:  # show what's left after cleanup
         flist = [
             x for x in flist if not (x.startswith('.') or x == 'None')
         ]
         pdfs = [
             x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf'
         ]
         # order by (modification time, name)
         tlist = [(os.path.getmtime(os.path.join(self.opts.outdir, x)), x)
                  for x in flist]
         tlist.sort()
         flist = [x[1] for x in tlist]
         if len(pdfs) > 0:
             res = self.writeImages(pdfs, res)
         res.append('<div><b>Output files.</b><hr/>\n')
         res.append('<table>\n')
         for f in flist:
             fn = os.path.split(f)[-1]
             fs = getFileString(fn, self.opts.outdir)
             res.append('<tr><td><a href="%s">%s</a></td></tr>\n' %
                        (fn, fs))
         res.append('</table></div>\n')
     res.append(
         '<b>Your job produced the following log of activity - check here for a record of what was done and any unexpected events</b><hr/>'
     )
     res.append('\n%s' % '<br/>'.join(logdat))
     res.append(galhtmlpostfix)
     # context manager guarantees the report handle is closed
     with open(self.opts.htmlout, 'w') as f:
         f.write('\n'.join(res))
 def __init__(self, opts=None, cl=[], tidy=False):
     """
     Set up an rgPicardInsertSize run.

     opts - options object; ``title`` and ``outdir`` are read here
     cl   - command line tokens, stored as one space separated string
     tidy - stored as ``self.tidy``

     The log file ``rgInsertSizeMetrics.txt`` is opened for writing in
     ``opts.outdir`` and kept as ``self.tlog``.
     """
     self.ourname = 'rgPicardInsertSize'
     self.opts, self.tidy = opts, tidy
     # the command line is kept as one string, ready for the htmlfile output
     self.cl = ' '.join(cl)
     self.delme = []
     # every punctuation/whitespace character in the title becomes '_'
     unsafe = string.punctuation + string.whitespace
     self.title = self.opts.title.translate(
         string.maketrans(unsafe, '_' * len(unsafe)))
     self.tlogname = os.path.join(self.opts.outdir,
                                  'rgInsertSizeMetrics.txt')
     self.tlog = open(self.tlogname, 'w')
     self.isPDF = 'InsertSizeHist.pdf'
     stamp = (self.ourname, self.title, timenow())
     self.info = '%s on %s at %s' % stamp
 def __init__(self,opts=None,cl=[],tidy=True):
     """
     Prepare an rgGATKRecal run, then execute it and write the html report.

     opts - options object; ``title`` and ``outdir`` are read here
     cl   - command line tokens, stored as one space separated string
     tidy - stored as ``self.tidy``
     """
     self.ourname = 'rgGATKRecal'
     self.opts, self.tidy = opts, tidy
     # the command line is kept as one string, ready for the htmlfile output
     self.cl = ' '.join(cl)
     self.delme = []
     # every punctuation/whitespace character in the title becomes '_'
     unsafe = string.punctuation + string.whitespace
     self.title = self.opts.title.translate(string.maketrans(unsafe,'_'*len(unsafe)))
     names = (self.title,self.ourname)
     self.tlogname = os.path.join(self.opts.outdir,'%s_rg%s_Log.txt' % names)
     self.tlog = open(self.tlogname,'w')
     self.outtxt = '%s_%s_Out.txt' % names
     self.info = '%s on %s at %s' % (self.ourname,self.title,timenow())
     # the whole job runs at construction time
     self.runGATK()
     self.writehtml()
Example #14
0
 def __init__(self, opts=None, cl=[], tidy=True):
     """
     Prepare an rgGATKRecal run, then execute it and write the html report.

     opts - options object; ``title`` and ``outdir`` are read here
     cl   - command line tokens, stored as one space separated string
     tidy - stored as ``self.tidy``
     """
     self.ourname = 'rgGATKRecal'
     self.opts, self.tidy = opts, tidy
     # the command line is kept as one string, ready for the htmlfile output
     self.cl = ' '.join(cl)
     self.delme = []
     # every punctuation/whitespace character in the title becomes '_'
     unsafe = string.punctuation + string.whitespace
     self.title = self.opts.title.translate(
         string.maketrans(unsafe, '_' * len(unsafe)))
     names = (self.title, self.ourname)
     self.tlogname = os.path.join(self.opts.outdir,
                                  '%s_rg%s_Log.txt' % names)
     self.tlog = open(self.tlogname, 'w')
     self.outtxt = '%s_%s_Out.txt' % names
     self.info = '%s on %s at %s' % (self.ourname, self.title, timenow())
     # the whole job runs at construction time
     self.runGATK()
     self.writehtml()
 def __init__(self,opts=None,cl=[],fargs=[],tidy=True):
     """
     Set up an rgGATKCoverDepth run.

     opts  - options object; ``title`` and ``outdir`` are read here
     cl    - command line tokens, stored as one space separated string
     fargs - stored as ``self.fargs``
     tidy  - stored as ``self.tidy``

     The output directory is created when missing, then the run log is
     opened for writing inside it and kept as ``self.tlog``.
     """
     self.ourname = 'rgGATKCoverDepth'
     self.fargs = fargs
     self.opts = opts
     self.tidy = tidy
     self.cl = ' '.join(cl) # ready for the htmlfile output
     self.delme = []
     # every punctuation/whitespace character in the title becomes '_'
     killme = string.punctuation + string.whitespace
     trantab = string.maketrans(killme,'_'*len(killme))
     self.title = self.opts.title.translate(trantab)
     # The directory has to exist before the log file inside it can be
     # opened.  The original opened the log first, so a missing directory
     # failed there and the "made out dir" branch could never run.
     try:
         os.makedirs(self.opts.outdir)
         madeoutdir = True
     except OSError: # typically the directory already exists
         madeoutdir = False
     self.tlogname = os.path.join(self.opts.outdir,'%s_rg%s_Log.txt' % (self.title,self.ourname))
     self.tlog = open(self.tlogname,'w')
     self.info = '%s on %s at %s' % (self.ourname,self.title,timenow())
     if madeoutdir:
         self.tlog.write('# made out dir %s\n' % self.opts.outdir)
Example #16
0
 def __init__(self,opts=None,cl=[],fargs=[],tidy=True):
     """
     Set up an rgGATKCoverDepth run.

     opts  - options object; ``title`` and ``outdir`` are read here
     cl    - command line tokens, stored as one space separated string
     fargs - stored as ``self.fargs``
     tidy  - stored as ``self.tidy``

     The output directory is created when missing, then the run log is
     opened for writing inside it and kept as ``self.tlog``.
     """
     self.ourname = 'rgGATKCoverDepth'
     self.fargs = fargs
     self.opts = opts
     self.tidy = tidy
     self.cl = ' '.join(cl) # ready for the htmlfile output
     self.delme = []
     # every punctuation/whitespace character in the title becomes '_'
     killme = string.punctuation + string.whitespace
     trantab = string.maketrans(killme,'_'*len(killme))
     self.title = self.opts.title.translate(trantab)
     # The directory has to exist before the log file inside it can be
     # opened.  The original opened the log first, so a missing directory
     # failed there and the "made out dir" branch could never run.
     try:
         os.makedirs(self.opts.outdir)
         madeoutdir = True
     except OSError: # typically the directory already exists
         madeoutdir = False
     self.tlogname = os.path.join(self.opts.outdir,'%s_rg%s_Log.txt' % (self.title,self.ourname))
     self.tlog = open(self.tlogname,'w')
     self.info = '%s on %s at %s' % (self.ourname,self.title,timenow())
     if madeoutdir:
         self.tlog.write('# made out dir %s\n' % self.opts.outdir)
 def writehtml(self):
     """
     Write the HTML report for this run to ``self.opts.htmlout``.

     The page holds, in order: the Galaxy html prefix and attribution, a
     note crediting GATK, whatever ``self.writeImages`` adds for the
     ``.pdf`` files found in ``self.opts.outdir``, a table linking all
     output files (hidden files and entries named "None" excluded, oldest
     first by modification time), the contents of the log file
     ``self.tlogname`` and the html postfix.
     """
     with open(self.tlogname, "r") as logf:
         logdat = logf.readlines()
     res = []
     res.append(galhtmlprefix % progname)
     res.append(galhtmlattr % (progname, timenow()))
     # NOTE(review): this <font> tag is not closed anywhere in this method -
     # confirm whether galhtmlpostfix is expected to close it
     res.append(
         '<font size="-2">Note: The freely available <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Main_Page">GATK</a>'
     )
     res.append("did all the calculations arranged here in your Galaxy history")
     try:
         flist = os.listdir(self.opts.outdir)
     except OSError:  # missing or unreadable output directory - list nothing
         flist = []
     if len(flist) > 0:  # show what's left after cleanup
         flist = [x for x in flist if not (x.startswith(".") or x == "None")]
         pdfs = [x for x in flist if os.path.splitext(x)[-1].lower() == ".pdf"]
         # order by (modification time, name)
         tlist = [(os.path.getmtime(os.path.join(self.opts.outdir, x)), x) for x in flist]
         tlist.sort()
         flist = [x[1] for x in tlist]
         if len(pdfs) > 0:
             res = self.writeImages(pdfs, res)
         res.append("<div><b>Output files.</b><hr/>\n")
         res.append("<table>\n")
         for f in flist:
             fn = os.path.split(f)[-1]
             fs = getFileString(fn, self.opts.outdir)
             res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn, fs))
         res.append("</table></div>\n")
     res.append(
         "<b>Your job produced the following log of activity - check here for a record of what was done and any unexpected events</b><hr/>"
     )
     res.append("\n%s" % "<br/>".join(logdat))
     res.append(galhtmlpostfix)
     # context manager guarantees the report handle is closed
     with open(self.opts.htmlout, "w") as f:
         f.write("\n".join(res))
 def makehtml(self):
     """
     Write the HTML report for this run to ``self.opts.htmlout``.

     The page holds, in order: the Galaxy html prefix and attribution, a
     thumbnail (the ``.jpg`` sharing the root name of ``self.isPDF``)
     linking to ``self.isPDF``, a table linking every file found in
     ``self.opts.outdir`` (hidden files and entries named 'None' excluded,
     oldest first by modification time), the contents of the log file
     ``self.tlogname``, a note naming Picard and the command line
     ``self.cl``, and the html postfix.
     """
     with open(self.tlogname,'r') as logf:
         logdat = logf.readlines()
     res = []
     res.append(galhtmlprefix % progname)
     res.append(galhtmlattr % (progname,timenow()))
     res.append('<b>Your job produced the following outputs - check here for a record of what was done and any unexpected events</b><hr/>')
     imghref = '%s.jpg' % os.path.splitext(self.isPDF)[0] # removes .pdf
     res.append('<table cellpadding="10"><tr><td>\n')
     res.append('<a href="%s"><img src="%s" alt="%s" hspace="10" align="middle"></a>\n' % (self.isPDF,imghref,imghref))
     # close the cell, then the row (the original emitted '</tr><td>')
     res.append('</td></tr></table>\n')
     try:
         flist = os.listdir(self.opts.outdir)
     except OSError: # missing or unreadable output directory - list nothing
         flist = []
     if len(flist) > 0: # we should clean everything up - picard doesn't tell us what it did in cleansam unfortunately
         flist = [x for x in flist if not (x.startswith('.') or x == 'None')]
         # order by (modification time, name)
         tlist = [(os.path.getmtime(os.path.join(self.opts.outdir,x)),x) for x in flist]
         tlist.sort()
         flist = [x[1] for x in tlist]
         res.append('<div><b>Output files.</b><hr/>\n')
         res.append('<table>\n')
         for f in flist:
             fn = os.path.split(f)[-1]
             fs = getFileString(fn,self.opts.outdir)
             res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fs))
         res.append('</table></div>\n')
     res.append('<b>Log of activity</b><hr/>\n')
     res.append('\n%s' % '<br/>'.join(logdat))
     res.append('<hr/>Note: The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n')
     res.append('generated all outputs reported here. These third party tools were')
     res.append('orchestrated by the Galaxy rgInsertSize wrapper and this command line from the Galaxy form:<br/>\n%s' % (self.cl))
     res.append(galhtmlpostfix)
     # context manager guarantees the report handle is closed
     with open(self.opts.htmlout,'w') as f:
         f.write('\n'.join(res))
Example #19
0
 # NOTE(review): this is the tail of a function body whose header is not
 # visible here; bfname, name, verbose, myversion and plinke are assigned
 # outside this fragment.
 # Positional arguments 3-6: summary output, log file, log directory, GFF output.
 outfname = sys.argv[3]
 logf = sys.argv[4]
 logoutdir = sys.argv[5]
 gffout = sys.argv[6]
 topn = 1000
 # Best-effort creation of the log directory; any failure is ignored.
 try:
     os.makedirs(logoutdir)
 except:
     pass
 map_file = None
 me = sys.argv[0]
 amapf = '%s.bim' % bfname # to decode map in xformModel
 # NOTE(review): flog is not closed anywhere in the visible code.
 flog = file(logf,'w')
 # Banner lines are echoed to stdout and collected for the end of the log.
 logme = []
 cdir = os.getcwd()
 s = 'Rgenetics %s http://rgenetics.org Galaxy Tools, rgCaCo.py started %s\n' % (myversion,timenow())
 print >> sys.stdout, s # so will appear as blurb for file
 logme.append(s)
 if verbose:
     s = 'rgCaCo.py:  bfname=%s, logf=%s, argv = %s\n' % (bfname, logf, sys.argv) 
     print >> sys.stdout, s # so will appear as blurb for file
     logme.append(s)
 # plink runs with a fresh temporary directory as its working directory
 # and with its stdout sent to the log file.
 twd = tempfile.mkdtemp(suffix='rgCaCo') # make sure plink doesn't spew log file into the root!
 tname = os.path.join(twd,name)
 vcl = [plinke,'--noweb','--bfile',bfname,'--out',name,'--model']
 p=subprocess.Popen(' '.join(vcl),shell=True,stdout=flog,cwd=twd)
 # NOTE(review): the exit status is stored but never checked below.
 retval = p.wait()
 resf = '%s.model' % tname # plink output is here we hope
 xformModel(bfname,resf,outfname,name,amapf,flog) # leaves the desired summary file
 makeGFF(resf=outfname,outfname=gffout,logf=flog,twd=twd,name='rgGLM_TopTable',description=name,topn=topn)
 # The collected banner lines go at the end of the log.
 flog.write('\n'.join(logme))
Example #20
0
def clean():
    """
    Prune a plink binary fileset to LD-independent markers and index the outputs.

    Reads 14 positional parameters from ``sys.argv`` (listed in the usage
    text printed when too few are given), then:

    * hands three plink tasks to ``pruneld`` - an ``--indep-pairwise`` run
      with the supplied filters, an ``--extract`` / ``--make-bed`` run on
      the resulting prune list, and a ``--recode`` run;
    * writes the start banner plus whatever ``pruneld`` returns to
      ``<outfpath>/<title>.log``;
    * writes an HTML list linking every file found in ``outfpath`` to the
      file named by parameter 10.

    Exits with status 1 when fewer than 14 parameters are supplied.
    """
    # sys.argv[14] is read below, so the script name plus 14 parameters are
    # needed; the original tested "< 14" and hit IndexError with 12 or 13.
    if len(sys.argv) < 15:
        sys.stdout.write('## %s expected 14 params in sys.argv, got %d - %s\n' % (prog,len(sys.argv),sys.argv))
        sys.stdout.write("""this script will filter a linkage format ped
        and map file containing genotypes. It takes 14 parameters - the plink --f parameter and"
        a new filename root for the output clean data followed by the mind,geno,hwe,maf, mef and mei"
        documented in the plink docs plus the file to be returned to Galaxy
        Called as:
        <command interpreter="python">
        rgLDIndep.py '$input_file.extra_files_path' '$input_file.metadata.base_name' '$title' '$mind'
        '$geno' '$hwe' '$maf' '$mef' '$mei' '$out_file1'
        '$out_file1.extra_files_path'  '$window' '$step' '$r2'
        </command>
        """ + '\n')
        sys.exit(1)
    plog = ['## Rgenetics: http://rgenetics.org Galaxy Tools rgLDIndep.py started %s\n' % timenow()]
    inpath = sys.argv[1]
    inbase = sys.argv[2]
    # every punctuation/whitespace character in the title becomes '_'
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme,'_'*len(killme))
    title = sys.argv[3].translate(trantab)
    mind = sys.argv[4]
    geno = sys.argv[5]
    hwe = sys.argv[6]
    maf = sys.argv[7]
    me1 = sys.argv[8]
    me2 = sys.argv[9]
    outfname = sys.argv[10]
    outfpath = sys.argv[11]
    winsize = sys.argv[12]
    step = sys.argv[13]
    r2 = sys.argv[14]
    outpath = os.path.join(outfpath,title)
    try:
        os.makedirs(outfpath)
    except OSError: # typically the directory already exists
        pass
    ldin = os.path.join(inpath,inbase)
    # --indep-pairwise uses a simple pairwise r^2 threshold: the first two
    # values are the window size and step, the third is the r^2 cutoff.
    # It is based on genotypic correlation, so no phasing is involved.
    plinktasks = [['--bfile',ldin,'--indep-pairwise %s %s %s' % (winsize,step,r2),'--out',outpath,
    '--mind',mind,'--geno',geno,'--maf',maf,'--hwe',hwe,'--me',me1,me2,],
    ['--bfile',ldin,'--extract %s.prune.in --make-bed --out %s' % (outpath,outpath)],
    ['--bfile',outpath,'--recode --out',outpath]] # make map file - don't really need ped but...
    # subset of ld independent markers for eigenstrat and other requirements
    vclbase = [plinke,'--noweb']
    # The html file is opened before plink runs, as before, but is now
    # closed even when a later step raises.
    with open(outfname,'w') as outf:
        outf.write(galhtmlprefix % prog)
        prunelog = pruneld(plinktasks=plinktasks,cd=outfpath,vclbase = vclbase)
        plog += prunelog
        flog = '%s.log' % outpath
        with open(flog,'w') as flogf:
            flogf.write(''.join(plog))
            flogf.write('\n')
        flist = glob.glob(os.path.join(outfpath,'*'))
        flist.sort()
        for data in flist:
            fname = os.path.split(data)[-1]
            outf.write('<li><a href="%s">%s</a></li>\n' % (fname,fname))
        # NOTE(review): no <ol> or <div> is opened in this function -
        # presumably galhtmlprefix opens them; confirm
        outf.write('</ol></div>\n')
        outf.write("</div></body></html>")
Example #21
0
def main():
    """
    Run the Manhattan/QQ plotting job and write its HTML result page.

    Positional parameters read from ``sys.argv``:

    1. input file name
    2. title (a copy with punctuation/whitespace turned into '_' is also
       passed on)
    3. output html file
    4. output directory
    5. chromosome column (zero based; anything that is not an integer
       disables the manhattan plots)
    6. offset column (same convention as 5)
    7. comma separated p-value column numbers (zero based)
    8. optional grey flag: '1' or 'true' (any case) selects grey

    Columns are passed on to ``doManQQ`` one based.  The file/description
    pairs it returns are listed in the page - png/jpg images are shown
    inline and linked to the pdf of the same root name - followed by the
    log lines it returns.

    Exits with status 1 when too few parameters are supplied or when a
    p-value column is not an integer.
    """
    u = """<command interpreter="python">
        rgManQQ.py '$input_file' "$name" '$out_html' '$out_html.files_path' '$chrom_col' '$offset_col' '$pval_col'
    </command>
    """
    npar = 8
    if len(sys.argv) < npar:
        sys.stdout.write('## error - too few command line parameters - wanting %d\n' % npar)
        sys.stdout.write(u + '\n')
        sys.exit(1)
    input_fname = sys.argv[1]
    title = sys.argv[2]
    # every punctuation/whitespace character in the title becomes '_'
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme,'_'*len(killme))
    ctitle = title.translate(trantab)
    outhtml = sys.argv[3]
    outdir = sys.argv[4]
    try:
        chrom_col = int(sys.argv[5])
    except ValueError:
        chrom_col = -1
    try:
        offset_col = int(sys.argv[6])
    except ValueError:
        offset_col = -1
    try:
        pcols = [int(x) for x in sys.argv[7].strip().split(',')]
    except ValueError:
        sys.stderr.write('## Cannot run rgManQQ - missing pval column\n')
        sys.exit(1)
    if chrom_col == -1 or offset_col == -1: # was passed as zero - do not do manhattan plots
        chrom_col = -1
        offset_col = -1
    # The length check above only guarantees sys.argv[7], and the usage text
    # lists seven parameters, so the grey flag is treated as optional rather
    # than read unconditionally (which raised IndexError when it was absent).
    grey = 0
    if len(sys.argv) > npar and sys.argv[npar].lower() in ['1','true']:
        grey = 1
    p = ['%d' % (x + 1) for x in pcols] # one based for doManQQ
    rlog,flist = doManQQ(input_fname,chrom_col+1,offset_col+1,','.join(p),title,grey,ctitle,outdir)
    flist.sort()
    html = [galhtmlprefix % progname,]
    html.append('<h1>%s</h1>' % title)
    if len(flist) > 0:
        html.append('<table>\n')
        for row in flist:
            fname,expl = row # RRun returns pairs of filenames fiddled for the log and R script
            n,e = os.path.splitext(fname)
            if e in ['.png','.jpg']:
                # images link to the pdf of the same root name; the link
                # text is tagged when that pdf is missing or will not compress
                pdf = '%s.pdf' % n
                pdff = os.path.join(outdir,pdf)
                if os.path.exists(pdff):
                    rval = compressPDF(inpdf=pdff)
                    if rval != 0:
                        pdf = '%s(not_compressed)' % pdf
                else:
                    pdf = '%s(not_found)' % pdf
                s= '<tr><td><a href="%s"><img src="%s" title="%s" hspace="10" width="800"></a></td></tr>' \
                 % (pdf,fname,expl)
                html.append(s)
            else:
                html.append('<tr><td><a href="%s">%s</a></td></tr>' % (fname,expl))
        html.append('</table>\n')
    else:
        # closing tag now matches the opening <h2> (was </h1>)
        html.append('<h2>### Error - R returned no files - please confirm that parameters are sane</h2>')
    html.append('<h3>R log follows below</h3><hr><pre>\n')
    html += rlog
    html.append('</pre>\n')
    html.append(galhtmlattr % (progname,timenow()))
    html.append(galhtmlpostfix)
    # context manager guarantees the report handle is closed
    with open(outhtml,'w') as htmlf:
        htmlf.write('\n'.join(html))
        htmlf.write('\n')
Example #22
0
def pruneld(plinktasks=None, cd='./', vclbase=None):
    """
    Run each plink LD pruning task and return the collected, filtered log.

    plink blathers when doing pruning, so its stdout and stderr are captured
    in a temporary file; every line containing 'Pruning SNP' is dropped and
    the remaining lines are appended to the returned log.

    Args:
        plinktasks: list of tasks, each itself a list of command line tokens
            that is appended to vclbase. Defaults to no tasks.
        cd: working directory the command is run in.
        vclbase: list of command line tokens shared by every task.
            Defaults to an empty list.

    Returns:
        A list of log strings, starting with the Rgenetics banner line.

    Background notes
    ----------------
    Linkage disequilibrium based SNP pruning.
    If a million snps in 3 billion base pairs, have mean 3k spacing;
    assume 40-60k of ld in ceu, a window of 120k width is about 40 snps,
    so lots more is perhaps less efficient - each window computational cost is
    ON^2 unless the code is smart enough to avoid unnecessary computation where
    allele frequencies make it impossible to see ld > the r^2 cutoff threshold.
    So, do a window and move forward 20?

    From the plink docs at
    http://pngu.mgh.harvard.edu/~purcell/plink/summary.shtml#prune

    Sometimes it is useful to generate a pruned subset of SNPs that are in
    approximate linkage equilibrium with each other. This can be achieved via
    two commands: --indep which prunes based on the variance inflation factor
    (VIF), which recursively removes SNPs within a sliding window; second,
    --indep-pairwise which is similar, except it is based only on pairwise
    genotypic correlation.

    Hint: the output of either of these commands is two lists of SNPs: those
    that are pruned out and those that are not. A separate command using the
    --extract or --exclude option is necessary to actually perform the pruning.

    The VIF pruning routine is performed:
        plink --file data --indep 50 5 2

    will create files

         plink.prune.in
         plink.prune.out

    Each is a simple list of SNP IDs; both these files can subsequently be
    specified as the argument for a --extract or --exclude command.

    The parameters for --indep are: window size in SNPs (e.g. 50), the number
    of SNPs to shift the window at each step (e.g. 5), the VIF threshold. The
    VIF is 1/(1-R^2) where R^2 is the multiple correlation coefficient for a
    SNP being regressed on all other SNPs simultaneously. That is, this
    considers the correlations between SNPs but also between linear
    combinations of SNPs. A VIF of 10 is often taken to represent near
    collinearity problems in standard multiple regression analyses (i.e.
    implies R^2 of 0.9). A VIF of 1 would imply that the SNP is completely
    independent of all other SNPs. Practically, values between 1.5 and 2
    should probably be used; particularly in small samples, if this threshold
    is too low and/or the window size is too large, too many SNPs may be
    removed.

    The second procedure is performed:
        plink --file data --indep-pairwise 50 5 0.5

    This generates the same output files as the first version; the only
    difference is that a simple pairwise threshold is used. The first two
    parameters (50 and 5) are the same as above (window size and step); the
    third parameter represents the r^2 threshold. Note: this represents the
    pairwise SNP-SNP metric now, not the multiple correlation coefficient;
    also note, this is based on the genotypic correlation, i.e. it does not
    involve phasing.

    To give a concrete example: the command above that specifies 50 5 0.5
    would a) consider a window of 50 SNPs, b) calculate LD between each pair
    of SNPs in the window, c) remove one of a pair of SNPs if the LD is
    greater than 0.5, d) shift the window 5 SNPs forward and repeat the
    procedure.

    To make a new, pruned file, then use something like (in this example, we
    also convert the standard PED fileset to a binary one):
        plink --file data --extract plink.prune.in --make-bed --out pruneddata
    """
    if plinktasks is None:
        plinktasks = []
    if vclbase is None:
        vclbase = []
    logres = ['## Rgenetics %s: http://rgenetics.org Galaxy Tools rgLDIndep.py Plink pruneLD runner\n' % myversion,]
    for task in plinktasks:  # each is a list
        cl = ' '.join(vclbase + task)
        s = '## ldindep now executing %s\n' % cl
        print(s)
        logres.append(s)
        fplog, plog = tempfile.mkstemp()
        try:
            # Wrap the descriptor mkstemp already opened instead of opening the
            # path a second time - otherwise that descriptor is never closed.
            sto = os.fdopen(fplog, 'w')  # to catch the blather
            try:
                proc = subprocess.Popen(cl, shell=True, stdout=sto, stderr=sto, cwd=cd)
                proc.wait()
            finally:
                sto.close()
            try:
                sto = open(plog, 'r')
                try:
                    lplog = sto.readlines()
                finally:
                    sto.close()
                logres += [line for line in lplog if line.find('Pruning SNP') == -1]
                logres.append('\n')
            except (IOError, OSError):
                logres.append('### %s Strange - no std out from plink when running command line\n%s' % (timenow(), cl))
        finally:
            os.unlink(plog)  # no longer needed, even if the run blew up
    return logres
def main():
    """Galaxy entry point for rgManQQ: run doManQQ and write the HTML report.

    Positional command line parameters (sys.argv):
        1  input file name
        2  title (a punctuation/whitespace-free copy is also passed on)
        3  path of the HTML report to write
        4  output directory handed to doManQQ
        5  chromosome column; 1 is added, 0 is used if it is not an integer
        6  offset column; 1 is added, 0 is used if it is not an integer
        7  comma separated p-value columns; 1 is added to each
        8  optional grey flag: '1' or 'true' (any case) enables it

    Exits with status 1 when fewer than 7 parameters are supplied.
    """
    u = """<command interpreter="python">
        rgManQQ.py '$input_file' "$name" '$out_html' '$out_html.files_path' '$chrom_col' '$offset_col' '$pval_col'
    </command>
    """
    sys.stdout.write("## rgManQQ.py. cl= \n%s\n" % " ".join(['"%s"' % x for x in sys.argv]))
    npar = 8
    if len(sys.argv) < npar:
        sys.stdout.write("## error - too few command line parameters - wanting %d\n" % npar)
        sys.stdout.write("%s\n" % u)
        sys.exit(1)
    input_fname = sys.argv[1]
    title = sys.argv[2]
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme, "_" * len(killme))
    ctitle = title.translate(trantab)
    outhtml = sys.argv[3]
    outdir = sys.argv[4]
    try:
        chrom_col = int(sys.argv[5]) + 1
    except ValueError:
        chrom_col = 0
    try:
        offset_col = int(sys.argv[6]) + 1
    except ValueError:
        offset_col = 0
    try:
        p = [int(x) + 1 for x in sys.argv[7].strip().split(",")]
        pval_cols = "c(%s)" % ",".join(map(str, p))
    except ValueError:
        pval_cols = "c(0)"
    if chrom_col == 1 or offset_col == 1:  # was passed as zero - do not do manhattan plots
        chrom_col = 0
        offset_col = 0
    # The grey flag is not part of the documented 7 parameter command line, so
    # treat it as optional rather than dying with an IndexError without it.
    grey = 0
    if len(sys.argv) > npar and sys.argv[npar].lower() in ["1", "true"]:
        grey = 1
    rlog, flist = doManQQ(input_fname, chrom_col, offset_col, pval_cols, title, grey, ctitle, outdir)
    flist.sort()
    html = [galhtmlprefix % progname]
    html.append("<h1>%s</h1>" % title)
    if flist:
        html.append("<table>\n")
        for fname, expl in flist:  # RRun returns pairs of filenames fiddled for the log and R script
            e = os.path.splitext(fname)[-1]
            if e in [".png", ".jpg"]:
                s = (
                    '<tr><td><a href="%s"><img src="%s" alt="%s" hspace="10" width="400"><br>(Click to download image %s)</a></td></tr>'
                    % (fname, fname, expl, expl)
                )
                html.append(s)
            else:
                html.append('<tr><td><a href="%s">%s</a></td></tr>' % (fname, expl))
        html.append("</table>\n")
    else:
        html.append("<h2>### Error - R returned no files - please confirm that parameters are sane</h2>")
    html.append("<h3>R log follows below</h3><hr><pre>\n")
    html += rlog
    html.append("</pre>\n")
    html.append(galhtmlattr % (progname, timenow()))
    html.append(galhtmlpostfix)
    htmlf = open(outhtml, "w")
    try:
        htmlf.write("\n".join(html))
        htmlf.write("\n")
    finally:
        htmlf.close()
Example #24
0
 logf = sys.argv[4]
 logoutdir = sys.argv[5]
 gffout = sys.argv[6]
 topn = 1000
 try:
     os.makedirs(logoutdir)
 except:
     pass
 map_file = None
 me = sys.argv[0]
 amapf = '%s.bim' % bfname  # to decode map in xformModel
 flog = file(logf, 'w')
 logme = []
 cdir = os.getcwd()
 s = 'Rgenetics %s http://rgenetics.org Galaxy Tools, rgCaCo.py started %s\n' % (
     myversion, timenow())
 print >> sys.stdout, s  # so will appear as blurb for file
 logme.append(s)
 if verbose:
     s = 'rgCaCo.py:  bfname=%s, logf=%s, argv = %s\n' % (bfname, logf,
                                                          sys.argv)
     print >> sys.stdout, s  # so will appear as blurb for file
     logme.append(s)
 twd = tempfile.mkdtemp(
     suffix='rgCaCo'
 )  # make sure plink doesn't spew log file into the root!
 tname = os.path.join(twd, name)
 vcl = [plinke, '--noweb', '--bfile', bfname, '--out', name, '--model']
 p = subprocess.Popen(' '.join(vcl), shell=True, stdout=flog, cwd=twd)
 retval = p.wait()
 resf = '%s.model' % tname  # plink output is here we hope
Example #25
0
def doIBS(n=100):
    """Parse parameters from galaxy, run doIBSpy and write the HTML report.

    Expected positional parameters (sys.argv):
        1  input pbed path
        2  basename
        3  path of the HTML report to write
        4  directory for the new output files (created if needed)
        5  title (punctuation and whitespace are replaced by '_')
        6  number of SNPs to sample; always overrides the n argument
        7  optional Z cutoff; 2.0 is used when missing or not a number

    <command interpreter="python">
         rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name"
        '$out_file1' '$out_file1.files_path' "$title1"  '$n' '$Z'
    </command>

    Args:
        n: unused default; the value from sys.argv[6] replaces it.

    Exits with status 1 if too few parameters are supplied.
    """
    u = """<command interpreter="python">
         rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name"
        '$out_file1' '$out_file1.files_path' "$title1"  '$n' '$Z'
         </command>
      """

    if len(sys.argv) < 7:
        sys.stdout.write('Need pbed inpath, basename, out_htmlname, outpath, title, logpath, nSNP, Zcutoff on command line please\n')
        sys.stdout.write('%s\n' % u)
        sys.exit(1)
    ts = '%s%s' % (string.punctuation, string.whitespace)
    ptran = string.maketrans(ts, '_' * len(ts))
    inpath = sys.argv[1]
    basename = sys.argv[2]
    outhtml = sys.argv[3]
    newfilepath = sys.argv[4]
    title = sys.argv[5].translate(ptran)
    logfname = 'Log_%s.txt' % title
    # log was a child - make part of html extra_files_path zoo
    logpath = os.path.join(newfilepath, logfname)
    n = int(sys.argv[6])
    try:
        Zcutoff = float(sys.argv[7])
    except (IndexError, ValueError):
        Zcutoff = 2.0
    try:
        os.makedirs(newfilepath)
    except OSError:
        pass  # best effort - most likely the directory already exists
    ped = plinkbinJZ.BPed(inpath)
    # Check before parse() is called; the message used to name an undefined
    # variable and so could only ever raise NameError.
    if ped is None:
        sys.stderr.write('## doIBSpy problem - cannot open %s or %s - cannot run\n' % (inpath, basename))
        sys.exit(1)
    ped.parse(quick=True)
    logf = open(logpath, 'w')
    try:
        newfiles, explanations, repOut = doIBSpy(ped=ped,
                                                 basename=basename,
                                                 outdir=newfilepath,
                                                 logf=logf,
                                                 nrsSamples=n,
                                                 title=title,
                                                 pdftoo=0,
                                                 Zcutoff=Zcutoff)
    finally:
        logf.close()
    login = open(logpath, 'r')
    try:
        logfs = login.readlines()
    finally:
        login.close()
    lf = open(outhtml, 'w')
    try:
        lf.write(galhtmlprefix % PROGNAME)
        # this is a mess. todo clean up - should each datatype have it's own directory? Yes
        # probably. Then titles are universal - but userId libraries are separate.
        s = '<div>Output from %s run at %s<br>\n' % (PROGNAME, timenow())
        lf.write('<h4>%s</h4>\n' % s)
        fixed = ["'%s'" % x for x in sys.argv]  # add quotes just in case
        s = 'If you need to rerun this analysis, the command line was\n<pre>%s</pre>\n</div>' % (
            ' '.join(fixed))
        lf.write(s)
        # <embed> was chosen after experiments with <object> and <iframe>,
        # related to a missing svg mimetype on the test server.
        s = """ <embed src="%s" type="image/svg+xml" width="%d" height="%d" />""" % (
            newfiles[0], PLOT_WIDTH, PLOT_HEIGHT)
        lf.write(s)
        lf.write('<div><h4>Click the links below to save output files and plots</h4><br><ol>\n')
        for i, newfile in enumerate(newfiles):
            if i == 0:  # the first file is the svg plot embedded above
                lf.write('<li><a href="%s" type="image/svg+xml" >%s</a></li>\n' %
                         (newfile, explanations[i]))
            else:
                lf.write('<li><a href="%s">%s</a></li>\n' %
                         (newfile, explanations[i]))
        # also link anything else found in the output directory
        for fname in os.listdir(newfilepath):
            if fname not in newfiles:
                lf.write('<li><a href="%s">%s</a></li>\n' % (fname, fname))
        lf.write('</ol></div>')
        lf.write('<div>%s</div>' % ('\n'.join(repOut)))  # repOut is a list of tables
        lf.write(
            '<div><hr><h3>Log from this job (also stored in %s)</h3><pre>%s</pre><hr></div>'
            % (logfname, ''.join(logfs)))
        lf.write('</body></html>\n')
    finally:
        lf.close()
Example #26
0
def doIBS(n=100):
    """Parse parameters from galaxy, run doIBSpy and write the HTML report.

    Expected positional parameters (sys.argv):
        1  input pbed path
        2  basename
        3  path of the HTML report to write
        4  directory for the new output files (created if needed)
        5  title (punctuation and whitespace are replaced by '_')
        6  number of SNPs to sample; always overrides the n argument
        7  optional Z cutoff; 2.0 is used when missing or not a number

    <command interpreter="python">
         rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name"
        '$out_file1' '$out_file1.files_path' "$title1"  '$n' '$Z'
    </command>

    Args:
        n: unused default; the value from sys.argv[6] replaces it.

    Exits with status 1 if too few parameters are supplied.
    """
    u="""<command interpreter="python">
         rgGRR.py $i.extra_files_path/$i.metadata.base_name "$i.metadata.base_name"
        '$out_file1' '$out_file1.files_path' "$title1"  '$n' '$Z'
         </command>
      """

    if len(sys.argv) < 7:
        sys.stdout.write('Need pbed inpath, basename, out_htmlname, outpath, title, logpath, nSNP, Zcutoff on command line please\n')
        sys.stdout.write('%s\n' % u)
        sys.exit(1)
    ts = '%s%s' % (string.punctuation,string.whitespace)
    ptran = string.maketrans(ts,'_'*len(ts))
    inpath = sys.argv[1]
    basename = sys.argv[2]
    outhtml = sys.argv[3]
    newfilepath = sys.argv[4]
    title = sys.argv[5].translate(ptran)
    logfname = 'Log_%s.txt' % title
    logpath = os.path.join(newfilepath,logfname) # log was a child - make part of html extra_files_path zoo
    n = int(sys.argv[6])
    try:
        Zcutoff = float(sys.argv[7])
    except (IndexError, ValueError):
        Zcutoff = 2.0
    try:
        os.makedirs(newfilepath)
    except OSError:
        pass # best effort - most likely the directory already exists
    ped = plinkbinJZ.BPed(inpath)
    # Check before parse() is called; the message used to name an undefined
    # variable and so could only ever raise NameError.
    if ped is None:
        sys.stderr.write('## doIBSpy problem - cannot open %s or %s - cannot run\n' % (inpath,basename))
        sys.exit(1)
    ped.parse(quick=True)
    logf = open(logpath,'w')
    try:
        newfiles,explanations,repOut = doIBSpy(ped=ped,basename=basename,outdir=newfilepath,
                                        logf=logf,nrsSamples=n,title=title,pdftoo=0,Zcutoff=Zcutoff)
    finally:
        logf.close()
    login = open(logpath,'r')
    try:
        logfs = login.readlines()
    finally:
        login.close()
    lf = open(outhtml,'w')
    try:
        lf.write(galhtmlprefix % PROGNAME)
        # this is a mess. todo clean up - should each datatype have it's own directory? Yes
        # probably. Then titles are universal - but userId libraries are separate.
        s = '<div>Output from %s run at %s<br>\n' % (PROGNAME,timenow())
        lf.write('<h4>%s</h4>\n' % s)
        fixed = ["'%s'" % x for x in sys.argv] # add quotes just in case
        s = 'If you need to rerun this analysis, the command line was\n<pre>%s</pre>\n</div>' % (' '.join(fixed))
        lf.write(s)
        # <embed> was chosen after experiments with <object> and <iframe>,
        # related to a missing svg mimetype on the test server.
        s = """ <embed src="%s" type="image/svg+xml" width="%d" height="%d" />""" % (newfiles[0],PLOT_WIDTH,PLOT_HEIGHT)
        lf.write(s)
        lf.write('<div><h4>Click the links below to save output files and plots</h4><br><ol>\n')
        for i,newfile in enumerate(newfiles):
            if i == 0: # the first file is the svg plot embedded above
                lf.write('<li><a href="%s" type="image/svg+xml" >%s</a></li>\n' % (newfile,explanations[i]))
            else:
                lf.write('<li><a href="%s">%s</a></li>\n' % (newfile,explanations[i]))
        # also link anything else found in the output directory
        for fname in os.listdir(newfilepath):
            if fname not in newfiles:
                lf.write('<li><a href="%s">%s</a></li>\n' % (fname,fname))
        lf.write('</ol></div>')
        lf.write('<div>%s</div>' % ('\n'.join(repOut))) # repOut is a list of tables
        lf.write('<div><hr><h3>Log from this job (also stored in %s)</h3><pre>%s</pre><hr></div>' % (logfname,''.join(logfs)))
        lf.write('</body></html>\n')
    finally:
        lf.close()
Example #27
0
 alogf = options.logf # absolute paths
 od = os.path.split(alogf)[0]
 try:
   os.path.makedirs(od)
 except:
   pass
 aoutf = options.outfname # absolute paths
 od = os.path.split(aoutf)[0]
 try:
   os.path.makedirs(od)
 except:
   pass
 vcl = [plinke,'--noweb', '--bfile',options.bfname,'--out',title,'--mind','0.5','--tdt']
 logme = []
 if verbose:
     s = 'Rgenetics %s http://rgenetics.org Galaxy Tools rgTDT.py started %s\n' % (myversion,timenow())
     print >> sys.stdout,s
     logme.append(s)
     s ='rgTDT.py: bfname=%s, logf=%s, argv = %s\n' % (options.bfname,alogf, sys.argv)
     print >> sys.stdout,s
     logme.append(s)
     s = 'rgTDT.py: vcl=%s\n' % (' '.join(vcl))
     print >> sys.stdout,s
     logme.append(s)
 twd = tempfile.mkdtemp(suffix='rgTDT') # make sure plink doesn't spew log file into the root!
 tname = os.path.join(twd,title)
 p=subprocess.Popen(' '.join(vcl),shell=True,cwd=twd)
 retval = p.wait()
 shutil.copy('%s.log' % tname,alogf)
 sto = file(alogf,'a')
 sto.write('\n'.join(logme))
Example #28
0
def main():
    """Galaxy entry point for rgManQQ: run doManQQ and write the HTML report.

    Positional command line parameters (sys.argv):
        1  input file name
        2  title (a punctuation/whitespace-free copy is also passed on)
        3  path of the HTML report to write
        4  output directory; plot PDFs are looked for here
        5  chromosome column; -1 is used if it is not an integer
        6  offset column; -1 is used if it is not an integer
        7  comma separated p-value columns (required, integers)
        8  optional grey flag: '1' or 'true' (any case) enables it

    1 is added to every column number before it is handed to doManQQ.
    Exits with status 1 when fewer than 7 parameters are supplied or the
    p-value columns are not integers.
    """
    u = """<command interpreter="python">
        rgManQQ.py '$input_file' "$name" '$out_html' '$out_html.files_path' '$chrom_col' '$offset_col' '$pval_col'
    </command>
    """
    npar = 8
    if len(sys.argv) < npar:
        sys.stdout.write('## error - too few command line parameters - wanting %d\n' % npar)
        sys.stdout.write('%s\n' % u)
        sys.exit(1)
    input_fname = sys.argv[1]
    title = sys.argv[2]
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme, '_' * len(killme))
    ctitle = title.translate(trantab)
    outhtml = sys.argv[3]
    outdir = sys.argv[4]
    try:
        chrom_col = int(sys.argv[5])
    except ValueError:
        chrom_col = -1
    try:
        offset_col = int(sys.argv[6])
    except ValueError:
        offset_col = -1
    if chrom_col == -1 or offset_col == -1:
        # Either column unusable: pass 0 for both to doManQQ (after the +1
        # below) - presumably that disables the manhattan plots; confirm there.
        chrom_col = -1
        offset_col = -1
    # The grey flag is not part of the documented 7 parameter command line, so
    # treat it as optional rather than dying with an IndexError without it.
    grey = 0
    if len(sys.argv) > npar and sys.argv[npar].lower() in ['1', 'true']:
        grey = 1
    try:
        p = ['%d' % (int(x) + 1) for x in sys.argv[7].strip().split(',')]
    except ValueError:
        sys.stderr.write('## Cannot run rgManQQ - missing pval column\n')
        sys.exit(1)
    rlog, flist = doManQQ(input_fname, chrom_col + 1, offset_col + 1,
                          ','.join(p), title, grey, ctitle, outdir)
    flist.sort()
    html = [
        galhtmlprefix % progname,
    ]
    html.append('<h1>%s</h1>' % title)
    if flist:
        html.append('<table>\n')
        for fname, expl in flist:  # RRun returns pairs of filenames fiddled for the log and R script
            n, e = os.path.splitext(fname)
            if e in ['.png', '.jpg']:
                # each image links to the PDF of the same base name
                pdf = '%s.pdf' % n
                pdff = os.path.join(outdir, pdf)
                if os.path.exists(pdff):
                    rval = compressPDF(inpdf=pdff)
                    if rval != 0:
                        pdf = '%s(not_compressed)' % pdf
                else:
                    pdf = '%s(not_found)' % pdf
                s = '<tr><td><a href="%s"><img src="%s" title="%s" hspace="10" width="800"></a></td></tr>' \
                    % (pdf, fname, expl)
                html.append(s)
            else:
                html.append('<tr><td><a href="%s">%s</a></td></tr>' %
                            (fname, expl))
        html.append('</table>\n')
    else:
        html.append(
            '<h2>### Error - R returned no files - please confirm that parameters are sane</h2>'
        )
    html.append('<h3>R log follows below</h3><hr><pre>\n')
    html += rlog
    html.append('</pre>\n')
    html.append(galhtmlattr % (progname, timenow()))
    html.append(galhtmlpostfix)
    htmlf = open(outhtml, 'w')
    try:
        htmlf.write('\n'.join(html))
        htmlf.write('\n')
    finally:
        htmlf.close()
def runEigen():
    """ run the smartpca prog - documentation follows

    Reads its parameters from sys.argv, runs smartpca in the output
    directory, copies the eigenvector file to the requested path, calls
    makePlot and writes an HTML report linking every file in that directory.

    Positional parameters (sys.argv):
        1  input genotype file root (.bed/.bim/.fam are appended)
        2  title (spaces and punctuation are replaced by '_')
        3  path of the HTML report to write
        4  directory for the new output files (created if needed)
        5-8  the smartpca tuning parameters k, m, t and s, in that order
        9  path the eigenvector (.evec) results are copied to

    Exits with status 1 if fewer than 9 parameters are supplied.

    smartpca.perl -i fakeped_100.eigenstratgeno -a fakeped_100.map -b fakeped_100.ind -p fakeped_100 -e fakeped_100.eigenvals -l
        fakeped_100.eigenlog -o fakeped_100.eigenout

DOCUMENTATION OF smartpca.perl program:

This program calls the smartpca program (see ../POPGEN/README).
For this to work, the bin directory containing smartpca MUST be in your path.
See ./example.perl for a toy example.

../bin/smartpca.perl
-i example.geno  : genotype file in EIGENSTRAT format (see ../CONVERTF/README)
-a example.snp   : snp file   (see ../CONVERTF/README)
-b example.ind   : indiv file (see ../CONVERTF/README)
-k k             : (Default is 10) number of principal components to output
-o example.pca   : output file of principal components.  Individuals removed
                   as outliers will have all values set to 0.0 in this file.
-p example.plot  : prefix of output plot files of top 2 principal components.
                   (labeling individuals according to labels in indiv file)
-e example.eval  : output file of all eigenvalues
-l example.log   : output logfile
-m maxiter       : (Default is 5) maximum number of outlier removal iterations.
                   To turn off outlier removal, set -m 0.
-t topk          : (Default is 10) number of principal components along which
                   to remove outliers during each outlier removal iteration.
-s sigma         : (Default is 6.0) number of standard deviations which an
                   individual must exceed, along one of topk top principal
                   components, in order to be removed as an outlier.

    now uses https://www.bx.psu.edu/cgi-bin/trac.cgi/galaxy/changeset/1832

All files can be viewed however, by making links in the primary (HTML) history item like:
<img src="display_child?parent_id=2&designation=SomeImage?" alt="Some Image"/>
<a href="display_child?parent_id=2&designation=SomeText?">Some Text</a>

    <command interpreter="python">
    rgEigPCA.py "$i.extra_files_path/$i.metadata.base_name" "$title" "$out_file1"
    "$out_file1.files_path" "$k" "$m" "$t" "$s" "$pca"
    </command>

    """
    # sys.argv[9] is read below, so ten entries (script name + 9) are needed
    if len(sys.argv) < 10:
        sys.stdout.write('Need an input genotype file root, a title, a temp id and the temp file path for outputs,\n')
        sys.stdout.write(' and the 4 integer tuning parameters k,m,t and s in order. Given that, will run smartpca for eigensoft\n')
        sys.exit(1)
    sys.stdout.write('rgEigPCA.py got %s\n' % (' '.join(sys.argv)))
    skillme = ' %s' % string.punctuation
    trantab = string.maketrans(skillme,'_'*len(skillme))
    progname = os.path.basename(sys.argv[0])
    infile = sys.argv[1] # now takes precomputed or autoconverted ldreduced dataset
    title = sys.argv[2].translate(trantab) # must replace all of these for urls containing title
    outfile1 = sys.argv[3]
    newfilepath = sys.argv[4]
    # make sure both the files_path and the report's own directory exist
    # (the latter is needed for tests); failure is ignored as before
    for needed in (newfilepath, os.path.split(outfile1)[0]):
        try:
            os.makedirs(needed)
        except OSError:
            pass
    eigen_k = sys.argv[5]
    eigen_m = sys.argv[6]
    eigen_t = sys.argv[7]
    eigen_s = sys.argv[8]
    eigpca = sys.argv[9] # path to new dataset for pca results - for later adjustment
    explanations=['Samples plotted in first 2 eigenvector space','Principle components','Eigenvalues',
    'Smartpca log (contents shown below)']
    rplotname = 'PCAPlot.pdf'
    eigenexts = [rplotname, "pca.xls", "eval.xls"]
    newfiles = ['%s_%s' % (title,x) for x in eigenexts] # produced by eigenstrat
    eigenouts = list(newfiles) # copy taken before the log name is appended
    eigenlogf = '%s_log.txt' % title
    newfiles.append(eigenlogf) # so it will also appear in the links
    lf = open(outfile1,'w')
    try:
        lf.write(galhtmlprefix % progname)
        smartCL = '%s -i %s.bed -a %s.bim -b %s.fam -o %s -p %s -e %s -l %s -k %s -m %s -t %s -s %s' % \
              (smartpca,infile, infile, infile, eigenouts[1],'%s_eigensoftplot.pdf' % title,eigenouts[2],eigenlogf, \
               eigen_k, eigen_m, eigen_t, eigen_s)
        p = subprocess.Popen(smartCL,shell=True,cwd=newfilepath)
        p.wait()
        elogf = open(os.path.join(newfilepath,eigenlogf),'r')
        try:
            elog = elogf.read()
        finally:
            elogf.close()
        # copy the eigenvector output file needed for adjustment to the user's eigenstrat library directory
        eeigen = os.path.join(newfilepath,'%s.evec' % eigenouts[1]) # need these for adjusting
        try:
            evecf = open(eeigen,'r')
            try:
                eigpcaRes = evecf.read()
            finally:
                evecf.close()
        except (IOError, OSError):
            eigpcaRes = '' # no eigenvectors - an empty file is still written
        pcaf = open(eigpca,'w')
        try:
            pcaf.write(eigpcaRes)
        finally:
            pcaf.close()
        makePlot(eigpca=eigpca,pdfname=newfiles[0],title=title,nfp=newfilepath,rexe=rexe)
        s = 'Output from %s run at %s<br/>\n' % (progname,timenow())
        lf.write('<h4>%s</h4>\n' % s)
        lf.write('newfilepath=%s, rexe=%s' % (newfilepath,rexe))
        lf.write('(click on the image below to see a much higher quality PDF version)')
        thumbnail = '%s.png' % newfiles[0] # foo.pdf.png - who cares?
        if os.path.exists(os.path.join(newfilepath,thumbnail)):
            lf.write('<table border="0" cellpadding="10" cellspacing="10"><tr><td>\n')
            lf.write('<a href="%s"><img src="%s" alt="%s" hspace="10" align="left" /></a></td></tr></table><br/>\n' \
                % (newfiles[0],thumbnail,explanations[0]))
        allfiles = os.listdir(newfilepath)
        allfiles.sort()
        sizes = [getfSize(x,newfilepath) for x in allfiles]
        lallfiles = ['<li><a href="%s">%s %s</a></li>\n' % (x,x,size) for x,size in zip(allfiles,sizes)] # html list
        lf.write('<div class="document">All Files:<ol>%s</ol></div>' % ''.join(lallfiles))
        lf.write('<div class="document">Log %s contents follow below<p/>' % eigenlogf)
        lf.write('<pre>%s</pre></div>' % elog) # the eigenlog
        s = 'If you need to rerun this analysis, the command line used was\n%s\n<p/>' % (smartCL)
        lf.write(s)
        lf.write(galhtmlpostfix) # end galhtmlprefix div
    finally:
        lf.close()