Ejemplo n.º 1
0
 def build_qsub(self):
     """
     Render this CallObject's qsub script and write it to the current working directory.

     Template values are taken from the "qsub_options" sub-tree of the yaml config
     (``self.yargs.qsub_options``) and from attributes of this object.  The result is
     written to ``<call_id>.qsub.sh``.

     The template is rendered completely *before* the output file is opened, so a
     rendering failure cannot leave an empty or truncated script behind, and the file
     handle is always closed, even if the write fails.

     Raises:
         KeyError: if ``self.prog_name`` has no entry in the nickname table, or if
             ``self.opt_dict`` has no ``'p'`` entry.
     """
     # Short program names used as the prefix of the job name.
     nicknames = {'tophat':'th',
                  'cufflinks':'cl',
                  'cuffmerge':'cm',
                  'cuffdiff':'cd',}

     qsub_options = self.yargs.qsub_options

     # set keyword args for template
     kw = Bunch()
     kw.queues = qsub_options.queues
     kw.datahome = qsub_options.datahome
     kw.core_range = qsub_options.core_range
     kw.email_addy = self.email_info.email_to
     kw.call_id = self.call_id
     # Job name = program nickname + call_id with its first '_'-delimited field dropped.
     job_name = "%s_%s" % (nicknames[self.prog_name], '_'.join(self.call_id.split('_')[1:]))
     kw.job_name = job_name
     kw.out_dir = self.out_dir
     kw.ld_library_path = qsub_options.ld_library_path

     # need to make sure we use the number of cores that the SGE gave us:
     # swap the configured '-p <n>' for '-p $CORES' in the command string.
     kw.cmd_str = self.cmd_string.replace('-p %s' % (self.opt_dict['p']),'-p $CORES')

     # Render first; only touch the filesystem once we have the full script text.
     qsub_template = Template(filename=qsub_options.template)
     qsub_string = qsub_template.render(**kw)

     with open('%s.qsub.sh' % (self.call_id),'w') as out_file:
         out_file.write(qsub_string)
Ejemplo n.º 2
0
def main():
    """
    Command-line entry point: build preliminary cummeRbund plots from a cuffdiff run.

    Steps performed:
      1. Parse the command-line options (prints the help text and exits with
         status 0 when the script is invoked with no arguments at all).
      2. Load the cummeRbund library into the R workspace and read the cuffdiff
         output directory with ``readCufflinks``.
      3. Build a set of standard plots (dispersion, density, box, scatter matrix,
         volcano matrix, significance matrix); replicate-level variants and the
         SCV plots are only built when more than one replicate id is found.
      4. Print the number of significant genes (alpha=0.05), cluster them (k=20)
         and plot the clusters.
      5. Hand all collected plots to ``print_my_plots`` for saving.
    """

    desc = """This script reads the files in a cuffdiff output directory into cummeRbund, generates some standard preliminary plots, and saves the output."""

    parser = argparse.ArgumentParser(description=desc)

    parser.add_argument('--version', action='version', version='%(prog)s ' + blacktie.__version__,
                        help="""Print version number.""")
    parser.add_argument('--cuffdiff-dir', type=str,
                        help="""Path to a cuffdiff output directory.""")
    #parser.add_argument('--cummerbund-db', type=str,
                        #help="""Path to a pre-built cummeRbund 'cuffData.db'. (this is rarely specified directly; usually --cuffdiff-dir works fine)""")
    parser.add_argument('--gtf-path', type=str, default='NULL',
                        help="""Path to gtf file used in cuffdiff analysis. This will provide transcript model information.""")
    parser.add_argument('--genome', type=str, default='NULL',
                        help="""String indicating which genome build the .gtf annotations are for (e.g. 'hg19' or 'mm9').""")
    parser.add_argument('--out', type=str, 
                        help="""A base directory to add to our saved plots into.""")
    parser.add_argument('--file-type', type=str, choices=['pdf','jpeg','png','ps'], default='pdf',
                        help="""The type of output file to use when saving our plots. (default: %(default)s)""")

    # No arguments at all: show usage instead of failing later inside R.
    # sys.exit() is used rather than the site-provided exit(), which is meant for
    # interactive sessions and is not guaranteed to exist (e.g. ``python -S``).
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(0)

    args = parser.parse_args()

    # import the cummeRbund library to the R workspace
    import_cummeRbund_library()

    # read in the cuffdiff data
    cuff = r.readCufflinks(dir=args.cuffdiff_dir, gtfFile=args.gtf_path, genome=args.genome)

    # Find out if we have replicates: more than one distinct value in the
    # element at index 2 of the repFpkm result.
    # NOTE(review): index 2 is presumably the replicate-id column -- confirm
    # against the cummeRbund repFpkm output.
    genes_rep_fpkm = r.repFpkm(r.genes(cuff))
    replicate_ids = set(genes_rep_fpkm[2])
    we_have_replicates = len(replicate_ids) > 1

    # Store my plots here
    rplots = Bunch()

    # dispersion plot
    rplots.dispersionPlot = r.dispersionPlot(r.genes(cuff))

    # SCV plots (only built when replicates are present)
    if we_have_replicates:
        rplots.fpkmSCVPlot_genes = r.fpkmSCVPlot(r.genes(cuff))
        rplots.fpkmSCVPlot_isoforms = r.fpkmSCVPlot(r.isoforms(cuff))

    # Density Plots
    rplots.csDensity = r.csDensity(r.genes(cuff))

    if we_have_replicates:
        rplots.csDensity_reps = r.csDensity(r.genes(cuff),replicates='T')

    # Box Plots
    rplots.csBoxplot = r.csBoxplot(r.genes(cuff))

    if we_have_replicates:
        rplots.csBoxplot_reps = r.csBoxplot(r.genes(cuff),replicates='T')

    # Scatter Matrix
    rplots.csScatterMatrix = r.csScatterMatrix(r.genes(cuff))

    # TODO: csDendro does not use ggplot2 it seems so ggsave() does not work. When issue is fixed, uncomment this.
##    # Dendrograms
##    rplots.csDendro = r.csDendro(r.genes(cuff))    
##    
##    if we_have_replicates:
##        rplots.csDendro_reps = r.csDendro(r.genes(cuff),replicates='T')

    # Volcano Matrix
    rplots.csVolcanoMatrix = r.csVolcanoMatrix(r.genes(cuff))

    # Sig Matrix
    rplots.sigMatrix = r.sigMatrix(cuff,level='genes',alpha=0.05)

    # get significant genes
    mySigGeneIds = r.getSig(cuff,alpha=0.05,level='genes')
    mySigGenes = r.getGenes(cuff,mySigGeneIds)
    # Single-argument call form: prints the same text on Python 2 and is valid on Python 3.
    print("Significant Genes: %s" % (len(mySigGeneIds)))

    # Preliminary Clustering
    ic = r.csCluster(mySigGenes,k=20)
    rplots.csClusterPlot = r.csClusterPlot(ic)

    # print the plots
    print_my_plots(r, rplots, out=args.out, file_type=args.file_type)