def run(self):
    """
    Run the gene set step and convert its gp outputs to GTF.

    Ensures the gene set and metrics directories exist, calls generate_gene_set_wrapper with the configuration,
    then calls convert_gp_to_gtf for every per-gene-set gp/gtf pair and finally for the combined pair.
    """
    cfg = self.cfg
    ensureDir(cfg.gene_set_dir)
    ensureDir(cfg.metrics_dir)
    generate_gene_set_wrapper(cfg)
    # NOTE(review): values are paired purely by iteration order, so this relies on both dicts yielding
    # their entries in matching order -- confirm against where the config builds them
    gp_paths = cfg.geneset_gps.itervalues()
    gtf_paths = cfg.geneset_gtfs.itervalues()
    for gp_path, gtf_path in zip(gp_paths, gtf_paths):
        self.convert_gp_to_gtf(gp_path, gtf_path)
    self.convert_gp_to_gtf(cfg.combined_gp, cfg.combined_gtf)
def main():
    """
    Command-line entry point.

    Parses the arguments, ensures the output directory exists, then calls get_genes followed by
    build_attributes for the requested database.
    """
    args = parse_args()
    # the database name doubles as the output name when none was supplied
    args.name = args.database if args.name is None else args.name
    database, name, out_dir = args.database, args.name, args.outDir
    ensureDir(out_dir)
    get_genes(database, name, out_dir, args.includeChroms, args.convertUCSCtoEnsembl)
    build_attributes(database, name, out_dir)
예제 #3
0
파일: plotting.py 프로젝트: ifiddes/pycbio
def init_image(path, width, height):
    """
    Sets up a PDF object.

    :param path: Path of the PDF file that will be written. Its parent directory is ensured to exist.
    :param width: Figure width in inches.
    :param height: Figure height in inches.
    :return: Tuple of (figure, PdfPages object opened on path).
    """
    out_dir = os.path.dirname(path)
    # A bare file name such as "plot.pdf" has an empty dirname. The current directory already exists, and
    # os.makedirs-style helpers raise when asked to create "", so only ensure a real directory component.
    if out_dir:
        ensureDir(out_dir)
    pdf = plt_back.PdfPages(path)
    # width by height in inches
    fig = plt.figure(figsize=(width, height), dpi=300, facecolor='w')
    return fig, pdf
 def run(self):
     """
     Produce the per-biotype outputs described by cfg.tm_plots.

     For each (biotype, plot configuration) pair the output directory is ensured to exist, then the paralogy,
     coverage and identity plot functions and the two num_pass_excel functions are called in that order.
     """
     cfg = self.cfg
     for biotype, plot_cfg in cfg.tm_plots.iteritems():
         ensureDir(plot_cfg.output_dir)
         # every call below shares the same target genomes / query genome / biotype arguments
         targets = plot_cfg.ordered_target_genomes
         query = plot_cfg.query_genome
         paralogy_plot(targets, query, biotype, plot_cfg.para_plot, cfg.db)
         cov_plot(targets, query, biotype, plot_cfg.cov_plot, cfg.db)
         ident_plot(targets, query, biotype, plot_cfg.ident_plot, cfg.db)
         num_pass_excel(targets, query, biotype, plot_cfg.num_pass_excel, cfg.db, cfg.args.filterChroms)
         num_pass_excel_gene_level(targets, query, biotype, plot_cfg.num_pass_excel_gene, cfg.db,
                                   cfg.args.filterChroms)
 def start_jobtree(self, args, entry_fn, norestart=False):
     """
     Launch entry_fn for a jobTree, either from scratch or as a restart of an existing tree.

     A fresh start happens when norestart is True, when args.jobTree does not exist yet, or when
     jobtree_is_finished reports the existing tree as finished. Otherwise entry_fn is called with the
     original args to attempt a restart; if that raises RuntimeError the tree directory is re-made and
     a fresh start is performed instead.
     TODO: this hack re-creates the namespace to avoid import issues.
     """
     # shallow copy of args into a brand-new Namespace; this copy is what fresh starts receive
     fresh_cfg = argparse.Namespace()
     fresh_cfg.__dict__.update(vars(args))
     ensureDir(os.path.dirname(args.jobTree))
     tree_dir = args.jobTree
     start_fresh = norestart is True or not os.path.exists(tree_dir) or self.jobtree_is_finished(tree_dir)
     if start_fresh:
         self.make_jobtree_dir(tree_dir)
         entry_fn(fresh_cfg)
         return
     # an unfinished tree is already on disk: try restarting it
     try:
         entry_fn(args)
     except RuntimeError:
         # restart failed, so start over
         self.make_jobtree_dir(tree_dir)
         entry_fn(fresh_cfg)
def main():
    """
    Command-line entry point for one experiment.

    Loads the 'ref' and 'tgt' directories for the experiment, builds the counts tables, runs the
    run_DEseq.R script once per genome, and finally calls generate_plots on the DESeq output directory.
    """
    args = parse_args()
    # three-level nested mapping whose innermost values are lists; handed to load_dir and
    # construct_counts_tables below
    data = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    experiment = args.experiment
    ref_path = os.path.join(args.ref_dir, experiment)
    tgt_path = os.path.join(args.tgt_dir, experiment)
    if args.genomes is None:
        # default to every entry found in the reference directory of this experiment
        args.genomes = os.listdir(ref_path)
    genomes = args.genomes
    common_name_map = get_common_name_map(args.attrs)
    for label, path in (('ref', ref_path), ('tgt', tgt_path)):
        load_dir(data, label, path, genomes, common_name_map)
    base_counts_dir = os.path.join(args.counts_dir, experiment)
    construct_counts_tables(data, base_counts_dir)
    deseq_path = os.path.join(args.deseq_dir, experiment)
    ensureDir(deseq_path)
    for genome in genomes:
        runProc(['Rscript', 'run_DEseq.R', genome, deseq_path, base_counts_dir])
    generate_plots(genomes, deseq_path, os.path.join(args.out_dir, experiment))
예제 #7
0
 def __init__(self, paraHost, runDir, paraDir, jobFile=None, cpu=None, mem=None, maxJobs=None, retries=None):
     """Store the parasol settings, ensure the para directory exists and check the job file.

     Will chdir to run dir. paraDir should be relative to runDir or absolute, jobFile should be
     relative to runDir or absolute.

     Raises PycbioException when a jobFile is given but does not exist.
     """
     self.paraHost = paraHost
     self.jobFile = jobFile
     self.cpu, self.mem = cpu, mem
     self.maxJobs, self.retries = maxJobs, retries
     # symlinks can confuse parasol, as it can give two different names for a job.
     self.runDir = os.path.realpath(os.path.abspath(runDir))
     # NOTE(review): realpath() resolves a relative paraDir against the current working directory,
     # not against runDir as the docstring describes -- confirm this is intended
     self.paraDir = os.path.realpath(paraDir)
     fileOps.ensureDir(self._mkAbs(self.runDir, self.paraDir))
     if jobFile is None:
         return
     # a job file was supplied: it must already exist
     absJobFile = self._mkAbs(self.runDir, self.jobFile)
     if not os.path.exists(absJobFile):
         raise PycbioException("job file not found: {}".format(absJobFile))
def generate_plots(genomes, deseq_path, out_dir):
    """
    Write two scatter-plot PDFs per genome from the DESeq data tables.

    For each genome, <deseq_path>/<genome>.data_table.tsv is read, rows whose 'ref' or 'tgt' value is
    below 1 are dropped, and the table is passed through strip_version and calculate_pval_vector.
    'nonzero_log.pdf' and 'nonzero_log_paralogy.pdf' are then saved under <out_dir>/<genome>.
    """
    ref_col_name = 'Normalized counts (aligned to mm10)'
    axis_range = (0, 10)
    for genome in genomes:
        genome_dir = os.path.join(out_dir, genome)
        ensureDir(genome_dir)
        tgt_col_name = 'Normalized counts (aligned to {})'.format(genome)
        # keyword arguments shared by both lmplot calls
        shared_kws = dict(x=ref_col_name, y=tgt_col_name, hue='Adjusted p-value', fit_reg=False)

        table_path = os.path.join(deseq_path, genome + '.data_table.tsv')
        counts = pd.read_table(table_path, sep=' ', header=0, index_col=0)
        # keep only rows with a value of at least 1 in both the 'ref' and the 'tgt' column
        counts = counts[(counts['ref'] >= 1) & (counts['tgt'] >= 1)]
        counts = calculate_pval_vector(strip_version(counts), ref_col_name, tgt_col_name)

        main_grid = sns.lmplot(data=counts, scatter_kws={"s": 6, "alpha": 0.7}, **shared_kws)
        main_grid.set(xlim=axis_range, ylim=axis_range)
        main_grid.savefig(os.path.join(genome_dir, 'nonzero_log.pdf'), format='pdf')

        # same plot again, split into one column per 'Paralogy' value
        by_paralogy = load_paralogy(genome, counts)
        para_grid = sns.lmplot(data=by_paralogy, scatter_kws={"s": 6, "alpha": 0.7}, col='Paralogy',
                               **shared_kws)
        para_grid.set(xlim=axis_range, ylim=axis_range)
        para_grid.savefig(os.path.join(genome_dir, 'nonzero_log_paralogy.pdf'), format='pdf')
        plt.close('all')
예제 #9
0
 def write(self, outDir):
     """Ensure outDir exists, write the frame to outDir/index.html, then write the directory pages."""
     fileOps.ensureDir(outDir)
     indexPath = outDir + "/index.html"
     self.__mkFrame(self.title, self.dirPercent, self.below).writeFile(indexPath)
     self.__writeDirPages(outDir)
예제 #10
0
 def getOutputDir(self):
     """Return the "output" directory under this test's directory, ensuring it exists first."""
     outputDir = self.getTestDir() + "/output"
     fileOps.ensureDir(outputDir)
     return outputDir
예제 #11
0
 def getOutputDir(self):
     """Return the path of the test's "output" subdirectory; the directory is ensured to exist."""
     outputDir = self.getTestDir() + "/output"
     fileOps.ensureDir(outputDir)
     return outputDir
 def run(self):
     """Ensure the chaining output directory exists, then start the chainNetStartup jobTree."""
     chaining_cfg = self.cfg.chaining
     ensureDir(chaining_cfg.out_dir)
     self.start_jobtree(chaining_cfg, chainNetStartup, norestart=self.cfg.args.norestart)
def build_out_dirs(out_dir, genome, institute, tissue, experiment):
    """
    Join the arguments into out_dir/genome/institute/tissue/experiment, ensure that directory
    exists, and return its path.
    """
    components = (out_dir, genome, institute, tissue, experiment)
    out_path = os.path.join(*components)
    ensureDir(out_path)
    return out_path