def run_cluster(*data):
    """
    Run seqcluster cluster to detect smallRNA clusters.

    The first sample (``data[0][0]``) supplies the work directory, the
    reference file and the sRNA GTF file. ``_cluster`` is run on
    ``<work_dir>/align/seqs.bam`` using ``<work_dir>/seqcluster/prepare`` and
    the (created) ``<work_dir>/seqcluster/cluster`` directory.

    The first sample is then updated with:

    * ``"report"``: the value returned by ``_report``;
    * ``"seqcluster"``: the absolute path of the cluster output directory;
    * miRNA / isomiR counts (via ``dd.set_mirna_counts`` and
      ``dd.set_isomir_counts``) when ``_make_isomir_counts`` returns a truthy
      value for the ``mirbase`` directory;
    * novel miRNA / isomiR counts (via ``dd.set_novel_mirna_counts`` and
      ``dd.set_novel_isomir_counts``) when ``_make_isomir_counts`` returns a
      truthy value for the ``mirdeep2`` directory.

    :param data: positional sample groups; ``data[0][0]`` must support item
        assignment and ``data[0]`` must support index assignment.
    :returns: the same ``data`` tuple, with ``data[0][0]`` replaced by the
        updated sample.
    """
    sample = data[0][0]
    work_dir = dd.get_work_dir(sample)
    out_dir = op.abspath(safe_makedir(op.join(work_dir, "seqcluster", "cluster")))
    prepare_dir = op.join(work_dir, "seqcluster", "prepare")
    bam_file = op.join(work_dir, "align", "seqs.bam")

    # The return value of _cluster is not used by this function.
    _cluster(
        bam_file,
        prepare_dir,
        out_dir,
        dd.get_ref_file(sample),
        dd.get_srna_gtf_file(sample),
    )
    sample["report"] = _report(sample, dd.get_ref_file(sample))
    sample["seqcluster"] = out_dir

    out_mirna = _make_isomir_counts(data, out_dir=op.join(work_dir, "mirbase"))
    if out_mirna:
        sample = dd.set_mirna_counts(sample, out_mirna[0])
        sample = dd.set_isomir_counts(sample, out_mirna[1])

    out_novel = _make_isomir_counts(
        data, "seqbuster_novel", op.join(work_dir, "mirdeep2"), "_novel"
    )
    # mirdeep.run is called for its effects only; its result is not used here.
    mirdeep.run(data)
    if out_novel:
        sample = dd.set_novel_mirna_counts(sample, out_novel[0])
        sample = dd.set_novel_isomir_counts(sample, out_novel[1])

    # The dd.set_* helpers return the sample to keep, so write it back.
    data[0][0] = sample
    return data
def run_cluster(*data):
    """
    Run seqcluster cluster to detect smallRNA clusters.

    The first sample (``data[0][0]``) supplies the work directory, the
    reference file and the sRNA GTF file. ``_cluster`` is run on
    ``<work_dir>/align/seqs.bam`` using ``<work_dir>/seqcluster/prepare`` and
    the (created) ``<work_dir>/seqcluster/cluster`` directory, then
    ``_report`` is called for the first sample.

    Every item in ``data`` gets ``item[0]["seqcluster"]`` set to the absolute
    cluster output directory. The two values returned by
    ``_make_isomir_counts`` are stored on the first sample under
    ``"mirna_counts"`` and ``"isomir_counts"``, and ``mirdeep.run`` is called
    last.

    :param data: positional sample groups; each ``item[0]`` must support item
        assignment.
    :returns: the same ``data`` tuple, updated in place.
    """
    first_sample = data[0][0]
    work_dir = dd.get_work_dir(first_sample)
    out_dir = op.abspath(safe_makedir(op.join(work_dir, "seqcluster", "cluster")))
    prepare_dir = op.join(work_dir, "seqcluster", "prepare")
    bam_file = op.join(work_dir, "align", "seqs.bam")

    # Neither return value is used by this function.
    _cluster(
        bam_file,
        prepare_dir,
        out_dir,
        dd.get_ref_file(first_sample),
        dd.get_srna_gtf_file(first_sample),
    )
    _report(first_sample, dd.get_ref_file(first_sample))

    for sample in data:
        sample[0]["seqcluster"] = out_dir

    # NOTE(review): the unpacking assumes _make_isomir_counts always returns
    # exactly two items -- confirm against its definition.
    out_mirna, out_isomir = _make_isomir_counts(data)
    # Index data afresh here: _make_isomir_counts received `data` itself.
    data[0][0]["mirna_counts"] = out_mirna
    data[0][0]["isomir_counts"] = out_isomir
    mirdeep.run(data)
    return data
def run_cluster(*data):
    """
    Run seqcluster cluster to detect smallRNA clusters.

    The first sample (``data[0][0]``) supplies the work directory, the
    reference file and the sRNA GTF file. ``_cluster`` is run on
    ``<work_dir>/align/seqs.bam`` using ``<work_dir>/seqcluster/prepare`` and
    the (created) ``<work_dir>/seqcluster/cluster`` directory, then
    ``_report`` is called for the first sample.

    Every item in ``data`` gets ``item[0]["seqcluster"]`` set to the absolute
    cluster output directory. The two values returned by
    ``_make_isomir_counts`` are stored on the first sample under
    ``"mirna_counts"`` and ``"isomir_counts"``, and ``mirdeep.run`` is called
    last; its result is not used.

    :param data: positional sample groups; each ``item[0]`` must support item
        assignment.
    :returns: the same ``data`` tuple, updated in place.
    """
    head = data[0][0]
    work_dir = dd.get_work_dir(head)
    cluster_out = op.join(work_dir, "seqcluster", "cluster")
    out_dir = op.abspath(safe_makedir(cluster_out))
    prepare_dir = op.join(work_dir, "seqcluster", "prepare")
    bam_file = op.join(work_dir, "align", "seqs.bam")

    # Neither return value is used by this function.
    _cluster(
        bam_file,
        prepare_dir,
        out_dir,
        dd.get_ref_file(head),
        dd.get_srna_gtf_file(head),
    )
    _report(head, dd.get_ref_file(head))

    for sample in data:
        sample[0]["seqcluster"] = out_dir

    # NOTE(review): the unpacking assumes _make_isomir_counts always returns
    # exactly two items -- confirm against its definition.
    out_mirna, out_isomir = _make_isomir_counts(data)
    # Index data afresh here: _make_isomir_counts received `data` itself.
    data[0][0]["mirna_counts"] = out_mirna
    data[0][0]["isomir_counts"] = out_isomir
    mirdeep.run(data)
    return data
def run_align(*data):
    """
    Prepare data to run alignment step, only once for each project.

    Paths are derived from the work directory of the first sample
    (``data[0][0]``). If ``<work_dir>/align/seqs.bam`` does not exist yet,
    ``process_alignment`` is run on ``<work_dir>/seqcluster/prepare/seqs.fastq``
    and the resulting BAM and its ``.bai`` companion file are moved to
    ``seqs.bam`` / ``seqs.bam.bai``; the directory under ``align`` named after
    the aligned sample's ``rgnames/sample`` value is then removed.

    Every item in ``data`` gets ``item[0]["align_bam"]`` set to its own
    ``"clean_fastq"`` value. ``mirdeep.run`` is called when ``"mirdeep2"`` is
    among the configured expression callers.

    :param data: positional sample groups; each ``item[0]`` must support item
        access and assignment.
    :returns: the same ``data`` tuple, updated in place.
    """
    first_sample = data[0][0]
    work_dir = dd.get_work_dir(first_sample)
    out_dir = op.join(work_dir, "seqcluster", "prepare")
    seq_out = op.join(out_dir, "seqs.fastq")
    bam_dir = op.join(work_dir, "align")
    new_bam_file = op.join(bam_dir, "seqs.bam")
    tools = dd.get_expression_caller(first_sample)

    if not file_exists(new_bam_file):
        aligned = process_alignment(first_sample, [seq_out, None])
        bam_file = dd.get_work_bam(aligned[0][0])
        shutil.move(bam_file, new_bam_file)
        shutil.move(bam_file + ".bai", new_bam_file + ".bai")
        # Drop the per-sample alignment directory now that its BAM was moved.
        shutil.rmtree(op.join(bam_dir, aligned[0][0]["rgnames"]["sample"]))

    for sample in data:
        sample[0]["align_bam"] = sample[0]["clean_fastq"]

    if "mirdeep2" in tools:
        # Called for its effects only; the result is not used here.
        mirdeep.run(data)
    return data
def run_align(*data):
    """
    Prepare data to run alignment step, only once for each project.

    Paths are derived from the work directory of the first sample
    (``data[0][0]``). If ``<work_dir>/align/seqs.bam`` does not exist yet,
    ``process_alignment`` is run on ``<work_dir>/seqcluster/prepare/seqs.fastq``
    and the resulting BAM and its ``.bai`` companion file are moved to
    ``seqs.bam`` / ``seqs.bam.bai``; the directory under ``align`` named after
    the aligned sample's ``rgnames/sample`` value is then removed.

    Every item in ``data`` gets ``item[0]["cluster_bam"]`` set to the shared
    ``seqs.bam`` path. ``mirdeep.run`` is called when ``"mirdeep2"`` is among
    the configured expression callers.

    :param data: positional sample groups; each ``item[0]`` must support item
        assignment.
    :returns: the same ``data`` tuple, updated in place.
    """
    first_sample = data[0][0]
    work_dir = dd.get_work_dir(first_sample)
    out_dir = op.join(work_dir, "seqcluster", "prepare")
    seq_out = op.join(out_dir, "seqs.fastq")
    bam_dir = op.join(work_dir, "align")
    new_bam_file = op.join(bam_dir, "seqs.bam")
    tools = dd.get_expression_caller(first_sample)

    if not file_exists(new_bam_file):
        aligned = process_alignment(first_sample, [seq_out, None])
        bam_file = dd.get_work_bam(aligned[0][0])
        shutil.move(bam_file, new_bam_file)
        shutil.move(bam_file + ".bai", new_bam_file + ".bai")
        # Drop the per-sample alignment directory now that its BAM was moved.
        shutil.rmtree(op.join(bam_dir, aligned[0][0]["rgnames"]["sample"]))

    # NOTE: the original code carried a commented-out assignment of
    # sample[0]["align_bam"] from "clean_fastq"; only "cluster_bam" is set.
    for sample in data:
        sample[0]["cluster_bam"] = new_bam_file

    if "mirdeep2" in tools:
        # Called for its effects only; the result is not used here.
        mirdeep.run(data)
    return data