def filter_plot_tar(metrics, src_tar, pass_tar, fail_tar, tempdir, filters):
    """Split the per-cell segment plots of a tarball into pass and fail tarballs.

    The source tarball is extracted under ``tempdir``; cells that survive
    ``helpers.filter_metrics`` have their ``<cell>_segments.png`` plot copied
    into a ``segs_pass`` directory and every other cell into ``segs_fail``.
    Each directory is then archived with ``helpers.make_tarfile``.

    :param metrics: metrics file handed to ``csvutils.read_csv_and_yaml``;
        the loaded table must expose a ``cell_id`` column
    :param src_tar: tarball containing ``segments/<cell>_segments.png`` plots
    :param pass_tar: output tarball for cells that pass the filters
    :param fail_tar: output tarball for cells that do not pass the filters
    :param tempdir: scratch directory used for extraction and staging
    :param filters: filter specification forwarded to ``helpers.filter_metrics``
    """
    allplots = os.path.join(tempdir, 'allplots')
    helpers.makedirs(allplots)
    helpers.extract_tar(src_tar, allplots)

    metrics_data = csvutils.read_csv_and_yaml(metrics)
    all_cells = metrics_data.cell_id.tolist()
    good_cells = helpers.filter_metrics(metrics_data, filters).cell_id.tolist()

    # Membership is tested once per cell, so use a set rather than scanning
    # the list of passing cells each time.
    passing = set(good_cells)
    bad_cells = [cell for cell in all_cells if cell not in passing]

    # Pass plots are staged and archived first, then fail plots.
    outcomes = (
        ('segs_pass', good_cells, pass_tar),
        ('segs_fail', bad_cells, fail_tar),
    )
    for dirname, cells, tar_path in outcomes:
        plotdir = os.path.join(tempdir, dirname)
        helpers.makedirs(plotdir)
        for cell in cells:
            plot_name = '{}_{}.png'.format(cell, 'segments')
            shutil.copyfile(
                os.path.join(allplots, 'segments', plot_name),
                os.path.join(plotdir, plot_name),
            )
        helpers.make_tarfile(tar_path, plotdir)
def run_fastqc(fastq1, fastq2, reports, tempdir, config):
    """Run fastqc on both fastq files and tar the resulting reports.

    A zero-byte fastq file is skipped with a warning instead of being passed
    to fastqc. The reports directory is archived even when both files were
    skipped.

    :param fastq1: path to the R1 fastq file
    :param fastq2: path to the R2 fastq file
    :param reports: output tarball that receives the reports directory
    :param tempdir: scratch directory; reports are written to
        ``<tempdir>/fastqc_reports``
    :param config: mapping with a ``'containers'`` entry, used to build the
        container context for fastqc
    """
    container_ctx = helpers.get_container_ctx(
        config['containers'], 'fastqc', docker_only=True)

    reports_dir = os.path.join(tempdir, 'fastqc_reports')
    if not os.path.exists(reports_dir):
        helpers.makedirs(reports_dir)

    for read_label, fastq in (('R1', fastq1), ('R2', fastq2)):
        out_html = os.path.join(reports_dir, 'fastqc_{}.html'.format(read_label))
        out_plot = os.path.join(reports_dir, 'fastqc_{}.zip'.format(read_label))

        if os.path.getsize(fastq) == 0:
            # Name the file that is actually empty (the R2 branch used to
            # report the R1 path).
            warnings.warn("fastq file %s is empty, skipping fastqc" % fastq)
            continue

        bamutils.produce_fastqc_report(
            fastq, out_html, out_plot, tempdir, **container_ctx)

    helpers.make_tarfile(reports, reports_dir)
def run_hmmcopy(
        bam_file,
        corrected_reads_filename,
        segments_filename,
        parameters_filename,
        metrics_filename,
        hmmcopy_tar,
        cell_id,
        hmmparams,
        tempdir,
        docker_image
):
    """Run read correction and hmmcopy for one cell and export the results.

    :param bam_file: input bam passed to ``run_correction_hmmcopy``
    :param corrected_reads_filename: destination for the rewritten ``reads.csv``
    :param segments_filename: destination for the rewritten ``segs.csv``
    :param parameters_filename: destination for the rewritten ``params.csv``
    :param metrics_filename: destination for the rewritten ``metrics.csv``
    :param hmmcopy_tar: output tarball of the whole hmmcopy working directory
    :param cell_id: cell identifier, used to name the working directory
    :param hmmparams: parameters forwarded to both hmmcopy steps
    :param tempdir: scratch directory (created if needed)
    :param docker_image: container image forwarded to both hmmcopy steps
    """
    helpers.makedirs(tempdir)

    # Step 1: read counting / correction; the wig file is an intermediate.
    wig_path = os.path.join(tempdir, 'readcounter.wig')
    corrected_path = os.path.join(tempdir, 'corrected_reads.csv')
    run_correction_hmmcopy(
        bam_file, corrected_path, wig_path, hmmparams, docker_image
    )

    # Step 2: hmmcopy itself, in a per-cell working directory.
    work_dir = os.path.join(tempdir, '{}_hmmcopy'.format(cell_id))
    helpers.makedirs(work_dir)
    run_hmmcopy_script(
        corrected_path, work_dir, cell_id, hmmparams, docker_image
    )

    # Results are read from the "0" subdirectory of the working directory.
    results_dir = os.path.join(work_dir, '0')
    exports = (
        ("reads.csv", corrected_reads_filename, 'reads'),
        ("params.csv", parameters_filename, 'params'),
        ("segs.csv", segments_filename, 'segs'),
        ("metrics.csv", metrics_filename, 'metrics'),
    )
    for csv_name, destination, table in exports:
        csvutils.rewrite_csv_file(
            os.path.join(results_dir, csv_name),
            destination,
            dtypes=dtypes()[table]
        )

    helpers.make_tarfile(hmmcopy_tar, work_dir)
def run_fastqc(fastq1, fastq2, reports, tempdir, containers):
    """Run fastqc on both fastq files and tar the resulting reports.

    When both files are smaller than 100 bytes they are treated as empty and
    an archive of the (empty) reports directory is written straight away.
    Otherwise each zero-byte file is skipped with a logged warning and the
    other one is still processed.

    :param fastq1: path to the R1 fastq file
    :param fastq2: path to the R2 fastq file
    :param reports: output tarball that receives the reports directory
    :param tempdir: scratch directory; reports are written to
        ``<tempdir>/fastqc_reports``
    :param containers: mapping whose ``'fastqc'`` entry is the docker image
        passed to ``bamutils.produce_fastqc_report``
    """
    reports_dir = os.path.join(tempdir, 'fastqc_reports')
    if not os.path.exists(reports_dir):
        helpers.makedirs(reports_dir)

    # empty fastq files
    if os.stat(fastq1).st_size < 100 and os.stat(fastq2).st_size < 100:
        helpers.make_tarfile(reports, reports_dir)
        return

    logger = logging.getLogger("single_cell.align.tasks")

    for read_label, fastq in (('R1', fastq1), ('R2', fastq2)):
        out_html = os.path.join(reports_dir, 'fastqc_{}.html'.format(read_label))
        out_plot = os.path.join(reports_dir, 'fastqc_{}.zip'.format(read_label))

        if os.path.getsize(fastq) == 0:
            # Report the file that is actually empty (the R2 branch used to
            # log the R1 path); Logger.warn is a deprecated alias of warning.
            logger.warning("fastq file %s is empty, skipping fastqc", fastq)
            continue

        bamutils.produce_fastqc_report(
            fastq, out_html, out_plot, tempdir,
            docker_image=containers['fastqc'])

    helpers.make_tarfile(reports, reports_dir)
def create_hmmcopy_data_tar(infiles, tar_output, tempdir):
    """Unpack several tarballs side by side and re-archive them as one.

    :param infiles: mapping of key -> tarball path; each tarball is extracted
        into ``<tempdir>/<key>``
    :param tar_output: output tarball built from the whole of ``tempdir``
    :param tempdir: scratch directory (created if needed)
    """
    helpers.makedirs(tempdir)

    for label, archive in infiles.items():
        extract_to = os.path.join(tempdir, label)
        helpers.extract_tar(archive, extract_to)

    helpers.make_tarfile(tar_output, tempdir)
def merge_pdf(in_filenames, outfilenames, metrics, cell_filters, tempdir, labels):
    """Collect the plots of cells that pass the filters into one tarball per label.

    Despite the name, nothing is merged into a single pdf: for every
    ``(infiles, outfile, label)`` triple the plots of the good cells are
    copied to ``<tempdir>/<label>/<cell>_<label><ext>`` and that directory is
    archived.

    :param in_filenames: sequence of mappings, cell id -> plot path
    :param outfilenames: sequence of output tarball paths, parallel to
        ``in_filenames``
    :param metrics: metrics input forwarded to ``get_good_cells``
    :param cell_filters: filter specification forwarded to ``get_good_cells``
    :param tempdir: scratch directory (created if needed)
    :param labels: sequence of labels, parallel to ``in_filenames``
    """
    helpers.makedirs(tempdir)

    good_cells = get_good_cells(metrics, cell_filters, '/hmmcopy/metrics/0')

    # NOTE(review): the grouping result is not used anywhere below. The call
    # is kept because its side effects are not visible from this function;
    # confirm and remove if it is pure.
    group_cells_by_row(good_cells, metrics, '/hmmcopy/metrics/0', sort_by_col=True)

    # len() rather than truthiness: the concrete container type returned by
    # get_good_cells is not visible here.
    have_cells = len(good_cells) > 0

    for infiles, outfiles, label in zip(in_filenames, outfilenames, labels):
        # Every copied plot gets the extension of the first good cell's file.
        # With no good cells there is nothing to copy, so skip the lookup
        # instead of raising IndexError on good_cells[0].
        extension = os.path.splitext(infiles[good_cells[0]])[-1] if have_cells else ''

        plotdir = os.path.join(tempdir, label)
        helpers.makedirs(plotdir)

        for cell in good_cells:
            shutil.copyfile(
                infiles[cell],
                os.path.join(plotdir, cell + "_" + label + extension))

        helpers.make_tarfile(outfiles, plotdir)
def tar_align_data(infiles, tar_output, tempdir):
    """Stage alignment files in one directory and archive it.

    :param infiles: iterable of mappings, key -> file path; each file is
        copied to ``<tempdir>/<key>_<basename>``
    :param tar_output: output tarball built from the whole of ``tempdir``
    :param tempdir: scratch directory (created if needed)
    """
    helpers.makedirs(tempdir)

    for file_group in infiles:
        for label, source in file_group.items():
            # Prefix with the key so equal basenames from different keys
            # get distinct names.
            staged_name = '{}_{}'.format(label, os.path.basename(source))
            helpers.copyfile(source, os.path.join(tempdir, staged_name))

    helpers.make_tarfile(tar_output, tempdir)
def align_lanes(
        fastq1, fastq2, output, output_mt, reports, tempdir, reference,
        sample_info, cell_id, library_id, adapter, adapter2,
        fastqscreen_detailed_metrics, fastqscreen_summary_metrics,
        fastqscreen_params, trim, center, mt_chrom_name='MT'
):
    """Align every lane of a cell, then merge bams and fastq-screen counts.

    Each lane is aligned with ``align_pe`` into its own directory under
    ``tempdir``. Afterwards the per-lane reports are archived, the lane bams
    are merged into ``output``, the fastq-screen counts are merged, and the
    mitochondrial chromosome is extracted from ``output`` into ``output_mt``.

    :param fastq1: mapping of lane id -> R1 fastq path (defines the lanes)
    :param fastq2: mapping of lane id -> R2 fastq path
    :param output: merged bam output path
    :param output_mt: output path for the extracted mitochondrial reads
    :param reports: output tarball of the per-lane report directories
    :param tempdir: scratch directory
    :param reference: reference forwarded to ``align_pe``
    :param sample_info: forwarded to ``align_pe``
    :param cell_id: forwarded to ``align_pe``
    :param library_id: forwarded to ``align_pe``
    :param adapter: forwarded to ``align_pe``
    :param adapter2: forwarded to ``align_pe``
    :param fastqscreen_detailed_metrics: merged detailed counts output
    :param fastqscreen_summary_metrics: merged summary counts output
    :param fastqscreen_params: forwarded to ``align_pe`` and to the
        fastq-screen merge
    :param trim: forwarded to ``align_pe``
    :param center: forwarded to ``align_pe``
    :param mt_chrom_name: name of the mitochondrial chromosome
    """
    all_reports_dir = os.path.join(tempdir, 'reports_per_lane')

    bams_by_lane = []
    detailed_by_lane = []
    summary_by_lane = []

    for lane_id in fastq1:
        lane_reports = os.path.join(all_reports_dir, lane_id)
        helpers.makedirs(lane_reports)

        lane_root = os.path.join(tempdir, lane_id)
        lane_scratch = os.path.join(lane_root, 'lane_temp')
        aligned_bam = os.path.join(lane_root, 'aligned.bam')

        detailed_txt = os.path.join(lane_reports, 'detailed.txt')
        summary_txt = os.path.join(lane_reports, 'summary.txt')

        bams_by_lane.append(aligned_bam)
        detailed_by_lane.append(detailed_txt)
        summary_by_lane.append(summary_txt)

        # Arguments are positional; keep this order in sync with align_pe.
        align_pe(
            fastq1[lane_id], fastq2[lane_id], aligned_bam, lane_reports,
            lane_scratch, reference, trim, center, sample_info, cell_id,
            lane_id, library_id, adapter, adapter2, detailed_txt,
            summary_txt, fastqscreen_params,
        )

    helpers.make_tarfile(reports, all_reports_dir)

    merge_postprocess_bams(
        bams_by_lane, output, os.path.join(tempdir, 'merge_bams'))

    fastqscreen.merge_fastq_screen_counts(
        detailed_by_lane, summary_by_lane,
        fastqscreen_detailed_metrics, fastqscreen_summary_metrics,
        fastqscreen_params
    )

    extract_mt_chromosome(output, output_mt, mt_chrom_name=mt_chrom_name)
def merge_pdf(in_filenames, outfilenames, metrics, cell_filters, tempdir, labels):
    """Collect the plots of cells that pass the filters into one tarball per label.

    Despite the name, nothing is merged into a single pdf: for every
    ``(infiles, outfile, label)`` triple the plots of the good cells are
    copied to ``<tempdir>/<label>/<cell>_<label><ext>`` and that directory is
    archived. When no cell passes the filters each archive is built from an
    empty directory.

    :param in_filenames: sequence of mappings, cell id -> plot path
    :param outfilenames: sequence of output tarball paths, parallel to
        ``in_filenames``
    :param metrics: metrics input forwarded to ``get_good_cells``
    :param cell_filters: filter specification forwarded to ``get_good_cells``
    :param tempdir: scratch directory (created if needed)
    :param labels: sequence of labels, parallel to ``in_filenames``
    """
    helpers.makedirs(tempdir)

    good_cells = get_good_cells(metrics, cell_filters)

    # len() rather than truthiness: the concrete container type returned by
    # get_good_cells is not visible here.
    have_cells = len(good_cells) > 0

    for infiles, outfiles, label in zip(in_filenames, outfilenames, labels):
        # Every copied plot gets the extension of the first good cell's file.
        # With no good cells there is nothing to copy, so skip the lookup
        # instead of raising IndexError on good_cells[0].
        extension = os.path.splitext(infiles[good_cells[0]])[-1] if have_cells else ''

        plotdir = os.path.join(tempdir, label)
        helpers.makedirs(plotdir)

        for cell in good_cells:
            shutil.copyfile(
                infiles[cell],
                os.path.join(plotdir, cell + "_" + label + extension))

        helpers.make_tarfile(outfiles, plotdir)