Beispiel #1
0
def filter_plot_tar(metrics, src_tar, pass_tar, fail_tar, tempdir, filters):
    """
    Split the per-cell segment plots of a tarball into pass and fail tarballs.

    The source tarball is extracted under tempdir, the metrics table is
    filtered with `filters`, and each cell's '<cell_id>_segments.png' is copied
    into a 'segs_pass' or 'segs_fail' directory depending on whether the cell
    survived the filtering. Each directory is then archived.

    :param metrics: metrics input, read with csvutils.read_csv_and_yaml;
        the resulting table must expose a `cell_id` column
    :param src_tar: tarball holding the plots under a 'segments' directory
    :param pass_tar: output tarball for cells that pass the filters
    :param fail_tar: output tarball for cells that fail the filters
    :param tempdir: scratch directory used for extraction and staging
    :param filters: filter specification handed to helpers.filter_metrics
    """
    allplots = os.path.join(tempdir, 'allplots')
    helpers.makedirs(allplots)
    helpers.extract_tar(src_tar, allplots)

    metrics_data = csvutils.read_csv_and_yaml(metrics)
    all_cells = metrics_data.cell_id.tolist()

    metrics_data = helpers.filter_metrics(metrics_data, filters)
    good_cells = metrics_data.cell_id.tolist()

    # Set membership keeps the pass/fail partition linear in the number of
    # cells instead of scanning the good-cell list once per cell.
    good_cell_set = set(good_cells)
    bad_cells = [cell for cell in all_cells if cell not in good_cell_set]

    # The pass and fail outputs are built identically; only the staging
    # directory, the cell list and the destination tarball differ.
    outputs = (
        ('segs_pass', good_cells, pass_tar),
        ('segs_fail', bad_cells, fail_tar),
    )
    for dirname, cells, out_tar in outputs:
        plotdir = os.path.join(tempdir, dirname)
        helpers.makedirs(plotdir)
        for cell in cells:
            plot_name = '{}_{}.png'.format(cell, 'segments')
            shutil.copyfile(
                os.path.join(allplots, 'segments', plot_name),
                os.path.join(plotdir, plot_name))
        helpers.make_tarfile(out_tar, plotdir)
Beispiel #2
0
def run_fastqc(fastq1, fastq2, reports, tempdir, config):
    """
    run fastqc on both fastq files and tar up the reports.

    An empty fastq file is skipped with a warning instead of being passed
    to fastqc.

    :param fastq1: path to the R1 fastq file
    :param fastq2: path to the R2 fastq file
    :param reports: output tarball that receives the fastqc reports directory
    :param tempdir: scratch directory; reports are staged in
        '<tempdir>/fastqc_reports'
    :param config: configuration mapping; config['containers'] is used to
        look up the 'fastqc' container context
    """
    container_ctx = helpers.get_container_ctx(config['containers'],
                                              'fastqc',
                                              docker_only=True)

    reports_dir = os.path.join(tempdir, 'fastqc_reports')
    if not os.path.exists(reports_dir):
        helpers.makedirs(reports_dir)

    for read_label, fastq in (('R1', fastq1), ('R2', fastq2)):
        out_html = os.path.join(reports_dir,
                                'fastqc_{}.html'.format(read_label))
        out_plot = os.path.join(reports_dir,
                                'fastqc_{}.zip'.format(read_label))
        if os.path.getsize(fastq) != 0:
            bamutils.produce_fastqc_report(fastq, out_html, out_plot, tempdir,
                                           **container_ctx)
        else:
            # report the file that is actually empty (the R2 branch used to
            # name fastq1)
            warnings.warn("fastq file %s is empty, skipping fastqc" % fastq)

    helpers.make_tarfile(reports, reports_dir)
def run_hmmcopy(
        bam_file,
        corrected_reads_filename,
        segments_filename,
        parameters_filename,
        metrics_filename,
        hmmcopy_tar,
        cell_id,
        hmmparams,
        tempdir,
        docker_image
):
    """
    Run the read correction and the hmmcopy script for one cell, then
    publish the resulting tables and a tarball of the working directory.

    :param bam_file: input passed to run_correction_hmmcopy
    :param corrected_reads_filename: destination for the 'reads' table
    :param segments_filename: destination for the 'segs' table
    :param parameters_filename: destination for the 'params' table
    :param metrics_filename: destination for the 'metrics' table
    :param hmmcopy_tar: output tarball of the per-cell hmmcopy directory
    :param cell_id: cell identifier, used to name the working directory
    :param hmmparams: parameters forwarded to both hmmcopy steps
    :param tempdir: scratch directory
    :param docker_image: image forwarded to both hmmcopy steps
    """
    helpers.makedirs(tempdir)

    # intermediate files shared between the correction and hmmcopy steps
    wig_path = os.path.join(tempdir, 'readcounter.wig')
    corrected_csv = os.path.join(tempdir, 'corrected_reads.csv')
    run_correction_hmmcopy(
        bam_file, corrected_csv, wig_path, hmmparams, docker_image
    )

    cell_workdir = os.path.join(tempdir, '{}_hmmcopy'.format(cell_id))
    helpers.makedirs(cell_workdir)
    run_hmmcopy_script(
        corrected_csv, cell_workdir, cell_id, hmmparams, docker_image
    )

    # the tables are read from the '0' subdirectory of the working directory
    results_dir = os.path.join(cell_workdir, '0')

    # (table name, destination) pairs; the table name selects both the
    # '<name>.csv' file to read and the dtypes entry to apply
    published_tables = (
        ('reads', corrected_reads_filename),
        ('params', parameters_filename),
        ('segs', segments_filename),
        ('metrics', metrics_filename),
    )
    for table, destination in published_tables:
        csvutils.rewrite_csv_file(
            os.path.join(results_dir, '{}.csv'.format(table)), destination,
            dtypes=dtypes()[table]
        )

    helpers.make_tarfile(hmmcopy_tar, cell_workdir)
Beispiel #4
0
def run_fastqc(fastq1, fastq2, reports, tempdir, containers):
    """
    run fastqc on both fastq files and tar up the reports.

    When both fastq files are smaller than 100 bytes, an empty reports
    tarball is produced and fastqc is not run at all. Otherwise each
    non-empty file gets its own report; a zero-byte file is skipped with
    a logged warning.

    :param fastq1: path to the R1 fastq file
    :param fastq2: path to the R2 fastq file
    :param reports: output tarball that receives the fastqc reports directory
    :param tempdir: scratch directory; reports are staged in
        '<tempdir>/fastqc_reports'
    :param containers: mapping whose 'fastqc' entry is passed as docker_image
    """
    reports_dir = os.path.join(tempdir, 'fastqc_reports')
    if not os.path.exists(reports_dir):
        helpers.makedirs(reports_dir)

    # empty fastq files
    if os.stat(fastq1).st_size < 100 and os.stat(fastq2).st_size < 100:
        helpers.make_tarfile(reports, reports_dir)
        return

    logger = logging.getLogger("single_cell.align.tasks")

    for read_label, fastq in (('R1', fastq1), ('R2', fastq2)):
        out_html = os.path.join(reports_dir,
                                'fastqc_{}.html'.format(read_label))
        out_plot = os.path.join(reports_dir,
                                'fastqc_{}.zip'.format(read_label))
        if os.path.getsize(fastq) != 0:
            bamutils.produce_fastqc_report(fastq, out_html, out_plot, tempdir,
                                           docker_image=containers['fastqc'])
        else:
            # Logger.warn is deprecated, so use warning(); the message names
            # the file that is actually empty (the R2 branch used to name
            # fastq1).
            logger.warning(
                "fastq file %s is empty, skipping fastqc", fastq)

    helpers.make_tarfile(reports, reports_dir)
Beispiel #5
0
def create_hmmcopy_data_tar(infiles, tar_output, tempdir):
    """
    Extract every input tarball into its own subdirectory of tempdir and
    archive the combined directory.

    :param infiles: mapping of key -> tarball path; the key names the
        subdirectory the tarball is extracted into
    :param tar_output: output tarball built from tempdir
    :param tempdir: scratch directory holding the extracted contents
    """
    helpers.makedirs(tempdir)

    for label, tarball in infiles.items():
        extract_dir = os.path.join(tempdir, label)
        helpers.extract_tar(tarball, extract_dir)

    helpers.make_tarfile(tar_output, tempdir)
def merge_pdf(in_filenames, outfilenames, metrics, cell_filters, tempdir,
              labels):
    """
    Collect the plots of the cells that pass the filters into one tarball
    per label.

    For each (infiles, outfile, label) triple the files of the good cells
    are copied to '<tempdir>/<label>/<cell>_<label><ext>' and that directory
    is archived. The extension is taken from the first good cell's file.
    When no cell passes the filters, an empty directory is archived for
    each label instead of failing on the extension lookup.

    :param in_filenames: sequence of mappings, cell id -> plot file path
    :param outfilenames: output tarball paths, parallel to in_filenames
    :param metrics: metrics input handed to get_good_cells
    :param cell_filters: filter specification handed to get_good_cells
    :param tempdir: scratch directory for staging the plots
    :param labels: labels, parallel to in_filenames
    """
    helpers.makedirs(tempdir)

    good_cells = get_good_cells(metrics, cell_filters, '/hmmcopy/metrics/0')

    # NOTE(review): the grouping result is not used anywhere below. The call
    # is kept in case it has side effects (e.g. validating the metrics);
    # confirm and drop it if it is pure.
    group_cells_by_row(good_cells,
                       metrics,
                       '/hmmcopy/metrics/0',
                       sort_by_col=True)

    # len() rather than truthiness so array-like results are handled too
    has_good_cells = len(good_cells) > 0

    for infiles, outfiles, label in zip(in_filenames, outfilenames, labels):

        # every copied plot reuses the extension of the first good cell's
        # file; with no good cells there is nothing to copy
        extension = ''
        if has_good_cells:
            extension = os.path.splitext(infiles[good_cells[0]])[-1]

        plotdir = os.path.join(tempdir, label)

        helpers.makedirs(plotdir)

        for cell in good_cells:
            shutil.copyfile(
                infiles[cell],
                os.path.join(plotdir, cell + "_" + label + extension))

        helpers.make_tarfile(outfiles, plotdir)
Beispiel #7
0
def tar_align_data(infiles, tar_output, tempdir):
    """
    Stage the given files in tempdir under key-prefixed names and archive
    the directory.

    :param infiles: iterable of mappings, key -> file path; each file is
        copied to '<tempdir>/<key>_<basename>'
    :param tar_output: output tarball built from tempdir
    :param tempdir: scratch directory used for staging
    """
    helpers.makedirs(tempdir)

    for file_group in infiles:
        for label, source in file_group.items():
            staged_name = '{}_{}'.format(label, os.path.basename(source))
            helpers.copyfile(source, os.path.join(tempdir, staged_name))

    helpers.make_tarfile(tar_output, tempdir)
Beispiel #8
0
def align_lanes(
        fastq1, fastq2, output, output_mt, reports, tempdir, reference,
        sample_info, cell_id, library_id, adapter,
        adapter2, fastqscreen_detailed_metrics,
        fastqscreen_summary_metrics, fastqscreen_params, trim, center, mt_chrom_name='MT'
):
    """
    Align every lane of a cell, then merge the per-lane results.

    Each lane id in fastq1 is aligned with align_pe into its own bam under
    tempdir. Afterwards the per-lane report directories are archived, the
    lane bams are merged into `output`, the per-lane fastqscreen counts are
    merged, and the mitochondrial chromosome is extracted into `output_mt`.

    :param fastq1: mapping of lane id -> R1 fastq; its keys drive the loop
    :param fastq2: mapping of lane id -> R2 fastq
    :param output: merged bam output
    :param output_mt: output of extract_mt_chromosome
    :param reports: output tarball of the per-lane report directories
    :param tempdir: scratch directory
    :param fastqscreen_detailed_metrics: merged detailed fastqscreen output
    :param fastqscreen_summary_metrics: merged summary fastqscreen output
    :param mt_chrom_name: chromosome name forwarded to extract_mt_chromosome

    The remaining arguments are forwarded unchanged to align_pe.
    """
    reports_root = os.path.join(tempdir, 'reports_per_lane')

    lane_bams = []
    detailed_counts = []
    summary_counts = []

    for lane_id in fastq1:
        # per-lane report directory and fastqscreen count files
        lane_reports = os.path.join(reports_root, lane_id)
        helpers.makedirs(lane_reports)
        lane_detailed = os.path.join(lane_reports, 'detailed.txt')
        lane_summary = os.path.join(lane_reports, 'summary.txt')

        # per-lane working area and alignment output
        lane_root = os.path.join(tempdir, lane_id)
        lane_scratch = os.path.join(lane_root, 'lane_temp')
        lane_bam = os.path.join(lane_root, 'aligned.bam')

        lane_bams.append(lane_bam)
        detailed_counts.append(lane_detailed)
        summary_counts.append(lane_summary)

        align_pe(
            fastq1[lane_id], fastq2[lane_id], lane_bam, lane_reports,
            lane_scratch, reference, trim, center, sample_info, cell_id,
            lane_id, library_id, adapter, adapter2,
            lane_detailed, lane_summary, fastqscreen_params,
        )

    helpers.make_tarfile(reports, reports_root)

    merge_dir = os.path.join(tempdir, 'merge_bams')
    merge_postprocess_bams(lane_bams, output, merge_dir)

    fastqscreen.merge_fastq_screen_counts(
        detailed_counts, summary_counts,
        fastqscreen_detailed_metrics, fastqscreen_summary_metrics,
        fastqscreen_params
    )

    extract_mt_chromosome(output, output_mt, mt_chrom_name=mt_chrom_name)
Beispiel #9
0
def merge_pdf(in_filenames, outfilenames, metrics, cell_filters, tempdir,
              labels):
    """
    Collect the plots of the cells that pass the filters into one tarball
    per label.

    For each (infiles, outfile, label) triple the files of the good cells
    are copied to '<tempdir>/<label>/<cell>_<label><ext>' and that directory
    is archived. The extension is taken from the first good cell's file.
    When no cell passes the filters, an empty directory is archived for
    each label instead of failing on the extension lookup.

    :param in_filenames: sequence of mappings, cell id -> plot file path
    :param outfilenames: output tarball paths, parallel to in_filenames
    :param metrics: metrics input handed to get_good_cells
    :param cell_filters: filter specification handed to get_good_cells
    :param tempdir: scratch directory for staging the plots
    :param labels: labels, parallel to in_filenames
    """
    helpers.makedirs(tempdir)

    good_cells = get_good_cells(metrics, cell_filters)

    # len() rather than truthiness so array-like results are handled too
    has_good_cells = len(good_cells) > 0

    for infiles, outfiles, label in zip(in_filenames, outfilenames, labels):

        # every copied plot reuses the extension of the first good cell's
        # file; with no good cells there is nothing to copy
        extension = ''
        if has_good_cells:
            extension = os.path.splitext(infiles[good_cells[0]])[-1]

        plotdir = os.path.join(tempdir, label)

        helpers.makedirs(plotdir)

        for cell in good_cells:
            shutil.copyfile(
                infiles[cell],
                os.path.join(plotdir, cell + "_" + label + extension))

        helpers.make_tarfile(outfiles, plotdir)