def _attach_ataqv_report(samples, ataqv_output):
    """Return samples with ataqv_output stored under each sample's "ataqv_report" key."""
    annotated = [tz.assoc_in(data, ["ataqv_report"], ataqv_output)
                 for data in dd.sample_data_iterator(samples)]
    return dd.get_samples_from_datalist(annotated)


def create_ataqv_report(samples):
    """
    make the ataqv report from a set of ATAC-seq samples

    The report is written to <work_dir>/qc/ataqv, where the work directory is
    taken from the first sample. Every sample gets an "ataqv_report" entry of
    the form {"base": <index.html path>, "secondary": <report files>}.

    If index.html already exists, the existing report is attached without
    running mkarv again. If no sample has an existing ataqv file recorded under
    the ("ataqv", "base") key of its summary QC, the samples are returned
    unchanged.
    """
    first_data = samples[0][0]
    reportdir = os.path.join(dd.get_work_dir(first_data), "qc", "ataqv")
    sentinel = os.path.join(reportdir, "index.html")
    # index.html is used as the marker that a report was already generated
    if utils.file_exists(sentinel):
        ataqv_output = {"base": sentinel,
                        "secondary": get_ataqv_report_files(reportdir)}
        return _attach_ataqv_report(samples, ataqv_output)
    mkarv = config_utils.get_program("mkarv", dd.get_config(first_data))
    # collect the per-sample ataqv files that are recorded and present on disk
    ataqv_files = []
    for data in dd.sample_data_iterator(samples):
        qc = dd.get_summary_qc(data)
        ataqv_file = tz.get_in(("ataqv", "base"), qc, None)
        if ataqv_file and utils.file_exists(ataqv_file):
            ataqv_files.append(ataqv_file)
    if not ataqv_files:
        return samples
    ataqv_json_file_string = " ".join(ataqv_files)
    # mkarv is pointed at the transactional path handed out by file_transaction
    # rather than at reportdir itself
    with file_transaction(reportdir) as txreportdir:
        cmd = f"{mkarv} {txreportdir} {ataqv_json_file_string}"
        message = f"Creating ataqv report from {ataqv_json_file_string}."
        do.run(cmd, message)
    ataqv_output = {"base": sentinel,
                    "secondary": get_ataqv_report_files(reportdir)}
    return _attach_ataqv_report(samples, ataqv_output)
def create_peaktable(samples):
    """
    Merge the per-sample peak count files into one table for differential
    peak calling.

    The combined table is requested at <work_dir>/consensus/consensus-counts.tsv,
    with the work directory and the chip method both taken from the first
    sample. The value returned by count.combine_count_files is stored on every
    sample under ("peak_counts", "peaktable").

    NOTE(review): a later definition of create_peaktable in this file rebinds
    this name; confirm which of the two is meant to be kept.
    """
    first = dd.get_data_from_sample(samples[0])
    out_file = os.path.join(dd.get_work_dir(first), "consensus",
                            "consensus-counts.tsv")
    method = dd.get_chip_method(first)
    count_files = []
    if method == "chip":
        # NOTE(review): ("peak_counts") is a plain string, not a 1-tuple. If tz
        # is toolz, get_in walks it one character at a time. Kept as-is here;
        # confirm against the code that stores peak_counts before changing it.
        count_files = [tz.get_in(("peak_counts"), sample)
                       for sample in dd.sample_data_iterator(samples)]
    elif method == "atac":
        for sample in dd.sample_data_iterator(samples):
            work_bam = dd.get_work_bam(sample)
            if bam.is_paired(work_bam):
                fraction = "NF"
            else:
                logger.info(f"Creating peak table from full BAM file because "
                            f"{work_bam} is single-ended.")
                fraction = "full"
            count_files.append(tz.get_in(("peak_counts", fraction), sample))
    combined_peaks = count.combine_count_files(count_files, out_file,
                                               ext=".counts")
    annotated = [tz.assoc_in(sample, ("peak_counts", "peaktable"), combined_peaks)
                 for sample in dd.sample_data_iterator(samples)]
    return dd.get_samples_from_datalist(annotated)
def create_peaktable(samples):
    """
    create a table of peak counts per sample to use with differential peak calling

    This definition follows an earlier create_peaktable in the same file and is
    therefore the one bound to the name. It now treats single-end ATAC samples
    the same way the earlier definition does: when the work BAM is not paired,
    the ("peak_counts", "full") entry is used instead of ("peak_counts", "NF").

    The combined table is requested at <work_dir>/consensus/consensus-counts.tsv
    (work directory taken from the first sample). The value returned by
    count.combine_count_files is stored on every sample under
    ("peak_counts", "peaktable").
    """
    data = dd.get_data_from_sample(samples[0])
    peakcounts = []
    out_dir = os.path.join(dd.get_work_dir(data), "consensus")
    out_file = os.path.join(out_dir, "consensus-counts.tsv")
    chip_method = dd.get_chip_method(data)
    if chip_method == "chip":
        # NOTE(review): ("peak_counts") is a plain string, not a 1-tuple. Left
        # unchanged; confirm against the code that stores peak_counts.
        for sample_data in dd.sample_data_iterator(samples):
            peakcounts.append(tz.get_in(("peak_counts"), sample_data))
    elif chip_method == "atac":
        for sample_data in dd.sample_data_iterator(samples):
            if bam.is_paired(dd.get_work_bam(sample_data)):
                peakcounts.append(tz.get_in(("peak_counts", "NF"), sample_data))
            else:
                # single-end fallback, mirroring the earlier definition
                logger.info(f"Creating peak table from full BAM file because "
                            f"{dd.get_work_bam(sample_data)} is single-ended.")
                peakcounts.append(tz.get_in(("peak_counts", "full"), sample_data))
    combined_peaks = count.combine_count_files(peakcounts, out_file, ext=".counts")
    new_data = []
    for sample_data in dd.sample_data_iterator(samples):
        new_data.append(tz.assoc_in(sample_data, ("peak_counts", "peaktable"),
                                    combined_peaks))
    return dd.get_samples_from_datalist(new_data)