def _segment_normalized_gatk(cnr_file, work_dir, paired):
    """Segment normalized inputs with GATK4 and convert the outputs to standard formats.

    Runs ``gatkcnv.model_segments`` inside a ``gatk-cnv`` sub-directory of
    ``work_dir`` and then writes two tab-separated files:

    - a ``.seg`` file (the GATK ``.cr.seg`` output re-headered as
      ``ID, chrom, loc.start, loc.end, num.mark, seg.mean``, with ``ID`` set to
      the tumor sample name);
    - a ``<sample>.cnr`` file with columns
      ``chrom, start, end, gene, log2, depth, weight``, built by joining the
      log2 values in ``cnr_file`` with the tumor bin depths on
      ``(chrom, start, end)``; ``weight`` is fixed at 1.0.

    Each output is only regenerated when it is not up to date relative to its
    input, and is written through ``file_transaction``.

    Args:
        cnr_file: Tab-separated table of normalized log2 values with four
            columns and a header row; lines starting with ``@`` are skipped.
        work_dir: Parent directory; outputs go to ``<work_dir>/gatk-cnv``.
        paired: Object exposing ``tumor_data``, from which the sample name and
            the ``depth -> bins -> antitarget`` coverage file are read.

    Returns:
        Tuple ``(std_cnr_file, std_seg_file)`` of output paths.
    """
    work_dir = utils.safe_makedir(os.path.join(work_dir, "gatk-cnv"))
    seg_file = gatkcnv.model_segments(cnr_file, work_dir, paired)["seg"]
    sample_name = dd.get_sample_name(paired.tumor_data)

    std_seg_file = seg_file.replace(".cr.seg", ".seg")
    if not utils.file_uptodate(std_seg_file, seg_file):
        with file_transaction(std_seg_file) as tx_out_file:
            seg_df = pd.read_csv(
                seg_file, sep="\t", comment="@", header=0,
                names=["chrom", "loc.start", "loc.end", "num.mark", "seg.mean"])
            seg_df.insert(0, "ID", [sample_name] * len(seg_df))
            seg_df.to_csv(tx_out_file, sep="\t", header=True, index=False)

    std_cnr_file = os.path.join(work_dir, "%s.cnr" % sample_name)
    if not utils.file_uptodate(std_cnr_file, cnr_file):
        with file_transaction(std_cnr_file) as tx_out_file:
            # Read contig names as strings on both sides of the join. With
            # inferred dtypes, bare contig names such as 1..22/X/Y can come
            # back as integers in one table and strings (or a mix) in the
            # other, which makes the merge fail or silently drop bins.
            logdf = pd.read_csv(
                cnr_file, sep="\t", comment="@", header=0,
                names=["chrom", "start", "end", "log2"],
                dtype={"chrom": str})
            covdf = pd.read_csv(
                tz.get_in(["depth", "bins", "antitarget"], paired.tumor_data),
                sep="\t", header=None,
                names=["chrom", "start", "end", "orig.name", "depth", "gene"],
                dtype={"chrom": str})
            merged = pd.merge(logdf, covdf, on=["chrom", "start", "end"])
            # Selecting the output columns also drops "orig.name".
            cnr_df = merged[["chrom", "start", "end", "gene", "log2", "depth"]]
            cnr_df = cnr_df.assign(weight=1.0)
            cnr_df.to_csv(tx_out_file, sep="\t", header=True, index=False)
    return std_cnr_file, std_seg_file