def summarize_grading(samples, vkey="validate"):
    """Provide summaries of grading results across all samples.

    Handles both traditional pipelines (validation part of variants) and CWL
    pipelines (validation at top level).

    For every validation group returned by ``_group_validate_samples`` a CSV
    named ``grading-summary-<group>.csv`` is written into ``<work>/<vkey>``.
    Each validation dictionary found gets a ``grading_summary`` entry pointing
    at that CSV and a ``grading_plots`` entry holding the generated plots.

    Args:
        samples: Possibly nested sample dictionaries; flattened with
            ``utils.flatten`` before use.
        vkey: Key holding validation information, looked up inside each
            variant and, as a fallback, at the top level of the sample.

    Returns:
        A list of single-item lists. When no grading information is present
        this is every input sample wrapped in a list. Otherwise it is the
        second value returned by ``_group_validate_samples`` (presumably the
        samples without validation -- confirm against that helper) extended
        with one ``[data]`` entry per validated sample.
    """
    samples = list(utils.flatten(samples))
    if not _has_grading_info(samples, vkey):
        return [[d] for d in samples]
    validate_dir = utils.safe_makedir(
        os.path.join(samples[0]["dirs"]["work"], vkey))
    header = ["sample", "caller", "variant.type", "category", "value"]
    validated, out = _group_validate_samples(samples, vkey)
    for vname, vitems in validated.items():
        out_csv = os.path.join(validate_dir, "grading-summary-%s.csv" % vname)
        plot_data = []
        plot_files = []
        with open(out_csv, "w") as out_handle:
            writer = csv.writer(out_handle)
            writer.writerow(header)
            # `or ""` keeps the key a string when "lane" is present but None;
            # comparing None with str raises TypeError on Python 3.
            ordered = sorted(
                vitems,
                key=lambda x: x.get("lane", dd.get_sample_name(x)) or "")
            for data in ordered:
                # Skip non-dictionary variant entries instead of failing on
                # a missing .get attribute.
                validations = [variant.get(vkey)
                               for variant in data.get("variants", [])
                               if isinstance(variant, dict)]
                validations = [v for v in validations if v]
                # Fall back to validation stored at the top level of the sample.
                if not validations and vkey in data:
                    validations = [data.get(vkey)]
                for validate in validations:
                    if not validate:
                        continue
                    validate["grading_summary"] = out_csv
                    summary = validate.get("summary")
                    if validate.get("grading"):
                        for row in _get_validate_plotdata_yaml(
                                validate["grading"], data):
                            writer.writerow(row)
                            plot_data.append(row)
                    elif summary and summary != "None":
                        if isinstance(summary, (list, tuple)):
                            plot_files.extend(list(set(summary)))
                        else:
                            plot_files.append(summary)
        # Plotting happens once the CSV handle has been closed.
        if plot_files:
            plots = validateplot.classifyplot_from_plotfiles(
                plot_files, out_csv)
        elif plot_data:
            # plot_data is only non-empty when `ordered` had at least one
            # item; the configuration of the last one is used.
            plots = validateplot.create(plot_data, header, 0,
                                        ordered[-1]["config"],
                                        os.path.splitext(out_csv)[0])
        else:
            plots = []
        for data in vitems:
            if data.get(vkey):
                data[vkey]["grading_plots"] = plots
            for variant in data.get("variants", []):
                if isinstance(variant, dict) and variant.get(vkey):
                    variant[vkey]["grading_plots"] = plots
            out.append([data])
    return out
def summarize_grading(samples, vkey="validate"):
    """Summarize grading results for every sample.

    Covers traditional pipelines, where validation sits inside each variant,
    and CWL pipelines, where validation sits at the top level of the sample.

    One ``grading-summary-<group>.csv`` file is written per validation group
    into ``<work>/<vkey>``. Every validation dictionary encountered receives
    ``grading_summary`` (the CSV path) and ``grading_plots`` (the plots
    produced for its group).

    Args:
        samples: Possibly nested sample dictionaries, flattened via
            ``utils.flatten``.
        vkey: Key under which validation details are stored.

    Returns:
        A list of one-element lists: either each input sample wrapped on its
        own when there is no grading information, or the list handed back by
        ``_group_validate_samples`` with ``[data]`` appended for each grouped
        sample.
    """
    def _lane_or_name(item):
        # Sort by lane when available, else by sample name; never by None.
        return item.get("lane", dd.get_sample_name(item)) or ""

    samples = list(utils.flatten(samples))
    if not _has_grading_info(samples, vkey):
        return [[sample] for sample in samples]

    work_dir = samples[0]["dirs"]["work"]
    validate_dir = utils.safe_makedir(os.path.join(work_dir, vkey))
    header = ["sample", "caller", "variant.type", "category", "value"]
    _summarize_combined(samples, vkey)
    group_keys = (["metadata", "validate_batch"],
                  ["metadata", "batch"],
                  ["description"])
    validated, out = _group_validate_samples(samples, vkey, group_keys)

    for group_name, group_items in validated.items():
        out_csv = os.path.join(validate_dir,
                               "grading-summary-%s.csv" % group_name)
        plot_data = []
        plot_files = []
        with open(out_csv, "w") as out_handle:
            csv_writer = csv.writer(out_handle)
            csv_writer.writerow(header)
            ordered_items = sorted(group_items, key=_lane_or_name)
            for item in ordered_items:
                # Collect truthy validation entries from dictionary variants.
                validations = []
                for variant in item.get("variants", []):
                    if isinstance(variant, dict):
                        found = variant.get(vkey)
                        if found:
                            validations.append(found)
                # CWL style: validation lives directly on the sample.
                if not validations and vkey in item:
                    validations = [item.get(vkey)]
                for validate in validations:
                    if not validate:
                        continue
                    validate["grading_summary"] = out_csv
                    if validate.get("grading"):
                        rows = _get_validate_plotdata_yaml(
                            validate["grading"], item)
                        for row in rows:
                            csv_writer.writerow(row)
                            plot_data.append(row)
                        continue
                    summary = validate.get("summary")
                    if not summary or summary == "None":
                        continue
                    if isinstance(validate["summary"], (list, tuple)):
                        plot_files.extend(list(set(validate["summary"])))
                    else:
                        plot_files.append(validate["summary"])

        # Plots are produced after the CSV handle has been closed.
        if plot_files:
            plots = validateplot.classifyplot_from_plotfiles(
                plot_files, out_csv)
        elif plot_data:
            # Rows exist only if at least one item was processed; the last
            # processed item supplies the configuration.
            plots = validateplot.create(
                plot_data, header, 0, ordered_items[-1]["config"],
                os.path.splitext(out_csv)[0])
        else:
            plots = []

        for item in group_items:
            if item.get(vkey):
                item[vkey]["grading_plots"] = plots
            for variant in item.get("variants", []):
                if isinstance(variant, dict) and variant.get(vkey):
                    variant[vkey]["grading_plots"] = plots
            out.append([item])
    return out
def summarize_grading(samples):
    """Provide summaries of grading results across all samples.

    For each validation group returned by ``_group_validate_samples`` a CSV
    named ``grading-summary-<group>.csv`` is written to ``<work>/validate``.
    Every variant with a ``validate`` entry receives ``grading_summary`` (the
    CSV path) and ``grading_plots`` (the plots for its group, or None).

    Args:
        samples: Iterable of sample items; each is normalized with
            ``utils.to_single_data``.

    Returns:
        A list of single-item lists: every sample wrapped individually when
        no grading information exists, otherwise the list returned by
        ``_group_validate_samples`` extended with ``[data]`` for each grouped
        sample.
    """
    samples = [utils.to_single_data(d) for d in samples]
    if not _has_grading_info(samples):
        return [[d] for d in samples]
    validate_dir = utils.safe_makedir(
        os.path.join(samples[0]["dirs"]["work"], "validate"))
    header = ["sample", "caller", "variant.type", "category", "value"]
    validated, out = _group_validate_samples(samples)
    # .items() works on Python 2 and 3; .iteritems() is Python 2 only.
    for vname, vitems in validated.items():
        out_csv = os.path.join(validate_dir, "grading-summary-%s.csv" % vname)
        plot_data = []
        plot_files = []
        with open(out_csv, "w") as out_handle:
            writer = csv.writer(out_handle)
            writer.writerow(header)
            # `or ""` avoids comparing None with str when "lane" is None.
            ordered = sorted(
                vitems,
                key=lambda x: x.get("lane", dd.get_sample_name(x)) or "")
            for data in ordered:
                for variant in data.get("variants", []):
                    if not variant.get("validate"):
                        continue
                    variant["validate"]["grading_summary"] = out_csv
                    if tz.get_in(["validate", "grading"], variant):
                        for row in _get_validate_plotdata_yaml(variant, data):
                            writer.writerow(row)
                            plot_data.append(row)
                    elif tz.get_in(["validate", "summary"], variant):
                        plot_files.append(variant["validate"]["summary"])
        # Plotting happens once the CSV handle has been closed.
        if plot_files:
            plots = validateplot.classifyplot_from_plotfiles(
                plot_files, out_csv)
        elif plot_data:
            # Rows exist only if `ordered` is non-empty; the configuration
            # of the last processed sample is used.
            plots = validateplot.create(plot_data, header, 0,
                                        ordered[-1]["config"],
                                        os.path.splitext(out_csv)[0])
        else:
            plots = None
        for data in vitems:
            for variant in data.get("variants", []):
                if variant.get("validate"):
                    variant["validate"]["grading_plots"] = plots
            out.append([data])
    return out
def summarize_grading(samples):
    """Provide summaries of grading results across all samples.

    Writes a single ``grading-summary.csv`` into ``<work>/validate`` holding
    one row per (sample, caller, variant type, category, value) read from
    each variant's grading YAML file. Every variant with a ``validate`` entry
    receives ``grading_summary`` (the CSV path) and ``grading_plots`` (plots
    created from that sample's rows, or None when it contributed no rows).

    Args:
        samples: Sequence whose items are indexable; the sample dictionary is
            taken from position 0 of each item.

    Returns:
        ``samples`` unchanged when there is no grading information, otherwise
        a list with one ``[data]`` entry per input sample.
    """
    if not _has_grading_info(samples):
        return samples
    validate_dir = utils.safe_makedir(
        os.path.join(samples[0][0]["dirs"]["work"], "validate"))
    out_csv = os.path.join(validate_dir, "grading-summary.csv")
    plot_base = os.path.splitext(out_csv)[0]
    header = ["sample", "caller", "variant.type", "category", "value"]
    out = []
    with open(out_csv, "w") as out_handle:
        writer = csv.writer(out_handle)
        writer.writerow(header)
        # Incremented only when a sample actually produced plots.
        plot_num = 0
        for data in (x[0] for x in samples):
            plot_data = []
            for variant in data.get("variants", []):
                if not variant.get("validate"):
                    continue
                variant["validate"]["grading_summary"] = out_csv
                with open(variant["validate"]["grading"]) as in_handle:
                    # NOTE(review): was yaml.load() without a Loader, which
                    # can construct arbitrary objects and is rejected by
                    # PyYAML >= 6. safe_load assumes the grading file holds
                    # only plain YAML types -- confirm with its producer.
                    grade_stats = yaml.safe_load(in_handle)
                caller = variant.get("variantcaller", "")
                for sample_stats in grade_stats:
                    sample = sample_stats["sample"]
                    for vtype, cat, val in _flatten_grading(sample_stats):
                        row = [sample, caller, vtype, cat, val]
                        writer.writerow(row)
                        plot_data.append(row)
            if plot_data:
                plots = validateplot.create(plot_data, header, plot_num,
                                            data["config"], plot_base)
            else:
                plots = None
            if plots:
                plot_num += 1
            for variant in data.get("variants", []):
                if variant.get("validate"):
                    variant["validate"]["grading_plots"] = plots
            out.append([data])
    return out
def summarize_grading(samples):
    """Provide summaries of grading results across all samples.

    Samples are grouped by ``_group_validate_samples``; each group gets its
    own ``grading-summary-<group>.csv`` under ``<work>/validate``. Variants
    carrying a ``validate`` entry are annotated with ``grading_summary`` (the
    CSV path) and ``grading_plots`` (plots for the group, or None).

    Args:
        samples: Iterable of sample items, each passed through
            ``utils.to_single_data``.

    Returns:
        A list of single-item lists. Without grading information this is each
        sample wrapped on its own; otherwise it is the list returned by
        ``_group_validate_samples`` plus ``[data]`` for every grouped sample.
    """
    samples = [utils.to_single_data(d) for d in samples]
    if not _has_grading_info(samples):
        return [[d] for d in samples]
    validate_dir = utils.safe_makedir(
        os.path.join(samples[0]["dirs"]["work"], "validate"))
    header = ["sample", "caller", "variant.type", "category", "value"]
    validated, out = _group_validate_samples(samples)
    # dict.iteritems() does not exist on Python 3; items() works on both.
    for vname, vitems in validated.items():
        out_csv = os.path.join(validate_dir, "grading-summary-%s.csv" % vname)
        plot_data = []
        plot_files = []
        with open(out_csv, "w") as out_handle:
            writer = csv.writer(out_handle)
            writer.writerow(header)
            # Fall back to "" so a None lane never reaches the comparison;
            # None and str cannot be ordered on Python 3.
            ordered = sorted(
                vitems,
                key=lambda x: x.get("lane", dd.get_sample_name(x)) or "")
            for data in ordered:
                for variant in data.get("variants", []):
                    if variant.get("validate"):
                        variant["validate"]["grading_summary"] = out_csv
                        if tz.get_in(["validate", "grading"], variant):
                            for row in _get_validate_plotdata_yaml(variant,
                                                                   data):
                                writer.writerow(row)
                                plot_data.append(row)
                        elif tz.get_in(["validate", "summary"], variant):
                            plot_files.append(variant["validate"]["summary"])
        # The CSV handle is closed before any plotting is attempted.
        if plot_files:
            plots = validateplot.classifyplot_from_plotfiles(
                plot_files, out_csv)
        elif plot_data:
            # Non-empty plot_data implies at least one sample was processed;
            # the last one supplies the configuration.
            plots = validateplot.create(plot_data, header, 0,
                                        ordered[-1]["config"],
                                        os.path.splitext(out_csv)[0])
        else:
            plots = None
        for data in vitems:
            for variant in data.get("variants", []):
                if variant.get("validate"):
                    variant["validate"]["grading_plots"] = plots
            out.append([data])
    return out
def summarize_grading(samples):
    """Provide summaries of grading results across all samples.

    Samples are grouped by ``_group_validate_samples``; each group gets a
    ``grading-summary-<group>.csv`` under ``<work>/validate`` with one row per
    (sample, caller, variant type, category, value) read from the grading
    YAML of each validated variant. Validated variants are annotated with
    ``grading_summary`` (the CSV path) and ``grading_plots`` (plots for the
    group, or None when the group produced no rows).

    Args:
        samples: Sequence of indexable items; the working directory is read
            from ``samples[0][0]``.

    Returns:
        ``samples`` unchanged when there is no grading information, otherwise
        the list returned by ``_group_validate_samples`` extended with
        ``[data]`` for every grouped sample.
    """
    if not _has_grading_info(samples):
        return samples
    validate_dir = utils.safe_makedir(
        os.path.join(samples[0][0]["dirs"]["work"], "validate"))
    header = ["sample", "caller", "variant.type", "category", "value"]
    validated, out = _group_validate_samples(samples)
    # .items() works on Python 2 and 3; .iteritems() is Python 2 only.
    for vname, vitems in validated.items():
        out_csv = os.path.join(validate_dir, "grading-summary-%s.csv" % vname)
        plot_data = []
        with open(out_csv, "w") as out_handle:
            writer = csv.writer(out_handle)
            writer.writerow(header)
            for data in vitems:
                for variant in data.get("variants", []):
                    if not variant.get("validate"):
                        continue
                    variant["validate"]["grading_summary"] = out_csv
                    with open(variant["validate"]["grading"]) as in_handle:
                        # NOTE(review): was yaml.load() without a Loader,
                        # which can construct arbitrary objects and is
                        # rejected by PyYAML >= 6. safe_load assumes plain
                        # YAML types in the grading file -- confirm.
                        grade_stats = yaml.safe_load(in_handle)
                    caller = variant.get("variantcaller", "")
                    for sample_stats in grade_stats:
                        sample = sample_stats["sample"]
                        for vtype, cat, val in _flatten_grading(sample_stats):
                            row = [sample, caller, vtype, cat, val]
                            writer.writerow(row)
                            plot_data.append(row)
        if plot_data:
            # `data` is the last sample iterated above; rows can only exist
            # if that loop ran, so the name is bound here.
            plots = validateplot.create(plot_data, header, 0, data["config"],
                                        os.path.splitext(out_csv)[0])
        else:
            plots = None
        for data in vitems:
            for variant in data.get("variants", []):
                if variant.get("validate"):
                    variant["validate"]["grading_plots"] = plots
            out.append([data])
    return out