def _summarize_combined(samples, vkey):
    """Write combined grading summary CSVs and plots for grouped samples.

    Groups come from `_group_validate_samples`, called with the
    `metadata`/`validate_combine` key path. For every group with a non-empty
    name, the integer values found in each sample's validation summary CSV are
    summed per (caller, vtype, metric) and written to
    `grading-summary-<group>.csv` inside `<work dir>/<vkey>`. That file is then
    handed to `validateplot.classifyplot_from_valfile`.

    samples -- list of sample dictionaries; the first one supplies the work directory.
    vkey -- key under which validation results are stored, also used as the
            output directory name.
    """
    out_dir = utils.safe_makedir(os.path.join(samples[0]["dirs"]["work"], vkey))
    grouped, _ = _group_validate_samples(samples, vkey, [["metadata", "validate_combine"]])
    for group_name, group_items in grouped.items():
        # Unnamed groups are not summarized.
        if not group_name:
            continue
        totals = collections.defaultdict(int)
        ordered = sorted(group_items, key=lambda x: x.get("lane", dd.get_sample_name(x)))
        for data in ordered:
            # Prefer per-variant validations; fall back to one stored directly on the sample.
            found = [v for v in (variant.get(vkey) for variant in data.get("variants", [])) if v]
            if not found and vkey in data:
                found = [data.get(vkey)]
            for validation in found:
                with open(validation["summary"]) as in_handle:
                    rows = csv.reader(in_handle)
                    next(rows)  # skip the header line
                    for _, caller, vtype, metric, value in rows:
                        totals[(caller, vtype, metric)] += int(value)
        out_csv = os.path.join(out_dir, "grading-summary-%s.csv" % group_name)
        sample_label = "combined-%s" % group_name
        with open(out_csv, "w") as out_handle:
            writer = csv.writer(out_handle)
            writer.writerow(["sample", "caller", "vtype", "metric", "value"])
            writer.writerows([sample_label, caller, vtype, metric, total]
                             for (caller, vtype, metric), total in totals.items())
        # Called for its effect only; the return value was never used.
        validateplot.classifyplot_from_valfile(out_csv)
def evaluate(data):
    """Provide evaluations for multiple callers split by structural variant type.

    Reads the truth set configuration from `config -> algorithm -> svvalidate`.
    Nothing is evaluated unless both that setting and `data["sv"]` are present.

    - If the setting is a dictionary, `_evaluate_multi` and `_plot_evaluation`
      are run and `data["sv-validate"]` gets the keys `csv`, `plot` and `df`.
    - Otherwise it must be a string naming an existing file. `_evaluate_vcf` is
      run, the summary is plotted as PNG and `data["sv-validate"]` gets the
      keys `csv` and `plot` (the first plot returned).

    Returns the input `data` dictionary, updated in place when an evaluation ran.
    Raises AssertionError if a non-dictionary truth set is not an existing file path.
    """
    # The output directory is prepared up front, even when nothing is evaluated.
    work_dir = utils.safe_makedir(os.path.join(data["dirs"]["work"], "structural",
                                               dd.get_sample_name(data), "validate"))
    truth_sets = tz.get_in(["config", "algorithm", "svvalidate"], data)
    if truth_sets and data.get("sv"):
        if isinstance(truth_sets, dict):
            val_summary, df_csv = _evaluate_multi(data["sv"], truth_sets, work_dir, data)
            summary_plots = _plot_evaluation(df_csv)
            data["sv-validate"] = {"csv": val_summary, "plot": summary_plots, "df": df_csv}
        else:
            # `basestring` only exists on Python 2; fall back to `str` so this
            # check does not fail with NameError on Python 3.
            try:
                string_types = basestring
            except NameError:
                string_types = str
            assert isinstance(truth_sets, string_types) and utils.file_exists(truth_sets), truth_sets
            val_summary = _evaluate_vcf(data["sv"], truth_sets, work_dir, data)
            title = "%s structural variants" % dd.get_sample_name(data)
            summary_plots = validateplot.classifyplot_from_valfile(val_summary, outtype="png", title=title)
            data["sv-validate"] = {"csv": val_summary, "plot": summary_plots[0]}
    return data
def evaluate(data):
    """Provide evaluations for multiple callers split by structural variant type.

    Reads the truth set configuration from `config -> algorithm -> svvalidate`.
    Nothing is evaluated unless both that setting and `data["sv"]` are present.

    - If the setting is a dictionary, `_evaluate_multi` and `_plot_evaluation`
      are run and `data["sv-validate"]` gets the keys `csv`, `plot` and `df`.
    - Otherwise it must be a string naming an existing file. `_evaluate_vcf` is
      run, the summary is plotted as PNG and `data["sv-validate"]` gets the
      keys `csv` and `plot` (the first plot returned).

    Returns the input `data` dictionary, updated in place when an evaluation ran.
    Raises AssertionError if a non-dictionary truth set is not an existing file path.
    """
    # The output directory is prepared up front, even when nothing is evaluated.
    work_dir = utils.safe_makedir(os.path.join(data["dirs"]["work"], "structural",
                                               dd.get_sample_name(data), "validate"))
    truth_sets = tz.get_in(["config", "algorithm", "svvalidate"], data)
    if truth_sets and data.get("sv"):
        if isinstance(truth_sets, dict):
            val_summary, df_csv = _evaluate_multi(data["sv"], truth_sets, work_dir, data)
            summary_plots = _plot_evaluation(df_csv)
            data["sv-validate"] = {"csv": val_summary, "plot": summary_plots, "df": df_csv}
        else:
            # `basestring` only exists on Python 2; fall back to `str` so this
            # check does not fail with NameError on Python 3.
            try:
                string_types = basestring
            except NameError:
                string_types = str
            assert isinstance(truth_sets, string_types) and utils.file_exists(truth_sets), truth_sets
            val_summary = _evaluate_vcf(data["sv"], truth_sets, work_dir, data)
            title = "%s structural variants" % dd.get_sample_name(data)
            summary_plots = validateplot.classifyplot_from_valfile(val_summary, outtype="png", title=title)
            data["sv-validate"] = {"csv": val_summary, "plot": summary_plots[0]}
    return data
# Calls validateplot.classifyplot_from_valfile on the file given as the first
# command line argument, requesting PNG output with a fixed title.
import sys

from bcbio.variation import validateplot

# First command line argument: the validation file to plot.
val_file = sys.argv[1]
title = "smCounter2 UMI: VarDict 1.5.6; octopus 0.5.1b"
validateplot.classifyplot_from_valfile(val_file, title=title, outtype="png")
# Calls validateplot.classifyplot_from_valfile on the file given as the first
# command line argument, requesting PNG output with a fixed title.
import sys

from bcbio.variation import validateplot

# First command line argument: the validation file to plot.
val_file = sys.argv[1]
title = "ICR142: GATK HaplotypeCaller, FreeBayes, VarDict, Platypus"
validateplot.classifyplot_from_valfile(val_file, title=title, outtype="png")