def _evaluate_multi(calls, truth_svtypes, work_dir, data):
    """Evaluate every SV call set against the truth sets and write two CSV reports.

    The summary report (``<sample>-sv-validate.csv``) holds one row per SV
    type, size range and caller with the sensitivity and precision labels.
    The long-format report (``<sample>-sv-validate-df.csv``) holds one row
    per metric, carrying both the metric value and its label.

    Both reports are regenerated only when at least one of them is not up to
    date relative to the ``vrn_file`` of some entry in ``calls``.

    Args:
        calls: sequence of call records; each is read for its ``vrn_file``
            and ``variantcaller`` keys.
        truth_svtypes: mapping of SV type to the truth input handed on to
            ``_evaluate_one``.
        work_dir: directory in which both reports are placed.
        data: sample data passed through to the helper functions.

    Returns:
        Tuple ``(out_file, df_file)`` with the paths of the two reports.
    """
    prefix = os.path.join(work_dir, "%s-sv-validate" % (dd.get_sample_name(data)))
    out_file = prefix + ".csv"
    df_file = prefix + "-df.csv"

    # Guard clause: nothing to write when both reports are current for every call.
    if all(utils.file_uptodate(out_file, entry["vrn_file"]) and
           utils.file_uptodate(df_file, entry["vrn_file"])
           for entry in calls):
        return out_file, df_file

    # Only the summary report goes through the transaction; the long-format
    # report is opened directly at its final path.
    with file_transaction(data, out_file) as tx_out_file, \
            open(tx_out_file, "w") as summary_handle, \
            open(df_file, "w") as long_handle:
        summary = csv.writer(summary_handle)
        long_form = csv.writer(long_handle)
        summary.writerow(["svtype", "size", "caller", "sensitivity", "precision"])
        long_form.writerow(["svtype", "size", "caller", "metric", "value", "label"])
        for svtype, truth in truth_svtypes.items():
            for size in EVENT_SIZES:
                size_label = "%s-%s" % size
                for call in calls:
                    bed_file = convert.to_bed(call, dd.get_sample_name(data), work_dir, calls, data)
                    # Callers without a usable BED file are left out of both reports.
                    if not utils.file_exists(bed_file):
                        continue
                    result = _evaluate_one(call["variantcaller"], svtype, size, bed_file, truth, data)
                    sensitivity = result["sensitivity"]
                    precision = result["precision"]
                    summary.writerow([svtype, size_label, call["variantcaller"],
                                      sensitivity["label"], precision["label"]])
                    long_form.writerow([svtype, size_label, call["variantcaller"], "sensitivity",
                                        sensitivity["val"], sensitivity["label"]])
                    long_form.writerow([svtype, size_label, call["variantcaller"], "precision",
                                        precision["val"], precision["label"]])
    return out_file, df_file
def _evaluate_multi(calls, truth_svtypes, work_dir, data):
    """Write sensitivity/precision reports for all SV callers of one sample.

    Two CSV files named after the sample are produced inside ``work_dir``:
    a summary file with the sensitivity and precision labels per SV type,
    size range and caller, and a ``-df`` file that lists each metric on its
    own row together with its value and label.

    The files are rewritten only if either one is not up to date compared
    with the ``vrn_file`` of at least one entry in ``calls``. A caller is
    skipped when its converted BED file does not exist.

    Returns the pair ``(out_file, df_file)``.
    """
    sample_base = os.path.join(work_dir, "%s-sv-validate" % (dd.get_sample_name(data)))
    out_file = sample_base + ".csv"
    df_file = sample_base + "-df.csv"

    # Stop at the first call whose variant file is newer than either report.
    needs_write = False
    for candidate in calls:
        out_current = utils.file_uptodate(out_file, candidate["vrn_file"])
        if not out_current or not utils.file_uptodate(df_file, candidate["vrn_file"]):
            needs_write = True
            break

    if needs_write:
        with file_transaction(data, out_file) as tx_out_file:
            # The -df file is written in place, outside the transaction directory.
            with open(tx_out_file, "w") as out_handle, open(df_file, "w") as df_out_handle:
                writer = csv.writer(out_handle)
                dfwriter = csv.writer(df_out_handle)
                writer.writerow(["svtype", "size", "caller", "sensitivity", "precision"])
                dfwriter.writerow(["svtype", "size", "caller", "metric", "value", "label"])
                for svtype, truth in truth_svtypes.items():
                    for size in EVENT_SIZES:
                        str_size = "%s-%s" % size
                        for call in calls:
                            call_bed = convert.to_bed(call, dd.get_sample_name(data),
                                                      work_dir, calls, data)
                            if utils.file_exists(call_bed):
                                evalout = _evaluate_one(call["variantcaller"], svtype, size,
                                                        call_bed, truth, data)
                                row_key = [svtype, str_size, call["variantcaller"]]
                                writer.writerow(row_key + [evalout["sensitivity"]["label"],
                                                           evalout["precision"]["label"]])
                                dfwriter.writerows(
                                    [row_key + [metric, evalout[metric]["val"], evalout[metric]["label"]]
                                     for metric in ["sensitivity", "precision"]])
    return out_file, df_file