Beispiel #1
0
def main(argv):
    """Command-line entry point for ``svtest sample-list``.

    When ``argv`` is empty the help text is printed and the process exits
    with status 1. Otherwise the test sample list is read (with
    ``fail_on_empty=False``), the optional valid-sample list is read, and
    the metrics returned by ``get_metrics`` are passed to ``write_metrics``.

    Args:
        argv: Sequence of argument strings handed to ``parse_args``.
    """
    arg_parser = argparse.ArgumentParser(
        prog='svtest sample-list',
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('test_sample_list', type=str)
    arg_parser.add_argument(
        '--valid-sample-list', type=str, default=None,
        help='Sample ids not found in this list will cause an error')
    arg_parser.add_argument(
        '--prefix', type=str, default=None,
        help='Prefix to add to metric names')

    # Show usage rather than an argparse error when nothing was passed
    if len(argv) == 0:
        arg_parser.print_help()
        sys.exit(1)
    options = arg_parser.parse_args(argv)

    test_samples = iou.read_samples_list(options.test_sample_list,
                                         fail_on_empty=False)
    # The valid-sample list is optional; None is forwarded when it is absent
    valid_samples = (None if options.valid_sample_list is None else
                     iou.read_samples_list(options.valid_sample_list))

    # Compute and emit the metrics
    write_metrics(get_metrics(test_samples, valid_samples, options.prefix))
Beispiel #2
0
def get_metrics(file, sample_list):
    """Count records per orientation pair in a tab-delimited byte stream.

    Each line is decoded, stripped and split on tabs, then validated with
    ``test_record`` against the set of sample ids. The orientation pair is
    built by concatenating columns 2 and 5 (0-based).

    Args:
        file: Iterable yielding ``bytes`` lines.
        sample_list: Passed to ``iou.read_samples_list`` to obtain sample ids.

    Returns:
        Dict mapping the ``++``, ``--``, ``+-`` and ``-+`` metric keys to
        their counts. Keys are suffixed with the sample id when exactly one
        sample is listed, otherwise with ``_merged``.

    Raises:
        ValueError: If a record has an orientation pair other than the four
            recognized ones.
    """
    samples = iou.read_samples_list(sample_list)
    known_samples = set(samples)

    orientation_counts = {'++': 0, '--': 0, '+-': 0, '-+': 0}
    for raw_line in file:
        fields = raw_line.decode().strip().split('\t')
        test_record(fields, known_samples)
        first = fields[2]
        second = fields[5]
        orientation = first + second
        if orientation not in orientation_counts:
            raise ValueError("Unrecognized orientation: %s / %s" %
                             (first, second))
        orientation_counts[orientation] += 1

    # Single-sample inputs are labelled by sample id, everything else as merged
    metric_suffix = "_" + samples[0] if len(samples) == 1 else "_merged"

    return {
        PLUS_PLUS_KEY + metric_suffix: orientation_counts['++'],
        MINUS_MINUS_KEY + metric_suffix: orientation_counts['--'],
        PLUS_MINUS_KEY + metric_suffix: orientation_counts['+-'],
        MINUS_PLUS_KEY + metric_suffix: orientation_counts['-+']
    }
Beispiel #3
0
def main(argv):
    """Command-line entry point for ``svtest ped-file``.

    When ``argv`` is empty the help text is printed and the process exits
    with status 1. Otherwise the optional sample list is read, the PED file
    is loaded as a headerless six-column tab-delimited table, and the
    metrics from ``get_metrics`` are passed to ``write_metrics``.

    Args:
        argv: Sequence of argument strings handed to ``parse_args``.
    """
    arg_parser = argparse.ArgumentParser(
        prog='svtest ped-file',
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('test_ped_file', type=str)
    arg_parser.add_argument(
        '--sample-list', type=str, default=None,
        help='Sample ids not found in this list will cause an error')
    arg_parser.add_argument(
        '--prefix', type=str, default=None,
        help='Prefix to add to metric names')

    # Show usage rather than an argparse error when nothing was passed
    if len(argv) == 0:
        arg_parser.print_help()
        sys.exit(1)
    options = arg_parser.parse_args(argv)

    # The sample list is optional; None is forwarded when it is absent
    valid_samples = (None if options.sample_list is None else
                     iou.read_samples_list(options.sample_list))

    # Columns are named 0..5 because the file is read without a header row
    ped_table = pd.read_csv(options.test_ped_file, sep='\t', names=range(6))
    metrics = get_metrics(ped_table,
                          valid_samples=valid_samples,
                          metric_prefix=options.prefix)

    write_metrics(metrics)
Beispiel #4
0
def main(argv):
    """Command-line entry point for ``svtest medcov``.

    When ``argv`` is empty the help text is printed and the process exits
    with status 1. Otherwise the sample list is read, the test file (and the
    baseline file, when given) is opened in text mode, and the metrics from
    ``get_metrics`` are passed to ``write_metrics``.

    Args:
        argv: Sequence of argument strings handed to ``parse_args``.
    """
    arg_parser = argparse.ArgumentParser(
        prog='svtest medcov',
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('test_file', type=str)
    arg_parser.add_argument('sample_list', type=str)
    arg_parser.add_argument('--baseline-file', type=str, default=None)

    # Show usage rather than an argparse error when nothing was passed
    if len(argv) == 0:
        arg_parser.print_help()
        sys.exit(1)
    options = arg_parser.parse_args(argv)

    samples = iou.read_samples_list(options.sample_list)

    # Both handles stay open for the duration of the get_metrics call
    with open(options.test_file, mode='r') as test_handle:
        if options.baseline_file is not None:
            with open(options.baseline_file, mode='r') as baseline_handle:
                metrics = get_metrics(test_handle, baseline_handle, samples)
        else:
            metrics = get_metrics(test_handle, None, samples)

    write_metrics(metrics)
Beispiel #5
0
def get_metrics(matrix_file, sample_list, low_mem_mode):
    """Validate a tab-delimited count matrix and summarize its values.

    The first line is the header; its columns from index 3 onward must match
    the ids from the sample list as a set. Every following record must have
    integer values in columns 1 and 2, and the difference (column 2 minus
    column 1) must equal that of the first record. The remaining columns are
    checked with ``test_record`` against the number of distinct samples.

    Args:
        matrix_file: Binary file-like object supporting ``readline`` and
            iteration over ``bytes`` lines.
        sample_list: Passed to ``iou.read_samples_list`` to obtain sample ids.
        low_mem_mode: When true, counts are not retained and only the number
            of records is reported.

    Returns:
        Dict of metrics. In low-memory mode it holds only the record count.
        Otherwise it also holds the 25/50/75% quantiles over all counts, the
        number of records whose maximum (resp. minimum) count is zero, and
        one mean per header sample column.

    Raises:
        ValueError: If a record's interval size differs from the first one.
    """
    samples = iou.read_samples_list(sample_list)
    samples_set = set(samples)

    header = matrix_file.readline().decode().strip().split('\t')
    header_samples = header[3:]
    tu.test_sets_equal(set(header_samples),
                       samples_set,
                       item_str="sample",
                       name_a="header",
                       name_b="samples list")

    rows = []
    expected_size = None
    num_records = 0
    for raw_line in matrix_file:
        num_records += 1
        fields = raw_line.decode().strip().split('\t')
        tu.test_is_int(fields, 1)
        tu.test_is_int(fields, 2)
        size = int(fields[2]) - int(fields[1])
        if expected_size is None:
            # The first record defines the size every other record must have
            expected_size = size
        elif expected_size != size:
            raise ValueError(
                "Interval not of size {:d}: {:s}:{:d}-{:d}".format(
                    expected_size, fields[0], int(fields[1]),
                    int(fields[2])))
        counts = fields[3:]
        test_record(counts, len(samples_set))
        if not low_mem_mode:
            rows.append([int(value) for value in counts])

    if low_mem_mode:
        # No counts were kept, so only the record tally can be reported
        return {INTERVALS_KEY: num_records}

    arr = np.asarray(rows)
    quantiles = np.quantile(arr, [0.25, 0.50, 0.75])
    row_max = arr.max(axis=1)
    row_min = arr.min(axis=1)
    metrics = {
        Q25_KEY: quantiles[0],
        Q50_KEY: quantiles[1],
        Q75_KEY: quantiles[2],
        INTERVALS_KEY: num_records,
        # A zero row maximum means the row's largest count is zero
        ALL_ZERO_KEY: len(row_max[row_max == 0]),
        # A zero row minimum means the row's smallest count is zero
        ONE_ZERO_KEY: len(row_min[row_min == 0])
    }
    column_means = arr.mean(axis=0)
    for col, sample in enumerate(header_samples):
        metrics[SAMPLE_MEAN_KEY + "_" + sample] = column_means[col]

    return metrics
Beispiel #6
0
def main(argv):
    """Command-line entry point for ``svtest merged-depth``.

    When ``argv`` is empty the help text is printed and the process exits
    with status 1. ``--baseline-bed``, ``--test-hits`` and ``--baseline-hits``
    must be given together or not at all. The test bed (and the baseline bed,
    when given) is opened with gzip in binary mode, and the metrics from
    ``get_metrics`` are passed to ``write_metrics``.

    Args:
        argv: Sequence of argument strings handed to ``parse_args``.

    Raises:
        ValueError: If only some of the three baseline options are supplied.
    """
    arg_parser = argparse.ArgumentParser(
        prog='svtest merged-depth',
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('test_bed', type=str)
    arg_parser.add_argument('contig_list', type=str)
    arg_parser.add_argument('type', type=str)
    arg_parser.add_argument(
        '--baseline-bed', type=str, default=None,
        help="Baseline bed file to evaluate against")
    arg_parser.add_argument(
        '--test-hits', type=str,
        help="List of test record ids that overlap baseline set (required if using --baseline-bed)")
    arg_parser.add_argument(
        '--baseline-hits', type=str,
        help="List of baseline record ids that overlap test set (required if using --baseline-bed)")

    # Show usage rather than an argparse error when nothing was passed
    if len(argv) == 0:
        arg_parser.print_help()
        sys.exit(1)
    options = arg_parser.parse_args(argv)

    # More than one distinct truthiness value means the options are mixed
    supplied = {
        bool(options.baseline_bed),
        bool(options.test_hits),
        bool(options.baseline_hits)
    }
    if len(supplied) > 1:
        raise ValueError(
            "Inconsistent arguments specified: --baseline-bed, --test-hits, and --baseline-hits must be specified together."
        )

    contigs = iou.read_contig_list(options.contig_list)

    # Both handles stay open for the duration of the get_metrics call
    with gzip.open(options.test_bed, mode='rb') as test_handle:
        if options.baseline_bed is not None:
            with gzip.open(options.baseline_bed, mode='rb') as base_handle:
                metrics = get_metrics(test_handle, base_handle, contigs,
                                      options.type, options.test_hits,
                                      options.baseline_hits)
        else:
            metrics = get_metrics(test_handle, None, contigs, options.type,
                                  options.test_hits, options.baseline_hits)

    write_metrics(metrics)
Beispiel #7
0
def main(argv):
    """Command-line entry point for ``svtest plot-metrics``.

    When ``argv`` is empty the help text is printed and the process exits
    with status 1. Otherwise two metric tables are loaded with
    ``get_metrics`` and outer-joined (column suffixes ``_a`` / ``_b``).
    The result is optionally consolidated per sample, optionally restricted
    to rows whose ``value_a`` and ``value_b`` differ, optionally written as
    TSV, and finally plotted to the output PDF path.

    Args:
        argv: Sequence of argument strings handed to ``parse_args``.
    """
    arg_parser = argparse.ArgumentParser(
        prog='svtest plot-metrics',
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    for positional in ('metrics_a', 'metrics_b', 'pdf_out'):
        arg_parser.add_argument(positional, type=str)
    arg_parser.add_argument('--sample-list', type=str, default=None)
    arg_parser.add_argument('--changes-only', action='store_true',
                            help='Only plot values that are different')
    arg_parser.add_argument('--metrics-out', type=str, default=None,
                            help='Write plotted metrics to tsv')

    # Show usage rather than an argparse error when nothing was passed
    if len(argv) == 0:
        arg_parser.print_help()
        sys.exit(1)
    options = arg_parser.parse_args(argv)

    # Load both tables and combine them side by side
    table_a = get_metrics(options.metrics_a)
    table_b = get_metrics(options.metrics_b)
    joined = table_a.join(table_b, how='outer', lsuffix='_a', rsuffix='_b',
                          sort=True)

    # With sample ids available, fold sample-specific metrics together
    if options.sample_list is not None:
        sample_ids = iou.read_samples_list(options.sample_list)
        joined = consolidate_sample_metrics(joined, sample_ids)

    # Keep only rows where the two values differ
    if options.changes_only:
        joined = joined[joined["value_a"] != joined["value_b"]]

    # Dump the table that is about to be plotted
    if options.metrics_out is not None:
        joined.to_csv(options.metrics_out, sep='\t')

    plot_data(joined, options.pdf_out)
Beispiel #8
0
def get_metrics(baf_file, sample_list):
    """Compute quartiles and a record count from a tab-delimited byte stream.

    Each line is decoded, stripped and split on tabs, validated with
    ``test_record`` against the set of sample ids, and its column 2
    (0-based) is parsed as a float.

    Args:
        baf_file: Iterable yielding ``bytes`` lines.
        sample_list: Passed to ``iou.read_samples_list`` to obtain sample ids.

    Returns:
        Dict with the 25/50/75% quantiles of the parsed values and the number
        of values. Keys are suffixed with the sample id when exactly one
        sample is listed, otherwise with ``_merged``.
    """
    samples = iou.read_samples_list(sample_list)
    known_samples = set(samples)

    values = []
    for raw_line in baf_file:
        fields = raw_line.decode().strip().split('\t')
        test_record(fields, known_samples)
        values.append(float(fields[2]))

    arr = np.asarray(values)
    quantiles = np.quantile(arr, [0.25, 0.50, 0.75])

    # Single-sample inputs are labelled by sample id, everything else as merged
    metric_suffix = "_" + samples[0] if len(samples) == 1 else "_merged"

    return {
        Q25_KEY + metric_suffix: quantiles[0],
        Q50_KEY + metric_suffix: quantiles[1],
        Q75_KEY + metric_suffix: quantiles[2],
        COUNT_KEY + metric_suffix: len(arr)
    }
Beispiel #9
0
def main(argv):
    """Command-line entry point for ``svtest metrics-file``.

    When ``argv`` is empty the help text is printed and the process exits
    with status 1. Otherwise the contig list is read and checked with
    ``tu.test_is_not_empty``, the metrics file is loaded as a tab-delimited
    table, and the metrics from ``get_metrics`` are passed to
    ``write_metrics``.

    Args:
        argv: Sequence of argument strings handed to ``parse_args``.
    """
    arg_parser = argparse.ArgumentParser(
        prog='svtest metrics-file',
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('metrics_file', type=str)
    arg_parser.add_argument('contig_list', type=str)
    arg_parser.add_argument('--common', action='store_true')

    # Show usage rather than an argparse error when nothing was passed
    if len(argv) == 0:
        arg_parser.print_help()
        sys.exit(1)
    options = arg_parser.parse_args(argv)

    contigs = iou.read_contig_list(options.contig_list)
    tu.test_is_not_empty(contigs, "contigs")

    # Load the table and derive the metrics from it
    table = pd.read_csv(options.metrics_file, sep='\t')
    write_metrics(get_metrics(table, contigs, options.common))
Beispiel #10
0
def get_metrics(sr_file, sample_list):
    """Count ``left`` and ``right`` records in a tab-delimited byte stream.

    Each line is decoded, stripped and split on tabs, validated with
    ``test_record`` against the set of sample ids, and tallied by the value
    of column 2 (0-based).

    Args:
        sr_file: Iterable yielding ``bytes`` lines.
        sample_list: Passed to ``iou.read_samples_list`` to obtain sample ids.

    Returns:
        Dict with the left and right counts. Keys are suffixed with the
        sample id when exactly one sample is listed, otherwise with
        ``_merged``.

    Raises:
        ValueError: If column 2 is neither ``left`` nor ``right``.
    """
    samples = iou.read_samples_list(sample_list)
    known_samples = set(samples)

    side_counts = {'left': 0, 'right': 0}
    for raw_line in sr_file:
        fields = raw_line.decode().strip().split('\t')
        test_record(fields, known_samples)
        side = fields[2]
        if side not in side_counts:
            raise ValueError("Unrecognized orientation: %s" % side)
        side_counts[side] += 1

    # Single-sample inputs are labelled by sample id, everything else as merged
    metric_suffix = "_" + samples[0] if len(samples) == 1 else "_merged"

    return {
        LEFT_KEY + metric_suffix: side_counts['left'],
        RIGHT_KEY + metric_suffix: side_counts['right']
    }
Beispiel #11
0
def main(argv):
    """Command-line entry point for ``svtest vcf`` (VCF or BED baseline).

    When ``argv`` is empty the help text is printed and the process exits
    with status 1. Otherwise the option combination is validated, the contig
    and sample lists are read, ``get_metrics`` is run, the metrics are passed
    to ``write_metrics``, and each requested interval file is written when
    ``get_metrics`` returned intervals for it.

    Args:
        argv: Sequence of argument strings handed to ``parse_args``.

    Raises:
        ValueError: If ``--fp-file`` or ``--fn-file`` is given without any
            baseline, or if both ``--baseline-vcf`` and ``--baseline-bed``
            are given.
    """
    arg_parser = argparse.ArgumentParser(
        prog='svtest vcf',
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('test_vcf', type=str)
    arg_parser.add_argument('contig_list', type=str)
    arg_parser.add_argument('sample_list', type=str)
    arg_parser.add_argument(
        'types', type=str,
        help='Comma-delimited list of variant types (case-sensitive)')
    arg_parser.add_argument('metric_prefix', type=str)
    arg_parser.add_argument(
        '--baseline-vcf', type=str,
        help='Baseline vcf to provide evaluation metrics against')
    required_bed_columns = '", "'.join([
        BED_FILE_CHROM_COL, BED_FILE_START_COL, BED_FILE_END_COL,
        BED_FILE_SVTYPE_COL
    ])
    arg_parser.add_argument(
        '--baseline-bed', type=str,
        help='Baseline bed file to provide evaluation metrics against. Must have header beginning with "'
        + BED_FILE_HEADER_CHAR + '" and the following columns: "' +
        required_bed_columns + '"')
    arg_parser.add_argument(
        '--min-reciprocal-overlap', type=float, default=0.5,
        help='Minimum reciprocal overlap for validation metrics [0.5]')
    arg_parser.add_argument(
        '--padding', type=int, default=50,
        help='Interval padding for validation metrics [50]')
    arg_parser.add_argument(
        '--max-warnings', type=int, default=50,
        help='Maximum number of records to print warnings for [50]')
    output_options = (
        ('--fp-file', 'Write false positives to file'),
        ('--fn-file', 'Write false negatives to file'),
        ('--fp-pass-file', 'Write PASS false positives to file'),
        ('--fn-pass-file', 'Write PASS false negatives to file'),
    )
    for flag, help_text in output_options:
        arg_parser.add_argument(flag, type=str, default=None, help=help_text)

    # Show usage rather than an argparse error when nothing was passed
    if len(argv) == 0:
        arg_parser.print_help()
        sys.exit(1)
    options = arg_parser.parse_args(argv)

    has_baseline = (options.baseline_vcf is not None
                    or options.baseline_bed is not None)
    wants_fp_or_fn = (options.fp_file is not None
                      or options.fn_file is not None)
    # NOTE(review): --fp-pass-file / --fn-pass-file are not part of this
    # check; they are only written below when get_metrics returns intervals.
    if wants_fp_or_fn and not has_baseline:
        raise ValueError(
            "FP and FN files cannot be generated if --baseline-vcf and --baseline-bed aren't specified"
        )
    if options.baseline_vcf is not None and options.baseline_bed is not None:
        raise ValueError(
            "Cannot specify both --baseline-vcf and --baseline-bed")
    types_list = options.types.split(',')

    contigs = iou.read_contig_list(options.contig_list)
    samples = iou.read_samples_list(options.sample_list)
    (metrics, fp_intervals, fn_intervals, fp_intervals_pass,
     fn_intervals_pass) = get_metrics(
         options.test_vcf, options.baseline_vcf, options.baseline_bed,
         contigs, types_list, options.min_reciprocal_overlap,
         options.padding, samples, options.metric_prefix,
         options.max_warnings)

    write_metrics(metrics)

    # Write each interval set only if a path was given and data exists
    interval_outputs = (
        (options.fp_file, fp_intervals),
        (options.fn_file, fn_intervals),
        (options.fp_pass_file, fp_intervals_pass),
        (options.fn_pass_file, fn_intervals_pass),
    )
    for out_path, intervals in interval_outputs:
        if out_path is not None and intervals is not None:
            write_intervals(out_path, intervals)
Beispiel #12
0
def main(argv):
    """Command-line entry point for ``svtest vcf`` (VCF baseline only).

    When ``argv`` is empty the help text is printed and the process exits
    with status 1. Otherwise the option combination is validated, the contig
    and sample lists are read, ``get_metrics`` is run, the metrics are passed
    to ``write_metrics``, and the false-positive / false-negative interval
    files are written when requested and when intervals were returned.

    Args:
        argv: Sequence of argument strings handed to ``parse_args``.

    Raises:
        ValueError: If ``--fp-file`` or ``--fn-file`` is given without
            ``--baseline-vcf``.
    """
    arg_parser = argparse.ArgumentParser(
        prog='svtest vcf',
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('test_vcf', type=str)
    arg_parser.add_argument('contig_list', type=str)
    arg_parser.add_argument('sample_list', type=str)
    arg_parser.add_argument(
        'types', type=str,
        help='Comma-delimited list of variant types (case-sensitive)')
    arg_parser.add_argument('metric_prefix', type=str)
    arg_parser.add_argument(
        '--baseline-vcf', type=str,
        help='Baseline vcf to provide evaluation metrics against')
    arg_parser.add_argument(
        '--min-reciprocal-overlap', type=float, default=0.5,
        help='Minimum reciprocal overlap for validation metrics [0.5]')
    arg_parser.add_argument(
        '--padding', type=int, default=50,
        help='Interval padding for validation metrics [50]')
    arg_parser.add_argument(
        '--max-warnings', type=int, default=50,
        help='Maximum number of records to print warnings for [50]')
    output_options = (
        ('--fp-file', 'Write false positives to file'),
        ('--fn-file', 'Write false negatives to file'),
    )
    for flag, help_text in output_options:
        arg_parser.add_argument(flag, type=str, default=None, help=help_text)

    # Show usage rather than an argparse error when nothing was passed
    if len(argv) == 0:
        arg_parser.print_help()
        sys.exit(1)
    options = arg_parser.parse_args(argv)

    wants_fp_or_fn = (options.fp_file is not None
                      or options.fn_file is not None)
    if options.baseline_vcf is None and wants_fp_or_fn:
        raise ValueError(
            "FP and FN files cannot be generated if --baseline-vcf isn't specified"
        )
    types_list = options.types.split(',')

    contigs = iou.read_contig_list(options.contig_list)
    samples = iou.read_samples_list(options.sample_list)
    metrics, fp_intervals, fn_intervals = get_metrics(
        options.test_vcf, options.baseline_vcf, contigs, types_list,
        options.min_reciprocal_overlap, options.padding, samples,
        options.metric_prefix, options.max_warnings)

    write_metrics(metrics)

    # Write each interval set only if a path was given and data exists
    interval_outputs = (
        (options.fp_file, fp_intervals),
        (options.fn_file, fn_intervals),
    )
    for out_path, intervals in interval_outputs:
        if out_path is not None and intervals is not None:
            write_intervals(out_path, intervals)