コード例 #1
0
def MakeWriter(outfile, invcf, command):
    r"""Open a VCF writer whose header records the dumpSTR command

    The command string is stored in the metadata of the input reader
    (under the ``command-DumpSTR`` key) before the writer is built, so
    the writer is created from a header that already contains it.

    Parameters
    ----------
    outfile : str
       Path of the VCF file to create
    invcf : vcf.Reader object
       Input VCF. Its header is used as the template for the writer
    command : str
       Command line used to invoke dumpSTR

    Returns
    -------
    writer : vcf.Writer object
       Writer built from the header of the input VCF,
       or None if an OSError occurred while setting it up
       (the error text is reported through common.WARNING)
    """
    invcf.metadata["command-DumpSTR"] = [command]
    try:
        return vcf.Writer(open(outfile, "w"), invcf)
    except OSError as err:
        common.WARNING(str(err))
        return None
コード例 #2
0
ファイル: qcSTR.py プロジェクト: richyanicky/TRTools
def main(args):
    r"""Collect per-sample, per-chromosome and per-allele QC data from a VCF

    Reads the VCF named by ``args.vcf``, harmonizes each record, tallies
    the statistics below and hands them to the plotting helpers, which
    write PDF files prefixed with ``args.out``.

    Parameters
    ----------
    args : argparse namespace
        Uses ``vcf``, ``vcftype``, ``samples``, ``numrecords``,
        ``period`` and ``out``

    Returns
    -------
    retcode : int
        0 on success, 1 if the input VCF does not exist
    """
    if not os.path.exists(args.vcf):
        common.WARNING("%s does not exist"%args.vcf)
        return 1
    # Set up reader and harmonizer
    invcf = vcf.Reader(filename=args.vcf)
    if args.vcftype != 'auto':
        vcftype = trh.VCFTYPES[args.vcftype]
    else:
        vcftype = trh.InferVCFType(invcf)

    # Load samples (one name per line). The file is closed as soon as
    # it has been read instead of being left to the garbage collector.
    if args.samples:
        with open(args.samples, "r") as sample_file:
            samplelist = [line.strip() for line in sample_file]
    else:
        samplelist = invcf.samples

    # Set up data to keep track of
    sample_calls = {sample: 0 for sample in samplelist} # sample->numcalls
    contigs = invcf.contigs
    if len(contigs) == 0:
        common.MSG("Warning: no contigs found in VCF file.")
    chrom_calls = {chrom: 0 for chrom in contigs} # chrom->numcalls
    diffs_from_ref = [] # for each allele call, keep track of diff (bp) from ref
    diffs_from_ref_unit = [] # for each allele call, keep track of diff (units) from ref
    reflens = [] # for each allele call, keep track of reference length (bp)

    numrecords = 0
    for record in invcf:
        # numrecords only counts records that passed the period filter
        if args.numrecords is not None and numrecords >= args.numrecords:
            break
        chrom = record.CHROM
        trrecord = trh.HarmonizeRecord(vcftype, record)
        if args.period is not None and len(trrecord.motif) != args.period:
            continue
        # Extract stats
        rl = len(trrecord.ref_allele)
        allele_counts = trrecord.GetAlleleCounts(uselength=False, samplelist=samplelist)
        called_samples = [item.sample for item in record if item.called]
        # Update data. Called samples outside of samplelist are ignored.
        num_calls = 0
        for s in called_samples:
            if s in sample_calls:
                sample_calls[s] += 1
                num_calls += 1
        # Chromosomes missing from the header contigs are added on the fly
        chrom_calls[chrom] = chrom_calls.get(chrom, 0) + num_calls
        for allele, count in allele_counts.items():
            allelediff = len(allele)-rl
            reflens.extend([rl]*count)
            diffs_from_ref.extend([allelediff]*count)
            diffs_from_ref_unit.extend([allelediff/len(trrecord.motif)]*count)
        numrecords += 1

    OutputDiffRefHistogram(diffs_from_ref_unit, args.out + "-diffref-histogram.pdf")
    OutputDiffRefBias(diffs_from_ref, reflens, args.out + "-diffref-bias.pdf")
    OutputSampleCallrate(sample_calls, args.out+"-sample-callnum.pdf")
    OutputChromCallrate(chrom_calls, args.out+"-chrom-callnum.pdf")
    return 0
コード例 #3
0
def CheckHipSTRFilters(invcf, args):
    r"""Validate the HipSTR call-level filter options

    Every option that was supplied (i.e. is not None) is range-checked
    in turn. After an option passes its range check, the FORMAT fields
    it depends on are asserted to be present in ``invcf.formats``.

    Parameters
    ----------
    invcf : vcf.Reader object
        Input VCF. Only its ``formats`` attribute is consulted
    args : argparse namespace
        Contains user arguments

    Returns
    -------
    checks : bool
        True if all supplied options look ok.
        False (after a warning) at the first invalid option
    """
    flank_indel = args.hipstr_max_call_flank_indel
    if flank_indel is not None:
        if flank_indel < 0 or flank_indel > 1:
            common.WARNING(
                "--hipstr-max-call-flank-indel must be between 0 and 1")
            return False
        # should always be true
        assert all(tag in invcf.formats for tag in ("DP", "DFLANKINDEL"))

    stutter = args.hipstr_max_call_stutter
    if stutter is not None:
        if stutter < 0 or stutter > 1:
            common.WARNING("--hipstr-max-call-stutter must be between 0 and 1")
            return False
        # should always be true
        assert all(tag in invcf.formats for tag in ("DP", "DSTUTTER"))

    supp_reads = args.hipstr_min_supp_reads
    if supp_reads is not None:
        if supp_reads < 0:
            common.WARNING("--hipstr-min-supp-reads must be >= 0")
            return False
        assert all(tag in invcf.formats for tag in ("ALLREADS", "GB"))

    min_dp = args.hipstr_min_call_DP
    if min_dp is not None:
        if min_dp < 0:
            common.WARNING("--hipstr-min-call-DP must be >= 0")
            return False
        assert "DP" in invcf.formats

    max_dp = args.hipstr_max_call_DP
    if max_dp is not None:
        if max_dp < 0:
            common.WARNING("--hipstr-max-call-DP must be >= 0")
            return False
        assert "DP" in invcf.formats

    # The depth bounds must describe a non-empty interval
    if min_dp is not None and max_dp is not None and max_dp < min_dp:
        common.WARNING(
            "--hipstr-max-call-DP must be >= --hipstr-min-call-DP")
        return False

    min_q = args.hipstr_min_call_Q
    if min_q is not None:
        if min_q < 0 or min_q > 1:
            common.WARNING("--hipstr-min-call-Q must be between 0 and 1")
            return False
        assert "Q" in invcf.formats

    return True
コード例 #4
0
def CheckLocusFilters(args, vcftype):
    r"""Perform checks on user inputs for locus-level filters

    Range-checks the HWE p-value and heterozygosity thresholds, warns
    (without failing) about options that are only relevant to HipSTR
    VCFs, and verifies that --filter-regions and --filter-regions-names
    list the same number of comma-separated entries.

    Parameters
    ----------
    args : argparse namespace
        Contains user arguments
    vcftype : enum.
        Specifies which tool this VCF came from.
        Must be included in trh.VCFTYPES

    Returns
    -------
    checks : bool
        Set to True if all filters look ok.
        Set to False if filters are invalid
    """
    if args.min_locus_hwep is not None:
        if args.min_locus_hwep < 0 or args.min_locus_hwep > 1:
            common.WARNING("Invalid --min-locus-hwep. Must be between 0 and 1")
            return False
    if args.min_locus_het is not None:
        if args.min_locus_het < 0 or args.min_locus_het > 1:
            common.WARNING("Invalid --min-locus-het. Must be between 0 and 1")
            return False
    if args.max_locus_het is not None:
        if args.max_locus_het < 0 or args.max_locus_het > 1:
            common.WARNING("Invalid --max-locus-het. Must be between 0 and 1")
            return False
    if args.min_locus_het is not None and args.max_locus_het is not None:
        if args.max_locus_het < args.min_locus_het:
            common.WARNING(
                "Cannot have --max-locus-het less than --min-locus-het")
            return False
    # The next two situations only trigger a warning; they do not
    # invalidate the filter set.
    if args.use_length and vcftype != trh.VCFTYPES["hipstr"]:
        common.WARNING(
            "--use-length is only meaningful for HipSTR, which reports sequence level differences."
        )
    if args.filter_hrun and vcftype != trh.VCFTYPES["hipstr"]:
        # Message names the option the user actually passed (--filter-hrun)
        common.WARNING(
            "--filter-hrun only relevant to HipSTR files. This filter will have no effect."
        )
    if args.filter_regions is not None and args.filter_regions_names is not None:
        filter_region_files = args.filter_regions.split(",")
        filter_region_names = args.filter_regions_names.split(",")
        if len(filter_region_names) != len(filter_region_files):
            common.WARNING(
                "Length of --filter-regions-names must match --filter-regions."
            )
            return False
    return True
コード例 #5
0
def CheckPopSTRFilters(invcf, args):
    r"""Validate the popSTR call-level filter options

    Every option that was supplied (i.e. is not None) must be
    non-negative, and the FORMAT field it depends on is asserted to be
    present in ``invcf.formats``. When both depth bounds are given the
    maximum must not be below the minimum.

    Parameters
    ----------
    invcf : vcf.Reader object
        Input VCF. Only its ``formats`` attribute is consulted
    args : argparse namespace
        Contains user arguments

    Returns
    -------
    checks : bool
        True if all supplied options look ok.
        False (after a warning) at the first invalid option
    """
    min_dp = args.popstr_min_call_DP
    if min_dp is not None:
        if min_dp < 0:
            common.WARNING("--popstr-min-call-DP must be >= 0")
            return False
        assert "DP" in invcf.formats

    max_dp = args.popstr_max_call_DP
    if max_dp is not None:
        if max_dp < 0:
            common.WARNING("--popstr-max-call-DP must be >= 0")
            return False
        assert "DP" in invcf.formats

    if min_dp is not None and max_dp is not None and max_dp < min_dp:
        common.WARNING(
            "--popstr-max-call-DP must be >= --popstr-min-call-DP")
        return False

    support = args.popstr_require_support
    if support is not None:
        if support < 0:
            common.WARNING("--popstr-require-support must be >= 0")
            return False
        assert "AD" in invcf.formats

    return True
コード例 #6
0
def CheckAdVNTRFilters(invcf, args):
    r"""Validate the adVNTR call-level filter options

    Every option that was supplied (i.e. is not None) must be
    non-negative, and the FORMAT field it depends on is asserted to be
    present in ``invcf.formats``. When both depth bounds are given the
    maximum must not be below the minimum.

    Parameters
    ----------
    invcf : vcf.Reader object
        Input VCF. Only its ``formats`` attribute is consulted
    args : argparse namespace
        Contains user arguments

    Returns
    -------
    checks : bool
        True if all supplied options look ok.
        False (after a warning) at the first invalid option
    """
    min_dp = args.advntr_min_call_DP
    if min_dp is not None:
        if min_dp < 0:
            common.WARNING("--advntr-min-call-DP must be >= 0")
            return False
        assert "DP" in invcf.formats

    max_dp = args.advntr_max_call_DP
    if max_dp is not None:
        if max_dp < 0:
            common.WARNING("--advntr-max-call-DP must be >= 0")
            return False
        assert "DP" in invcf.formats

    if min_dp is not None and max_dp is not None and max_dp < min_dp:
        common.WARNING(
            "--advntr-max-call-DP must be >= --advntr-min-call-DP")
        return False

    spanning = args.advntr_min_spanning
    if spanning is not None:
        if spanning < 0:
            common.WARNING("--advntr-min-spanning must be >=0")
            return False
        assert "SR" in invcf.formats

    flanking = args.advntr_min_flanking
    if flanking is not None:
        if flanking < 0:
            common.WARNING("--advntr-min-flanking must be >=0")
            return False
        assert "FR" in invcf.formats

    min_ml = args.advntr_min_ML
    if min_ml is not None:
        if min_ml < 0:
            common.WARNING("--advntr-min-ML must be >= 0")
            return False
        assert "ML" in invcf.formats

    return True
コード例 #7
0
def CheckEHFilters(invcf, args):  # pragma: no cover
    r"""Validate the ExpansionHunter call-level filter options

    Every option that was supplied (i.e. is not None) must be
    non-negative, and the FORMAT field it depends on is asserted to be
    present in ``invcf.formats``. When both LC bounds are given the
    maximum must not be below the minimum.

    Parameters
    ----------
    invcf : vcf.Reader object
        Input VCF. Only its ``formats`` attribute is consulted
    args : argparse namespace
        Contains user arguments

    Returns
    -------
    checks : bool
        True if all supplied options look ok.
        False (after a warning) at the first invalid option
    """
    # (argument attribute, warning if negative, required FORMAT field),
    # listed in the order in which the options are validated
    nonnegative_options = (
        ("eh_min_ADFL", "--eh-min-ADFL must be >= 0", "ADFL"),
        ("eh_min_ADIR", "--eh-min-ADIR must be >= 0", "ADIR"),
        ("eh_min_ADSP", "--eh-min-ADSP must be >= 0", "ADSP"),
        ("eh_min_call_LC", "--eh-min-call-LC must be >= 0", "LC"),
        ("eh_max_call_LC", "--eh-max-call-LC must be >= 0", "LC"),
    )
    for attr, message, format_tag in nonnegative_options:
        value = getattr(args, attr)
        if value is None:
            continue
        if value < 0:
            common.WARNING(message)
            return False
        assert format_tag in invcf.formats

    lc_min = args.eh_min_call_LC
    lc_max = args.eh_max_call_LC
    if lc_min is not None and lc_max is not None and lc_max < lc_min:
        common.WARNING("--eh-max-call-LC must be >= --eh-min-call-LC")
        return False
    return True
コード例 #8
0
def CheckFilters(invcf, args, vcftype):
    r"""Validate all user-supplied filter options

    Locus-level options are checked first. Then, for each supported
    genotyper, if any of its tool-specific options was supplied the
    VCF must come from that tool and the tool-specific checker must
    accept the values.

    Parameters
    ----------
    invcf : vcf.Reader object
        Input VCF, forwarded to the tool-specific checkers
    args : argparse namespace
        Contains user arguments
    vcftype : enum.
        Specifies which tool this VCF came from.
        Must be included in trh.VCFTYPES

    Returns
    -------
    checks : bool
        True if all filters look ok.
        False at the first invalid filter
    """
    if not CheckLocusFilters(args, vcftype):
        return False

    # HipSTR specific filters
    hipstr_requested = any(
        getattr(args, option) is not None
        for option in ("hipstr_max_call_flank_indel",
                       "hipstr_max_call_stutter",
                       "hipstr_min_supp_reads",
                       "hipstr_min_call_DP",
                       "hipstr_max_call_DP",
                       "hipstr_min_call_Q"))
    if hipstr_requested:
        if vcftype != trh.VCFTYPES["hipstr"]:
            common.WARNING("HipSTR options can only be applied to HipSTR VCFs")
            return False
        if not CheckHipSTRFilters(invcf, args):
            return False

    # GangSTR specific filters. The three gangstr_filter_* options are
    # tested for truthiness, all the others for "is not None".
    gangstr_requested = (
        any(getattr(args, option) is not None
            for option in ("gangstr_min_call_DP",
                           "gangstr_max_call_DP",
                           "gangstr_min_call_Q",
                           "gangstr_expansion_prob_het",
                           "gangstr_expansion_prob_hom",
                           "gangstr_expansion_prob_total"))
        or args.gangstr_filter_span_only
        or args.gangstr_filter_spanbound_only
        or args.gangstr_filter_badCI
        or args.gangstr_require_support is not None
        or args.gangstr_readlen is not None
    )
    if gangstr_requested:
        if vcftype != trh.VCFTYPES["gangstr"]:
            common.WARNING(
                "GangSTR options can only be applied to GangSTR VCFs")
            return False
        if not CheckGangSTRFilters(invcf, args):
            return False

    # adVNTR specific filters
    advntr_requested = any(
        getattr(args, option) is not None
        for option in ("advntr_min_call_DP",
                       "advntr_max_call_DP",
                       "advntr_min_spanning",
                       "advntr_min_flanking",
                       "advntr_min_ML"))
    if advntr_requested:
        if vcftype != trh.VCFTYPES["advntr"]:
            common.WARNING("adVNTR options can only be applied to adVNTR VCFs")
            return False
        if not CheckAdVNTRFilters(invcf, args):
            return False

    # ExpansionHunter specific filters
    eh_requested = any(
        getattr(args, option) is not None
        for option in ("eh_min_ADFL",
                       "eh_min_ADIR",
                       "eh_min_ADSP",
                       "eh_min_call_LC",
                       "eh_max_call_LC"))
    if eh_requested:
        if vcftype != trh.VCFTYPES["eh"]:
            common.WARNING(
                "ExpansionHunter options can only be applied to ExpansionHunter VCFs"
            )
            return False
        if not CheckEHFilters(invcf, args):  # pragma: no cover
            return False  # pragma: no cover

    # popSTR specific filters
    popstr_requested = any(
        getattr(args, option) is not None
        for option in ("popstr_min_call_DP",
                       "popstr_max_call_DP",
                       "popstr_require_support"))
    if popstr_requested:
        if vcftype != trh.VCFTYPES["popstr"]:
            common.WARNING("popSTR options can only be applied to popSTR VCFs")
            return False
        if not CheckPopSTRFilters(invcf, args):
            return False

    return True
コード例 #9
0
def CheckGangSTRFilters(invcf, args):
    r"""Validate the GangSTR call-level filter options

    Every option that was supplied (i.e. is not None) is range-checked
    in turn, and the FORMAT fields it depends on are asserted to be
    present in ``invcf.formats``. ``--gangstr-readlen`` is only
    examined when ``--gangstr-require-support`` was supplied.

    Parameters
    ----------
    invcf : vcf.Reader object
        Input VCF. Only its ``formats`` attribute is consulted
    args : argparse namespace
        Contains user arguments

    Returns
    -------
    checks : bool
        True if all supplied options look ok.
        False (after a warning) at the first invalid option
    """
    min_dp = args.gangstr_min_call_DP
    if min_dp is not None:
        if min_dp < 0:
            common.WARNING("--gangstr-min-call-DP must be >= 0")
            return False
        assert "DP" in invcf.formats

    max_dp = args.gangstr_max_call_DP
    if max_dp is not None:
        if max_dp < 0:
            common.WARNING("--gangstr-max-call-DP must be >= 0")
            return False
        assert "DP" in invcf.formats

    if min_dp is not None and max_dp is not None and max_dp < min_dp:
        common.WARNING(
            "--gangstr-max-call-DP must be >= --gangstr-min-call-DP")
        return False

    min_q = args.gangstr_min_call_Q
    if min_q is not None:
        if min_q < 0 or min_q > 1:
            common.WARNING("--gangstr-min-call-Q must be between 0 and 1")
            return False
        assert "Q" in invcf.formats

    # The three expansion-probability options share the same rule
    expansion_options = (
        ("gangstr_expansion_prob_het",
         "--gangstr-expansion-prob-het must be between 0 and 1"),
        ("gangstr_expansion_prob_hom",
         "--gangstr-expansion-prob-hom must be between 0 and 1"),
        ("gangstr_expansion_prob_total",
         "--gangstr-expansion-prob-total must be between 0 and 1"),
    )
    for attr, message in expansion_options:
        prob = getattr(args, attr)
        if prob is None:
            continue
        if prob < 0 or prob > 1:
            common.WARNING(message)
            return False
        assert "QEXP" in invcf.formats

    support = args.gangstr_require_support
    if support is not None:
        if support < 0:
            common.WARNING("--gangstr-require-support must be >= 0")
            return False
        readlen = args.gangstr_readlen
        if support > 0 and readlen is None:
            common.WARNING(
                "Using --gangstr-require-support requires setting --gangstr-readlen"
            )
            return False
        if readlen is not None and readlen < 20:
            common.WARNING("--gangstr-readlen must be an integer value >= 20")
            return False
        assert all(tag in invcf.formats
                   for tag in ("ENCLREADS", "FLNKREADS", "RC"))

    return True
コード例 #10
0
def main(args):
    r"""Filter a TR VCF at the call and locus level and write the result

    Validates the requested filters, builds the locus- and call-level
    filter lists, registers the extra header fields, then streams the
    input records through the filters into ``<args.out>.vcf``. Summary
    tables are written to ``<args.out>.samplog.tab`` and
    ``<args.out>.loclog.tab``.

    Parameters
    ----------
    args : argparse namespace
        Contains user arguments

    Returns
    -------
    retcode : int
        0 on success. 1 if the input VCF or output directory is
        missing, the filters are invalid, the output file cannot be
        opened, or a record could not be parsed while
        ``args.die_on_warning`` is set
    """
    # Load VCF file
    if not os.path.exists(args.vcf):
        common.WARNING("%s does not exist" % args.vcf)
        return 1
    invcf = vcf.Reader(filename=args.vcf)

    # Set up record harmonizer and infer VCF type
    vcftype = trh.InferVCFType(invcf)

    # Check filters all make sense
    if not CheckFilters(invcf, args, vcftype):
        return 1

    # Set up locus-level filter list
    try:
        filter_list = BuildLocusFilters(args, vcftype)
    except ValueError:
        return 1
    invcf.filters = {}
    for f in filter_list:
        # Header description is the first line of the filter's docstring
        short_doc = f.__doc__ or ''
        short_doc = short_doc.split('\n')[0].lstrip()
        invcf.filters[f.filter_name()] = _Filter(f.filter_name(), short_doc)

    # Set up call-level filters
    call_filters = BuildCallFilters(args)

    # Add new FORMAT fields
    if "FILTER" not in invcf.formats:
        invcf.formats["FILTER"] = _Format("FILTER", 1, "String",
                                          "Call-level filter")

    # Add new INFO fields: (ID, Number, Type, Description)
    new_infos = (
        ("AC", -1, "Integer", "Alternate allele counts"),
        ("REFAC", 1, "Integer", "Reference allele count"),
        ("HET", 1, "Float", "Heterozygosity"),
        ("HWEP", 1, "Float", "HWE p-value for obs. vs. exp het rate"),
        ("HRUN", 1, "Integer", "Length of longest homopolymer run"),
    )
    for info_id, number, info_type, description in new_infos:
        invcf.infos[info_id] = _Info(info_id,
                                     number,
                                     info_type,
                                     description,
                                     source=None,
                                     version=None)

    # Set up output files
    if not os.path.exists(os.path.dirname(os.path.abspath(args.out))):
        common.WARNING("Output directory does not exist")
        return 1
    outvcf = MakeWriter(args.out + ".vcf", invcf, " ".join(sys.argv))
    if outvcf is None:
        return 1

    # Set up sample info
    all_reasons = GetAllCallFilters(call_filters)
    sample_info = {}
    for s in invcf.samples:
        sample_info[s] = {"numcalls": 0, "totaldp": 0}
        for r in all_reasons:
            sample_info[s][r] = 0

    # Set up locus info
    loc_info = {"totalcalls": 0, "PASS": 0}
    for filt in filter_list:
        loc_info[filt.filter_name()] = 0

    # Go through each record. The writer is closed on every exit path
    # so that the output VCF is flushed even when we bail out early.
    record_counter = 0
    try:
        while True:
            try:
                record = next(invcf)
            except IndexError:
                common.WARNING(
                    "Skipping TR that couldn't be parsed by PyVCF. Check VCF format"
                )
                if args.die_on_warning:
                    return 1
                # No record was produced by this call to next(); move on
                # instead of reprocessing the previous record (or
                # hitting an unbound name on the very first one).
                continue
            except StopIteration:
                break
            if args.verbose:
                common.MSG("Processing %s:%s" % (record.CHROM, record.POS))
            record_counter += 1
            if args.num_records is not None and record_counter > args.num_records:
                break
            # Call-level filters
            record = ApplyCallFilters(record, invcf, call_filters, sample_info)

            # Locus-level filters
            record.FILTER = None
            output_record = True
            for filt in filter_list:
                if filt(record) is None:
                    continue
                if args.drop_filtered:
                    output_record = False
                    break
                record.add_filter(filt.filter_name())
                loc_info[filt.filter_name()] += 1
            if args.drop_filtered:
                if record.call_rate == 0:
                    output_record = False
            if output_record:
                trrecord = trh.HarmonizeRecord(vcftype, record)
                # Recalculate locus-level INFO fields
                record.INFO["HRUN"] = utils.GetHomopolymerRun(record.REF)
                if record.num_called > 0:
                    allele_freqs = trrecord.GetAlleleFreqs(
                        uselength=args.use_length)
                    genotype_counts = trrecord.GetGenotypeCounts(
                        uselength=args.use_length)
                    record.INFO["HET"] = utils.GetHeterozygosity(allele_freqs)
                    record.INFO["HWEP"] = utils.GetHardyWeinbergBinomialTest(
                        allele_freqs, genotype_counts)
                    # AC and REFAC are both derived from the same total
                    # of 2 alleles per called sample, so together they
                    # account for every called allele.
                    record.INFO["AC"] = [
                        int(item * (2 * record.num_called))
                        for item in record.aaf
                    ]
                    record.INFO["REFAC"] = int(
                        (1 - sum(record.aaf)) * (2 * record.num_called))
                else:
                    # No calls left at this locus: use placeholder values
                    record.INFO["HET"] = -1
                    record.INFO["HWEP"] = -1
                    record.INFO["AC"] = [0] * len(record.ALT)
                    record.INFO["REFAC"] = 0
                # Recalc filter
                if record.FILTER is None and not args.drop_filtered:
                    record.FILTER = "PASS"
                    loc_info["PASS"] += 1
                    loc_info["totalcalls"] += record.num_called
                # Output the record
                outvcf.write_record(record)
    finally:
        outvcf.close()

    # Output log info
    WriteSampLog(sample_info, all_reasons, args.out + ".samplog.tab")
    WriteLocLog(loc_info, args.out + ".loclog.tab")

    return 0
コード例 #11
0
ファイル: compareSTR.py プロジェクト: richyanicky/TRTools
def main(args):
    r"""Compare the genotype calls of two TR VCF files

    Walks both VCFs in sorted order, compares records found at the same
    chromosome and position for the shared samples, writes the per-call
    comparison table to ``<args.out>-callcompare.tab`` and then emits
    the overall, per-locus and per-sample summaries.

    Parameters
    ----------
    args : argparse namespace
        Contains user arguments

    Returns
    -------
    retcode : int
        0 on success. 1 if no usable shared samples remain,
        ``--stratify-file`` is invalid, or mergeutils.CheckMin reports
        a problem with the current records
    """
    ### Check and load VCF files ###
    vcfreaders = mergeutils.LoadReaders([args.vcf1, args.vcf2],
                                        region=args.region)
    contigs = vcfreaders[0].contigs
    chroms = list(contigs)

    ### Load shared samples ###
    samples = mergeutils.GetSharedSamples(vcfreaders)
    if len(samples) == 0:
        common.WARNING("No shared samples found between vcf readers")
        return 1
    if args.samples:
        # Restrict to the user-supplied sample list (one name per line);
        # the file is closed as soon as it has been read.
        with open(args.samples, "r") as sample_file:
            usesamples = {line.strip() for line in sample_file}
        samples = list(set(samples).intersection(usesamples))
    if len(samples) == 0:
        common.WARNING("No shared samples found between files")
        return 1

    ### Determine FORMAT fields we should look for ###
    if args.stratify_file is not None and args.stratify_file not in [0, 1, 2]:
        common.MSG("--stratify-file must be 0,1, or 2")
        return 1
    format_fields, format_binsizes = GetFormatFields(args.stratify_fields,
                                                     args.stratify_binsizes,
                                                     args.stratify_file,
                                                     vcfreaders)

    ### Keep track of data to summarize at the end ###
    results_dir = {
        "chrom": [],
        "start": [],
        "period": [],
        "sample": [],
        "gtstring1": [],
        "gtstring2": [],
        "gtsum1": [],
        "gtsum2": [],
        "metric-conc-seq": [],
        "metric-conc-len": [],
    }
    for ff in format_fields:
        results_dir[ff + "1"] = []
        results_dir[ff + "2"] = []

    vcftype1 = trh.VCFTYPES[args.vcftype1]
    vcftype2 = trh.VCFTYPES[args.vcftype2]

    ### Walk through sorted readers, merging records as we go ###
    current_records = [next(reader) for reader in vcfreaders]
    is_min = mergeutils.GetMinRecords(current_records, chroms)

    done = mergeutils.DoneReading(current_records)
    num_records = 0
    while not done:
        if any(item is None for item in current_records):
            break
        if args.numrecords is not None and num_records >= args.numrecords:
            break
        if args.verbose:
            mergeutils.PrintCurrentRecords(current_records, is_min)
        if mergeutils.CheckMin(is_min):
            return 1
        # Only compare when every reader is flagged in is_min.
        # all(is_min) tests each flag; wrapping the list in another
        # list would merely test that is_min is non-empty.
        if all(is_min):
            if (current_records[0].CHROM == current_records[1].CHROM
                    and current_records[0].POS == current_records[1].POS):
                UpdateComparisonResults(
                    trh.HarmonizeRecord(vcftype1, current_records[0]),
                    trh.HarmonizeRecord(vcftype2, current_records[1]),
                    format_fields, samples, results_dir)
        current_records = mergeutils.GetNextRecords(vcfreaders,
                                                    current_records, is_min)
        is_min = mergeutils.GetMinRecords(current_records, chroms)
        done = mergeutils.DoneReading(current_records)
        num_records += 1

    ### Load all results to a dataframe and output full results ###
    data = pd.DataFrame(results_dir)
    data.to_csv(args.out + "-callcompare.tab", sep="\t", index=False)

    ### Overall metrics ###
    OutputOverallMetrics(data, format_fields, format_binsizes,
                         args.stratify_file, args.period, args.out)
    if not args.noplot:
        OutputBubblePlot(data,
                         args.period,
                         args.out,
                         minval=args.bubble_min,
                         maxval=args.bubble_max)

    ### Per-locus metrics ###
    OutputLocusMetrics(data, args.out, args.noplot)

    ### Per-sample metrics ###
    OutputSampleMetrics(data, args.out, args.noplot)

    return 0
コード例 #12
0
ファイル: statSTR.py プロジェクト: richyanicky/TRTools
def main(args):
    r"""Compute per-locus statistics for a TR VCF and write them as a table

    One tab-separated row is written per record, to ``<args.out>.tab``
    or to standard output when ``args.out`` is ``"stdout"``. The
    columns after chrom/start/end depend on which statistics were
    requested. Allele frequency plots are produced for at most
    MAXPLOTS records when ``args.plot_afreq`` is set.

    Parameters
    ----------
    args : argparse namespace
        Contains user arguments

    Returns
    -------
    retcode : int
        0 on success. 1 if the VCF does not exist, the sample prefixes
        do not match the sample files, plotting is combined with
        stdout output, or a region is requested for a VCF without a
        ``.tbi`` index next to it
    """
    if not os.path.exists(args.vcf):
        common.WARNING("%s does not exist" % args.vcf)
        return 1
    # Load samples
    sample_lists = []
    sample_prefixes = []
    if args.samples:
        sfiles = args.samples.split(",")
        if args.sample_prefixes:
            sample_prefixes = args.sample_prefixes.split(",")
        else:
            # Default prefixes are "1", "2", ... one per sample file
            sample_prefixes = [str(item) for item in range(1, len(sfiles) + 1)]
        if len(sfiles) != len(sample_prefixes):
            common.MSG("--sample-prefixes must be same length as --samples")
            return 1
        for sf in sfiles:
            # Close each sample file as soon as it has been read
            with open(sf, "r") as sample_file:
                sample_lists.append([line.strip() for line in sample_file])

    invcf = vcf.Reader(filename=args.vcf)
    if args.vcftype != 'auto':
        vcftype = trh.VCFTYPES[args.vcftype]
    else:
        vcftype = trh.InferVCFType(invcf)

    # Column order follows the order in which the values are appended
    # to each row below.
    header = ["chrom", "start", "end"]
    for stat in ("thresh", "afreq", "acount", "hwep", "het", "mean", "mode",
                 "var", "numcalled"):
        if getattr(args, stat):
            header.extend(GetHeader(stat, sample_prefixes))

    if args.out == "stdout":
        if args.plot_afreq:
            common.MSG("Cannot use --out stdout when generating plots")
            return 1
        outf = sys.stdout
    else:
        outf = open(args.out + ".tab", "w")

    try:
        outf.write("\t".join(header) + "\n")

        if args.region:
            if not os.path.isfile(args.vcf + ".tbi"):
                common.MSG("Make sure %s is bgzipped and indexed" % args.vcf)
                return 1
            regions = invcf.fetch(args.region)
        else:
            regions = invcf
        num_plotted = 0
        for record in regions:
            trrecord = trh.HarmonizeRecord(vcftype, record)
            # Strictly below MAXPLOTS so that at most MAXPLOTS plots are made
            if args.plot_afreq and num_plotted < MAXPLOTS:
                PlotAlleleFreqs(trrecord,
                                args.out,
                                samplelists=sample_lists,
                                sampleprefixes=sample_prefixes)
                num_plotted += 1
            items = [record.CHROM, record.POS, record.INFO["END"]]
            if args.thresh:
                items.extend(GetThresh(trrecord, samplelists=sample_lists))
            if args.afreq:
                items.extend(
                    GetAFreq(trrecord,
                             samplelists=sample_lists,
                             uselength=args.use_length))
            if args.acount:
                items.extend(
                    GetAFreq(trrecord,
                             samplelists=sample_lists,
                             uselength=args.use_length,
                             count=True))
            if args.hwep:
                items.extend(
                    GetHWEP(trrecord,
                            samplelists=sample_lists,
                            uselength=args.use_length))
            if args.het:
                items.extend(
                    GetHet(trrecord,
                           samplelists=sample_lists,
                           uselength=args.use_length))
            if args.mean:
                items.extend(GetMean(trrecord, samplelists=sample_lists))
            if args.mode:
                items.extend(GetMode(trrecord, samplelists=sample_lists))
            if args.var:
                items.extend(GetVariance(trrecord, samplelists=sample_lists))
            if args.numcalled:
                items.extend(GetNumSamples(trrecord, samplelists=sample_lists))
            outf.write("\t".join([str(item) for item in items]) + "\n")
    finally:
        # Release the output file on every exit path, but leave the
        # interpreter's stdout open for whoever called us.
        if outf is sys.stdout:
            outf.flush()
        else:
            outf.close()
    return 0
コード例 #13
0
def test_WARNING():
    """Emit the same warning twice; passes if common.WARNING does not raise."""
    for _ in range(2):
        common.WARNING("Writing a test warning")