Python GenotypeRegionsNode 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: pypeline.nodes.paleomix

클래스/타입: GenotypeRegionsNode

hotexamples.com에서의 예제들: 4

Python GenotypeRegionsNode - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python의 pypeline.nodes.paleomix.GenotypeRegionsNode에 대해 가장 높은 평가를 받은 실제 사용 예제들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

customize(2)

예제 #1

파일 보기

파일: genotype.py 프로젝트: health1987/paleomix

def build_sampling_nodes(options, genotyping, sample, regions, dependencies):
    """Assemble the chain of nodes that writes the FASTA file for one sample.

    A pileup is generated from the sample's BAM file (GenotypeRegionsNode
    with pileup_only=True), indexed with tabix, passed to SampleRegionsNode
    to write the FASTA file, and that FASTA file is finally indexed.

    Parameters:
        options: Options object; 'samples_root' and 'samtools_max_threads'
                 are read from it.
        genotyping: Mapping from which 'Padding' and 'MPileup' are read.
        sample: Name of the sample; used to build the BAM filename and to
                look up the output FASTA in regions["Genotypes"].
        regions: Mapping from which 'Genotypes', 'Prefix', 'Realigned',
                 'FASTA' and 'BED' are read.
        dependencies: Forwarded to build_regions_nodes.

    Returns a 1-tuple holding the FastaIndexNode that terminates the chain.
    """
    fasta_path = regions["Genotypes"][sample]
    pileup_path = swap_ext(fasta_path, ".pileup.bgz")

    padded_bed, regions_nodes = build_regions_nodes(regions,
                                                    genotyping["Padding"],
                                                    dependencies)

    # The BAM lives under the samples root; realigned BAMs carry an extra
    # ".realigned" postfix.
    bam_name = "%s.%s.bam" % (sample, regions["Prefix"])
    bam_path = os.path.join(options.samples_root, bam_name)
    if regions["Realigned"]:
        bam_path = add_postfix(bam_path, ".realigned")
    index_node = build_bam_index_node(bam_path)

    pileup_kwargs = {
        "pileup_only": True,
        "reference": regions["FASTA"],
        "bedfile": padded_bed,
        "infile": bam_path,
        "outfile": pileup_path,
        "nbatches": options.samtools_max_threads,
        # The pileup must wait for both the region nodes and the BAM index.
        "dependencies": regions_nodes + (index_node,),
    }
    pileup_builder = GenotypeRegionsNode.customize(**pileup_kwargs)
    apply_samtools_options(pileup_builder.command,
                           genotyping["MPileup"],
                           "--mpileup-argument")
    pileup_node = pileup_builder.build_node()

    pileup_index = TabixIndexNode(infile=pileup_path,
                                  preset="pileup",
                                  dependencies=pileup_node)

    sampler = SampleRegionsNode(infile=pileup_path,
                                bedfile=regions["BED"],
                                outfile=fasta_path,
                                dependencies=pileup_index)

    return (FastaIndexNode(infile=fasta_path, dependencies=sampler),)

예제 #2

파일 보기

def build_sampling_nodes(options, genotyping, sample, regions, dependencies):
    """Create the nodes needed to write and index the FASTA of a sample.

    The steps, in dependency order, are: build a pileup of the sample's BAM
    file (GenotypeRegionsNode with pileup_only=True), tabix-index the pileup,
    run SampleRegionsNode on it to write the FASTA file, and index the FASTA.

    Parameters:
        options: Options object providing 'samples_root' and
                 'samtools_max_threads'.
        genotyping: Mapping providing the 'Padding' and 'MPileup' settings.
        sample: Sample name; part of the BAM filename and the key used to
                find the output FASTA in regions["Genotypes"].
        regions: Mapping providing 'Genotypes', 'Prefix', 'Realigned',
                 'FASTA' and 'BED'.
        dependencies: Handed to build_regions_nodes.

    Returns a tuple with a single element, the final FastaIndexNode.
    """
    output_fasta = regions["Genotypes"][sample]
    output_pileup = swap_ext(output_fasta, ".pileup.bgz")

    bed_with_padding, bed_nodes = build_regions_nodes(
        regions, genotyping["Padding"], dependencies)

    # Locate the BAM under the samples root, using the realigned variant
    # when the regions ask for it.
    input_bam = os.path.join(
        options.samples_root, "%s.%s.bam" % (sample, regions["Prefix"]))
    if regions["Realigned"]:
        input_bam = add_postfix(input_bam, ".realigned")
    bam_index = build_bam_index_node(input_bam)

    # The BAM index is appended to the region nodes so the pileup waits for
    # both.
    pileup_deps = bed_nodes + (bai_node_tuple_item(bam_index)
                               if False else (bam_index, ))
    customized = GenotypeRegionsNode.customize(pileup_only=True,
                                               reference=regions["FASTA"],
                                               bedfile=bed_with_padding,
                                               infile=input_bam,
                                               outfile=output_pileup,
                                               nbatches=options.samtools_max_threads,
                                               dependencies=pileup_deps)
    apply_samtools_options(
        customized.command, genotyping["MPileup"], "--mpileup-argument")
    pileup_step = customized.build_node()

    tabix_step = TabixIndexNode(
        infile=output_pileup, preset="pileup", dependencies=pileup_step)

    sampling_step = SampleRegionsNode(
        infile=output_pileup,
        bedfile=regions["BED"],
        outfile=output_fasta,
        dependencies=tabix_step)

    fasta_index = FastaIndexNode(
        infile=output_fasta,
        dependencies=sampling_step)

    return (fasta_index, )

예제 #3

파일 보기

파일: genotype.py 프로젝트: health1987/paleomix

def build_genotyping_nodes_cached(options, genotyping, sample, regions,
                                  dependencies):
    """Build (or fetch from cache) the genotyping nodes for a sample/prefix.

    Genotyping, filtering of the calls and indexing of the resulting files
    are set up for the given sample and prefix. When the option
    'GenotypeEntirePrefix' is enabled the BAM is genotyped only once, and
    each set of RegionsOfInterest merely extracts the relevant regions while
    the consensus sequence is constructed.

    Parameters:
        options: An options object (c.f. pypeline.tools.phylo_pipeline.config).
        genotyping: Genotyping options defined for a specific set of areas of
                    interest, corresponding to Genotyping:NAME in the makefile.
        sample: The name of the sample to be genotyped.
        regions: A dictionary for a 'RegionsOfInterest' from the makefile.
        dependencies: Dependencies that must be met before genotyping starts.

    Returns a tuple containing the filename of the filtered and tabix-indexed
    VCF file, and the top-level node generating this file. Repeated calls for
    the same BAM and prefix return the same VCF and nodes if the option
    'GenotypeEntirePrefix' is enabled; otherwise each ROI is genotyped
    individually.

    Output files are generated in ./results/PROJECT/genotyping. With
    'GenotypeEntirePrefix' enabled, the following files are generated:
        SAMPLE.PREFIX.vcf.bgz: Unfiltered calls for variant/non-variant sites.
        SAMPLE.PREFIX.vcf.pileup.bgz: Pileup of sites containing SNPs.
        SAMPLE.PREFIX.vcf.pileup.bgz.tbi: Tabix index of the pileup.
        SAMPLE.PREFIX.filtered.vcf.bgz: Variant calls filtered with vcf_filter.
        SAMPLE.PREFIX.filtered.vcf.bgz.tbi: Tabix index for the filtered VCF.

    Without 'GenotypeEntirePrefix' for a given ROI, these files are generated
    for that ROI instead (see the descriptions above):
        SAMPLE.PREFIX.ROI.filtered.vcf.bgz
        SAMPLE.PREFIX.ROI.filtered.vcf.bgz.tbi
        SAMPLE.PREFIX.ROI.vcf.bgz
        SAMPLE.PREFIX.ROI.vcf.pileup.bgz
        SAMPLE.PREFIX.ROI.vcf.pileup.bgz.tbi

    Regardless of the 'GenotypeEntirePrefix' option, the following files are
    generated for each set of RegionsOfInterest (ROI):
        SAMPLE.PREFIX.ROI.CDS.fasta: FASTA sequence of each feature in the ROI.
        SAMPLE.PREFIX.ROI.CDS.fasta.fai: FASTA index generated using SAMTools.

    """
    (output_prefix, bamfile, bedfile,
     dependencies) = build_genotyping_bedfile_nodes(options, genotyping,
                                                    sample, regions,
                                                    dependencies)

    # Results are memoized per (BAM, output prefix) pair.
    cache_key = (bamfile, output_prefix)
    if cache_key in _VCF_CACHE:
        return _VCF_CACHE[cache_key]

    calls_path = swap_ext(output_prefix, ".vcf.bgz")
    pileup_path = swap_ext(output_prefix, ".vcf.pileup.bgz")
    filtered_path = swap_ext(output_prefix, ".filtered.vcf.bgz")

    # Step 1: run samtools mpileup | bcftools view on the BAM.
    caller = GenotypeRegionsNode.customize(
        reference=regions["FASTA"],
        bedfile=bedfile,
        infile=bamfile,
        outfile=calls_path,
        nbatches=options.samtools_max_threads,
        dependencies=dependencies)

    caller.command.add_option("--mpileup-argument",
                              "-f=%s" % (regions["FASTA"],),
                              sep="=")
    apply_samtools_options(caller.command,
                           genotyping["MPileup"],
                           "--mpileup-argument")
    apply_samtools_options(caller.command,
                           genotyping["BCFTools"],
                           "--bcftools-argument")
    calls_node = caller.build_node()

    # Step 2: gather pileups for the sites with SNPs. This is needed to
    #         filter properly by minor-allele frequency, since the VCF only
    #         counts the major non-ref allele (c.f. field DP4).
    pileup_builder = VCFPileupNode.customize(
        reference=regions["FASTA"],
        infile_bam=bamfile,
        infile_vcf=calls_path,
        outfile=pileup_path,
        dependencies=calls_node)
    apply_samtools_options(pileup_builder.command,
                           genotyping["MPileup"],
                           "--mpileup-argument")
    pileup_node = pileup_builder.build_node()

    pileup_index = TabixIndexNode(infile=pileup_path,
                                  preset="pileup",
                                  dependencies=pileup_node)

    # Step 3: filter every site with the 'vcf_filter' command.
    filter_node = VCFFilterNode.customize(
        infile=calls_path,
        pileup=pileup_path,
        outfile=filtered_path,
        regions=regions,
        dependencies=pileup_index)
    filter_node = _apply_vcf_filter_options(filter_node, genotyping, sample)

    # Step 4: tabix-index the filtered VCF, giving random access to it when
    #         the consensus FASTA sequence is built later in the pipeline.
    filtered_index = TabixIndexNode(infile=filtered_path,
                                    preset="vcf",
                                    dependencies=filter_node)

    result = (filtered_path, filtered_index)
    _VCF_CACHE[cache_key] = result
    return result

예제 #4

파일 보기

def build_genotyping_nodes_cached(options, genotyping, sample, regions,
                                  dependencies):
    """Set up genotyping for a sample and prefix, reusing cached results.

    This covers genotyping, filtering of the calls, and indexing of the
    files. If the option 'GenotypeEntirePrefix' is enabled, the BAM is
    genotyped a single time and every set of RegionsOfInterest simply
    extracts the relevant regions during construction of the consensus
    sequence.

    Parameters:
        options: An options object (c.f. pypeline.tools.phylo_pipeline.config).
        genotyping: Genotyping options defined for a specific set of areas of
                    interest, corresponding to Genotyping:NAME in the makefile.
        sample: The name of the sample to be genotyped.
        regions: A dictionary for a 'RegionsOfInterest' from the makefile.
        dependencies: Dependencies that must be met before genotyping starts.

    Returns a tuple containing the filename of the filtered and tabix-indexed
    VCF file, and the top-level node generating this file. When
    'GenotypeEntirePrefix' is enabled, multiple calls for the same BAM and
    prefix yield the same VCF and nodes; otherwise each ROI is genotyped
    individually.

    Output files are generated in ./results/PROJECT/genotyping. If the option
    'GenotypeEntirePrefix' is enabled, the following files are generated:
        SAMPLE.PREFIX.vcf.bgz: Unfiltered calls for variant/non-variant sites.
        SAMPLE.PREFIX.vcf.pileup.bgz: Pileup of sites containing SNPs.
        SAMPLE.PREFIX.vcf.pileup.bgz.tbi: Tabix index of the pileup.
        SAMPLE.PREFIX.filtered.vcf.bgz: Variant calls filtered with vcf_filter.
        SAMPLE.PREFIX.filtered.vcf.bgz.tbi: Tabix index for the filtered VCF.

    If 'GenotypeEntirePrefix' is not enabled for a given ROI, the following
    files are generated for that ROI (see the descriptions above):
        SAMPLE.PREFIX.ROI.filtered.vcf.bgz
        SAMPLE.PREFIX.ROI.filtered.vcf.bgz.tbi
        SAMPLE.PREFIX.ROI.vcf.bgz
        SAMPLE.PREFIX.ROI.vcf.pileup.bgz
        SAMPLE.PREFIX.ROI.vcf.pileup.bgz.tbi

    In addition, independent of the 'GenotypeEntirePrefix' option, these
    files are generated for each set of RegionsOfInterest (ROI):
        SAMPLE.PREFIX.ROI.CDS.fasta: FASTA sequence of each feature in the ROI.
        SAMPLE.PREFIX.ROI.CDS.fasta.fai: FASTA index generated using SAMTools.

    """
    bedfile_nodes = build_genotyping_bedfile_nodes(options, genotyping,
                                                   sample, regions,
                                                   dependencies)
    output_prefix, bamfile, bedfile, dependencies = bedfile_nodes

    # One cache entry per (BAM, output prefix) combination.
    key = (bamfile, output_prefix)
    if key in _VCF_CACHE:
        return _VCF_CACHE[key]

    vcf_calls = swap_ext(output_prefix, ".vcf.bgz")
    vcf_pileups = swap_ext(output_prefix, ".vcf.pileup.bgz")
    vcf_filtered = swap_ext(output_prefix, ".filtered.vcf.bgz")

    # 1. Call samtools mpileup | bcftools view on the BAM.
    genotyper = GenotypeRegionsNode.customize(reference=regions["FASTA"],
                                              bedfile=bedfile,
                                              infile=bamfile,
                                              outfile=vcf_calls,
                                              nbatches=options.samtools_max_threads,
                                              dependencies=dependencies)

    apply_samtools_options(genotyper.command, genotyping["MPileup"],
                           "--mpileup-argument")
    apply_samtools_options(genotyper.command, genotyping["BCFTools"],
                           "--bcftools-argument")
    genotype_node = genotyper.build_node()

    # 2. Collect pileups of the sites with SNPs; the VCF counts only the
    #    major non-ref allele (c.f. field DP4), so the pileup is required to
    #    filter properly by the frequency of the minor allele.
    pileup_customizer = VCFPileupNode.customize(
        reference=regions["FASTA"],
        infile_bam=bamfile,
        infile_vcf=vcf_calls,
        outfile=vcf_pileups,
        dependencies=genotype_node)
    apply_samtools_options(pileup_customizer.command, genotyping["MPileup"],
                           "--mpileup-argument")
    vcfpileup_node = pileup_customizer.build_node()

    pileup_tabix = TabixIndexNode(
        infile=vcf_pileups,
        preset="pileup",
        dependencies=vcfpileup_node)

    # 3. Filter all sites by means of the 'vcf_filter' command.
    filter_customizer = VCFFilterNode.customize(
        infile=vcf_calls,
        pileup=vcf_pileups,
        outfile=vcf_filtered,
        regions=regions,
        dependencies=pileup_tabix)
    vcffilter_node = _apply_vcf_filter_options(filter_customizer, genotyping,
                                               sample)

    # 4. Tabix index, so that the VCF file can be accessed randomly when the
    #    consensus FASTA sequence is built later in the pipeline.
    filtered_tabix = TabixIndexNode(
        infile=vcf_filtered,
        preset="vcf",
        dependencies=vcffilter_node)

    cached = (vcf_filtered, filtered_tabix)
    _VCF_CACHE[key] = cached
    return cached