Example #1
import os
import shutil

import samtools  # project-local wrapper around the samtools binary (assumed import path)


def create_data_dir(args, fasta_path, bam_path):
    """Stage the reference FASTA and BAM under <output_dir>/data and index both for bam2aln."""
    print("++Creating data directory for bam2aln processing.")
    data_dir = os.path.join(args.output_dir, "data")
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    reference_fasta_path = os.path.join(data_dir, "reference.fasta")
    if not os.path.exists(reference_fasta_path):
        shutil.copy2(fasta_path, reference_fasta_path)
        samtools.faidx(reference_fasta_path)

    reference_bam_path = os.path.join(data_dir, "reference.bam")
    if not os.path.exists(reference_bam_path):
        shutil.copy2(bam_path, reference_bam_path)
        samtools.index(reference_bam_path)
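
A minimal usage sketch for create_data_dir, assuming an argparse-style args object that exposes output_dir; the input paths below are hypothetical placeholders.

if __name__ == "__main__":
    import argparse

    # Hypothetical inputs; any readable FASTA/BAM pair will do.
    args = argparse.Namespace(output_dir="results/sample_01")
    create_data_dir(args, "ref/reference.fasta", "alignments/sample_01.sorted.bam")
    # results/sample_01/data/ now holds reference.fasta (+ .fai index) and
    # reference.bam (+ .bai index), ready for bam2aln.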
Example #2
import os
import pickle as p

# Project-local wrapper modules (assumed import paths within this pipeline package).
import gatk
import picardtools
import samtools
import breseq.command
import pipelines.common


def do_gatk(args):
    """Run the GATK branch: mark duplicates, realign around indels, genotype, then filter to .gd."""
    fasta_path, sorted_bam_path = pipelines.common.ssaha2_alignment(args)


    #Gatk
    #Step 3
    step_3_dir = os.path.join(args.output_dir, "03_gatk")
    step_3_file = os.path.join(step_3_dir, "gatk.done")
    realigned_bam_path = ""
    if not os.path.exists(step_3_file):
        print "++Gatk recalibration and realignment of reference alignment file."
        if not os.path.exists(step_3_dir):
            os.makedirs(step_3_dir)
            
        #Step: Picard: Mark Duplicates.
        dedup_bam_path = os.path.join(step_3_dir, "dedup.bam")
        dedup_metrics_path = os.path.join(step_3_dir, "dedup.metrics")
        picardtools.mark_duplicates(sorted_bam_path, dedup_bam_path, dedup_metrics_path)

        #Step: Samtools: Index BAM. ***Do after mark duplicates.
        index_done_file = os.path.join(step_3_dir, "index.done")
        if not os.path.exists(index_done_file):
            samtools.index(dedup_bam_path)
            open(index_done_file, 'w').close()

        #Step: Gatk Realign.
        #Gatk: Intervals.
        intervals_path = gatk.realigner_target_creator(fasta_path, dedup_bam_path)
        #Gatk: Indel Realign.
        realigned_bam_path = gatk.indel_realigner(fasta_path, dedup_bam_path, intervals_path)

        #Step: Gatk Recal. ***May not be able to do due to need for dbSNP file.
        #CountCovariates.
        #recal_csv_path = os.path.join(step_3_dir, "recal_data.csv")
        #gatk.count_covariates(fasta_path, realigned_bam_path, recal_csv_path)
        ##TableRecalibration.
        #recal_bam_path = os.path.join(step_3_dir, "recal.bam")
        #gatk.table_recalibration(fasta_path, realigned_bam_path, recal_csv_path, recal_bam_path)
        
        with open(step_3_file, 'wb') as done_file:
            p.dump(realigned_bam_path, done_file)
    else:
        with open(step_3_file, 'rb') as done_file:
            realigned_bam_path = p.load(done_file)
        
    #Gatk Output
    #Step 4
    output_dir = os.path.join(args.output_dir, "output")
    raw_vcf_path = os.path.join(output_dir, "output.vcf")
    gd_path = os.path.join(output_dir, "output.gd")
    print "++Filtering poor values for SNPs and INDELs in output and converting vcf files to gd."

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    if not os.path.exists(raw_vcf_path):
        gatk.unified_genotyper(fasta_path, realigned_bam_path, raw_vcf_path, args.glm_option)

    breseq.command.vcf2gd(raw_vcf_path, gd_path)
    
    #Gatk recommended filter values for SNPs and INDELs.
    snp_filters = ['"QD < 2.0"',
                   '"MQ < 40.0"',
                   '"FS > 60.0"',
                   '"HaplotypeScore > 13.0"',
                   '"MQRankSum < -12.5"',
                   '"ReadPosRankSum < -8.0"']

    indel_filters = ['"QD < 2.0"',
                     '"ReadPosRankSum < -20.0"',
                     '"InbreedingCoeff < -0.8"',
                     '"FS > 200.0"']

    snp_gd = os.path.join(output_dir, "SNP.gd")
    indels_gd = os.path.join(output_dir, "INDELS.gd")

    breseq.command.genome_diff_filter(snp_gd, gd_path, ["SNP"], snp_filters)
    breseq.command.genome_diff_filter(indels_gd, gd_path, ["INS", "DEL"], indel_filters)

    breseq.command.genome_diff_merge([snp_gd, indels_gd], gd_path)
    breseq.command.genome_diff_filter(gd_path, gd_path, ["ALL"], ['"AF!=1.00"'])


    pipelines.common.create_data_dir(args, fasta_path, realigned_bam_path)
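
A minimal driver sketch for do_gatk, assuming a command-line interface that supplies output_dir and glm_option (the attributes the function reads directly); the flag names are hypothetical, and pipelines.common.ssaha2_alignment may expect further attributes not shown here.

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="GATK variant-calling pipeline (sketch)")
    parser.add_argument("--output-dir", dest="output_dir", required=True)
    parser.add_argument("--glm", dest="glm_option", default="BOTH",
                        help="Genotype likelihood model for UnifiedGenotyper (SNP, INDEL, or BOTH).")
    do_gatk(parser.parse_args())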