Example #1
        command = '%s sampe %s %s %s %s %s %s | %s view -bS - > %s' % (
            BWA_bin, read_group_command, genome_file, sai_file1, sai_file2,
            fastq_file1, fastq_file2, samtools_bin, bam_file)
    else:
        command = '%s samse %s %s %s %s | %s view -bS - > %s' % (
            BWA_bin, read_group_command, genome_file, sai_file1, fastq_file1,
            samtools_bin, bam_file)
    return_code = command_runner.run_command(command)

    if return_code != 0:
        run_fine = False

    if sort:
        files_and_dir.append(bam_file)
        if picard_dir:
            sorted_bam_file = os.path.join(output_dir,
                                           sample_name + '_sorted.bam')
            return_code = utils.sort_bam_file_per_coordinate(picard_dir,
                                                             bam_file,
                                                             sorted_bam_file,
                                                             overwrite=True)
        else:
            # old-style samtools sort takes an output prefix, not a file name
            sorted_bam_file = os.path.join(output_dir, sample_name + '_sorted')
            command = '%s sort %s %s' % (samtools_bin, bam_file,
                                         sorted_bam_file)
            return_code = command_runner.run_command(command)
        if return_code != 0:
            run_fine = False

    if run_fine and clean_up:
        return_code = remove_file(files_and_dir)
        if return_code != 0:
            run_fine = False

    return run_fine
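
command_runner.run_command itself is not shown on this page. A minimal stand-in, assuming it simply executes the command through a shell (the pipelines above contain '|' redirections, so shell=True is required) and hands back the exit status the callers test against 0:

import subprocess

def run_command(command):
    # Hypothetical sketch of command_runner.run_command: the commands built
    # above are shell pipelines ('bwa ... | samtools view ...'), so run them
    # through a shell and return the process exit code.
    print "Running: %s" % command
    return subprocess.call(command, shell=True)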
     
        if current_reference != '*':
            nb_dups = find_duplicates(first_reads, second_reads,
                                      distance_threshold)
            total_nb_dups += nb_dups
            nb_fragment += len(second_reads)
        output_reads(output_stream, first_reads, second_reads)
    library_size = estimate_library_size(nb_fragment, total_nb_uniqs)
    print "%s fragments" % (nb_fragment)
    print "%s (%.2f%%) duplicates" % (total_nb_dups,
                                      float(total_nb_dups) / nb_fragment * 100)
    print "nb unique=%d" % (total_nb_uniqs)
    print "library size=%d" % round(library_size, 0)
    print "Sort the new bam file"
    output_stream.flush()
    output_stream.close()
    if picard_dir:
        output_bam_file = tmp + '_mrk_dup.bam'
        return_code = utils.sort_bam_file_per_coordinate(
            picard_dir,
            tmp_bam_file,
            output_bam_file,
            overwrite=True,
            validation_stringency="SILENT")
    else:
        output_bam_file = tmp + '_mrk_dup'
        command = '%s sort %s %s' % (samtools_bin, tmp_bam_file,
                                     output_bam_file)
        return_code = command_runner.run_command(command)
    if return_code == 0:
        # remove the unsorted temporary file once sorting succeeded
        command_runner.run_command('rm -f %s' % (tmp_bam_file))


def find_duplicates(first_reads, second_reads, distance_threshold):
    uniq_second_sequences = {}
    all_second_reads = second_reads.values()
    nb_duplicate = 0
    if len(all_second_reads) > 0:
        # seed the unique-sequence map with the first second-mate read
        uniq_second_sequences[all_second_reads[0].get_query_sequence()] = \
            [all_second_reads[0]]

        for sam_record in all_second_reads[1:]:
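
The snippet is cut off here, and estimate_library_size is not shown either. A plausible sketch, assuming it inverts the Lander-Waterman saturation model (the one Picard's library-complexity estimation also uses), where the number of unique fragments U observed among N sequenced fragments from a library of size L satisfies U = L * (1 - exp(-N / L)):

import math

def estimate_library_size(nb_fragment, nb_unique):
    # Hypothetical stand-in: solve nb_unique = L * (1 - exp(-nb_fragment / L))
    # for the library size L by bisection; the left-hand side is strictly
    # increasing in L, so bracket-and-bisect converges.
    if nb_unique <= 0 or nb_fragment <= nb_unique:
        return float(nb_unique)  # no duplicates seen: nothing to extrapolate
    f = lambda L: L * (1.0 - math.exp(-float(nb_fragment) / L)) - nb_unique
    lo = hi = float(nb_unique)  # f(nb_unique) < 0, so the root lies above
    while f(hi) < 0:
        hi *= 2.0  # grow the bracket until the root is enclosed
    for _ in range(100):
        mid = (lo + hi) / 2.0
        if f(mid) < 0:
            lo = mid
        else:
            hi = mid
    return (lo + hi) / 2.0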
Example #4
    file_to_remove = []
    sam_file = run_smalt_paired(consensus_file, read1_fastq, read2_fastq)
    file_to_remove.append(sam_file)
    if os.path.exists(single_fastq):
        sam_file_single = run_smalt_single(consensus_file, single_fastq)
        file_to_remove.append(sam_file_single)
        corrected_sam_file = correct_smalt_sam_file(sam_file, all_read_groups,
                                                    sam_file_single)
    else:
        corrected_sam_file = correct_smalt_sam_file(sam_file, all_read_groups)
    file_to_remove.append(corrected_sam_file)

    name, ext = os.path.splitext(corrected_sam_file)

    output_bam = os.path.join(name + "_sorted.bam")
    sort_bam_file_per_coordinate(picard_dir,
                                 input_bam=corrected_sam_file,
                                 output_bam=output_bam,
                                 overwrite=True,
                                 CREATE_INDEX="true")

    file_to_remove.append(output_bam)

    mark_dups_jar = os.path.join(picard_dir, 'MarkDuplicates.jar')
    mark_dups_bam = os.path.join(name + '_sorted_mrk_dup.bam')
    mark_dups_metric = os.path.join(name + '_sorted_mrk_dup.metric')
    command = 'java -Xmx5G -jar %s I=%s O=%s METRICS_FILE=%s VALIDATION_STRINGENCY=LENIENT MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=100 CREATE_INDEX=true' % (
        mark_dups_jar, output_bam, mark_dups_bam, mark_dups_metric)
    command_runner.run_command(command)
    file_to_remove.append(mark_dups_bam)
    fixed_bam = os.path.join(name + '_sorted_mrk_dup_fixed.bam')
    # This command removes the duplicate flag when a read is mapped but its
    # mate isn't. It also removes unmapped reads from the bam file, as they
    # prevent the merging for some reason.
    command = """samtools view -h %s |
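
Both examples lean on sort_bam_file_per_coordinate from the project's utils module, which this page does not show. A rough sketch, assuming it wraps Picard's SortSam.jar (the per-tool jar layout matching the MarkDuplicates.jar call above) and forwards any extra keyword arguments as KEY=value options on the Picard command line:

import os
import subprocess

def sort_bam_file_per_coordinate(picard_dir, input_bam, output_bam,
                                 overwrite=False, **picard_options):
    # Hypothetical stand-in: coordinate-sort a SAM/BAM file with Picard
    # SortSam. Extra keyword arguments (e.g. CREATE_INDEX="true" or
    # validation_stringency="SILENT") become KEY=value pairs.
    if os.path.exists(output_bam) and not overwrite:
        return 0
    sort_sam_jar = os.path.join(picard_dir, 'SortSam.jar')
    options = ' '.join('%s=%s' % (key.upper(), value)
                       for key, value in picard_options.items())
    command = 'java -Xmx2G -jar %s I=%s O=%s SORT_ORDER=coordinate %s' % (
        sort_sam_jar, input_bam, output_bam, options)
    return subprocess.call(command, shell=True)

The callers above only check the returned exit code against 0, which subprocess.call provides directly.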