def set_read1_consensus_to_read2(input_stream, output_stream):
    """Copy a SAM stream, placing each read 2 on the reference of its read 1.

    Header lines (those starting with "@") are copied through unchanged.
    The remaining records are examined as consecutive lines sharing a query
    name. When read 1 of such a pair is not unmapped, read 2 is rewritten to
    carry read 1's reference name, has its unmapped flag cleared, is
    positioned at 1 and is given a single match CIGAR spanning its whole
    query sequence. Both records of a pair are written, read 1 first.

    A record whose neighbour has a different query name is written
    unchanged, including the final record of the stream.

    Args:
        input_stream: file-like object providing SAM text lines through
            ``readline()`` and iteration.
        output_stream: object with a ``write()`` method receiving the
            header lines and the string form of every record.
    """
    # Copy the header through untouched.
    line = input_stream.readline()
    while line.startswith("@"):
        output_stream.write(line)
        line = input_stream.readline()

    # readline() returns an empty string at end of input: in that case there
    # is no alignment record to parse at all.
    prev_read = Sam_record(line) if line else None

    for line in input_stream:
        read = Sam_record(line)

        if prev_read is None:
            # The previous pair was just flushed; start a new candidate pair.
            prev_read = read
            continue

        if read.get_query_name() != prev_read.get_query_name():
            # prev_read has no adjacent mate: pass it through unchanged.
            output_stream.write(str(prev_read))
            prev_read = read
            continue

        # Same query name: decide which record is read 1 and which is read 2.
        if read.is_second_read() and prev_read.is_first_read():
            read1, read2 = prev_read, read
        else:
            read1, read2 = read, prev_read

        if not read1.is_unmapped():
            read2.set_reference_name(read1.get_reference_name())
            read2.set_unmapped_flag(False)
            read2.set_position(1)
            read2.set_cigar_string("%sM" % len(read2.get_query_sequence()))

        output_stream.write(str(read1))
        output_stream.write(str(read2))
        prev_read = None

    # Flush a trailing unpaired record, which would otherwise be lost.
    if prev_read is not None:
        output_stream.write(str(prev_read))
# Example no. 2
# 0
def load_from_sites_generator(stream):
    """Yield (read1, read2) pairs of records that share a query name.

    Every line of ``stream`` is parsed with ``Sam_record``. A record that
    reports ``is_first_read()`` is treated as read 1; any other record is
    treated as read 2. A record whose mate has not been seen yet is kept in
    a dictionary keyed by query name until the mate turns up, at which point
    the pair is yielded as a tuple with read 1 first.

    Every 10000 lines a progress line is written to stderr holding the line
    count and the number of read 1 and read 2 records still waiting for a
    mate. Records still waiting when the stream ends are not yielded.
    """
    pending_first = {}
    pending_second = {}
    for line_number, raw_line in enumerate(stream, 1):
        if line_number % 10000 == 0:
            sys.stderr.write('%s %s %s\n'%(line_number, len(pending_first), len(pending_second)))

        record = Sam_record(raw_line)
        is_first = record.is_first_read()
        name = record.get_query_name()

        if is_first:
            mate = pending_second.pop(name, None)
            pair = (record, mate)
            waiting = pending_first
        else:
            mate = pending_first.pop(name, None)
            pair = (mate, record)
            waiting = pending_second

        if not mate:
            # Mate not seen yet: park this record until it arrives.
            waiting[name] = record

        if pair[0] and pair[1]:
            yield pair
 command ="%s view -bS - | %s sort - %s"%(samtools_bin,  samtools_bin, output_bam_file)
 logging.info(command)
 output_stream,process_output= utils_commands.get_input_stream_from_command(command)
 
 #get the header
 line = input_stream.readline()
 while line.startswith("@"):
     output_stream.write(line)
     line = input_stream.readline()
 
 while line:
     read1=Sam_record(line)
     line = input_stream.readline()
     read2=Sam_record(line)
     if read1.get_query_name() == read2.get_query_name():
         if read1.is_second_read() and read2.is_first_read():
             tmp = read1
             read1=read2
             read2=tmp
         read2.set_reference_name(read1.get_reference_name())
         output_stream.write(str(read1))
         output_stream.write(str(read2))
     else:
         logging.critical("bam file is not sorted by read name")
         input_stream.close()
         output_stream.close()
         #os.remove(output_bam_file+'.bam')
         return
     line = input_stream.readline()
     
 return_code=process_input.wait()