コード例 #1
0
ファイル: event_map.py プロジェクト: Phelimb/dtwmap
    def __init__(self, fasta, template_model_filepath, complement_model_filepath):
        """Set up the models, the reference event table and the SAM output.

        fasta                      -- path of the reference FASTA file.
        template_model_filepath    -- path handed to EventModel for the
                                      template strand.
        complement_model_filepath  -- path handed to EventModel for the
                                      complement strand.
        """
        # Plain state first; the reference table and length are filled in by
        # _convert_fast5_to_fasta() further down.
        self.reference_length = 0
        self.query_events = []
        self.ref_events_dict = {}
        self.fasta_file = fasta

        self.template_model = EventModel(template_model_filepath)
        self.complement_model = EventModel(complement_model_filepath)

        # Must run after fasta_file and both models have been assigned.
        self._convert_fast5_to_fasta()
        self.event_mapper = EventMapper()

        # DtwToSam is given absolute model paths and the FASTA path as rname.
        template_model_abspath = os.path.abspath(template_model_filepath)
        complement_model_abspath = os.path.abspath(complement_model_filepath)
        self.samfile = DtwToSam(
            path_to_template_model_file=template_model_abspath,
            path_to_complement_model_file=complement_model_abspath,
            rname=self.fasta_file,
        )
コード例 #2
0
ファイル: dtwmap.py プロジェクト: Phelimb/dtwmap
# Load the complement-strand model file (model_kmer_means_template is used
# below and must already be defined earlier, outside this excerpt).
model_kmer_means_complement=process_model_file(model_file_complement)

# Process the reference FASTA once per model: T = template, C = complement.
kmerhashT = process_ref_fasta_raw(fasta_file,model_kmer_means_template)
kmerhashC = process_ref_fasta_raw(fasta_file,model_kmer_means_complement)

# presumably the lengths of the reference sequences -- verify in get_seq_len
seqlengths = get_seq_len(fasta_file)



# Starts empty; nothing is stored in it within the visible part of the script.
readprediction=dict()




#SamAlignments=[]
# Output object targeting args.outsam.
# NOTE(review): the reference name is hard-coded to "EM_079517" -- confirm
# this is intended for every reference passed as fasta_file.
SamOutFile=DtwToSam(path_to_template_model_file=os.path.abspath(model_file_template),path_to_complement_model_file=os.path.abspath(model_file_complement),outfile=args.outsam,rname="EM_079517")


# Visit every *.fast5 file directly inside args.watchdir.
for filename in glob.glob(os.path.join(args.watchdir, '*.fast5')):
    print filename
    if (args.verbose is True):
        print os.path.abspath(filename)
    # Opened read-only. NOTE(review): no matching close() is visible in this
    # excerpt.
    hdf = h5py.File(filename, 'r')
    #try:
    # One iteration per entry under Analyses/EventDetection_000/Reads.
    for read in hdf['Analyses']['EventDetection_000']['Reads']:
        # [()] reads the complete Events dataset into memory.
        events = hdf['Analyses']['EventDetection_000']['Reads'][read]['Events'][()]
        event_collection=list()
        for event in events:
            # Keep field 2 of each event as a float
            # (assumed to be the event mean -- TODO confirm).
            event_collection.append(float(event[2]))

コード例 #3
0
ファイル: dtwmap.py プロジェクト: minoTour/ampbalance
# Load the complement-strand model file (model_kmer_means_template is used
# below and must already be defined earlier, outside this excerpt).
model_kmer_means_complement=process_model_file(model_file_complement)

# Process the reference FASTA once per model: T = template, C = complement.
kmerhashT = process_ref_fasta_raw(fasta_file,model_kmer_means_template)
kmerhashC = process_ref_fasta_raw(fasta_file,model_kmer_means_complement)

# presumably the lengths of the reference sequences -- verify in get_seq_len
seqlengths = get_seq_len(fasta_file)



# Starts empty; nothing is stored in it within the visible part of the script.
readprediction=dict()




#SamAlignments=[]
# Output object targeting args.outsam.
# NOTE(review): the reference name is hard-coded to "EM_079517" -- confirm
# this is intended for every reference passed as fasta_file.
SamOutFile=DtwToSam(path_to_template_model_file=os.path.abspath(model_file_template),path_to_complement_model_file=os.path.abspath(model_file_complement),outfile=args.outsam,rname="EM_079517")


# Visit every *.fast5 file directly inside args.watchdir.
for filename in glob.glob(os.path.join(args.watchdir, '*.fast5')):
    print filename
    if (args.verbose is True):
        print os.path.abspath(filename)
    # Opened read-only. NOTE(review): no matching close() is visible in this
    # excerpt.
    hdf = h5py.File(filename, 'r')
    #try:
    # One iteration per entry under Analyses/EventDetection_000/Reads.
    for read in hdf['Analyses']['EventDetection_000']['Reads']:
        # [()] reads the complete Events dataset into memory.
        events = hdf['Analyses']['EventDetection_000']['Reads'][read]['Events'][()]
        event_collection=list()
        for event in events:
            # Debug output: one line per event, printing field 2.
            print event[2]
            #print type event[2]
            # Field 2 is stored as-is, without any float conversion
            # (assumed to be the event mean -- TODO confirm).
            event_collection.append(event[2])
コード例 #4
0
ファイル: event_map.py プロジェクト: Phelimb/dtwmap
class Fast52FastaMapper(object):
    """Map the events of a fast5 read onto a FASTA reference.

    At construction time the reference is converted into four event
    sequences (template / complement model, each for the forward strand and
    for the reverse complement).  ``map`` then aligns the events of a read
    against those sequences with ``EventMapper.map``.
    """

    def __init__(self, fasta, template_model_filepath, complement_model_filepath):
        """Load both models, build the reference events and open the SAM output.

        fasta                      -- path of the reference FASTA file.
        template_model_filepath    -- path handed to EventModel for the
                                      template strand.
        complement_model_filepath  -- path handed to EventModel for the
                                      complement strand.
        """
        self.fasta_file = fasta
        self.ref_events_dict = {}
        self.query_events = []
        self.reference_length = 0

        self.template_model = EventModel(template_model_filepath)
        self.complement_model = EventModel(complement_model_filepath)

        # Needs fasta_file and both models, so it has to come after them.
        self._convert_fast5_to_fasta()
        self.event_mapper = EventMapper()
        self.samfile = DtwToSam(path_to_template_model_file=os.path.abspath(template_model_filepath),
                                path_to_complement_model_file=os.path.abspath(complement_model_filepath),
                                rname=self.fasta_file)

    def close(self):
        """Close the SAM output created in ``__init__``."""
        self.samfile.close()

    def _convert_fast5_to_fasta(self):
        """Convert every record of the reference FASTA into model events.

        Fills ``self.ref_events_dict[model][strand]`` (model is "template" or
        "complement", strand is "forward" or "reverse") and sets
        ``self.reference_length`` to the summed length of all records.
        Despite its name, the method reads the FASTA reference, not a fast5
        file.
        """
        self.ref_events_dict = {"template": {'forward': [], 'reverse': []},
                                "complement": {'forward': [], 'reverse': []}}
        # Reset alongside the table so a repeated call does not double-count.
        self.reference_length = 0
        models = (("template", self.template_model),
                  ("complement", self.complement_model))
        for record in SeqIO.parse(self.fasta_file, 'fasta'):
            ref_seq = record.seq
            reverse_seq = ref_seq.reverse_complement()
            self.reference_length += len(ref_seq)
            for model_name, model in models:
                strands = self.ref_events_dict[model_name]
                strands["forward"].extend(Seq2Events(ref_seq, model = model).events)
                strands["reverse"].extend(Seq2Events(reverse_seq, model = model).events)

    def map(self, fast5_file):
        """Align the events of one fast5 file against the reference events.

        fast5_file -- value passed to the ``Fast5File`` constructor.

        The template events are always mapped against both strands; the
        complement events are mapped only when the read has any.
        NOTE(review): the converted positions are not stored and nothing is
        written to ``self.samfile`` here.
        """
        fast5_file = Fast5File(fast5_file)
        template_events, complement_events = self._split_events(fast5_file)
        template_start_event = self._calculate_start_event(template_events)
        complement_start_event = self._calculate_start_event(complement_events)

        template_query = template_events[template_start_event:]
        template_forward_results = self.event_mapper.map(self.ref_events_dict["template"]["forward"], template_query)
        template_reverse_results = self.event_mapper.map(self.ref_events_dict["template"]["reverse"], template_query)

        self._convert_path_to_template_postitions(template_forward_results.path, template_start_event)
        self._convert_path_to_template_postitions(template_reverse_results.path, template_start_event, forward = False)

        # len() rather than truthiness so array-like event containers work too.
        if len(complement_events) > 0:
            complement_query = complement_events[complement_start_event:]
            complement_forward_results = self.event_mapper.map(self.ref_events_dict["complement"]["forward"], complement_query)
            complement_reverse_results = self.event_mapper.map(self.ref_events_dict["complement"]["reverse"], complement_query)

            # The complement query was sliced at complement_start_event, so
            # that is the offset its path indices have to be shifted by.
            self._convert_path_to_template_postitions(complement_forward_results.path, complement_start_event)
            self._convert_path_to_template_postitions(complement_reverse_results.path, complement_start_event, forward = False)

    def _convert_path_to_template_postitions(self, path, start_event, forward = True):
        """Translate an alignment path into (read_position, ref_position) pairs.

        path        -- pair of equally long index sequences; the first holds
                       indices into the query events, the second indices into
                       the reference events.
        start_event -- offset added to every query index, i.e. the number of
                       leading read events skipped before mapping.
        forward     -- when False the reference index is mirrored as
                       ``self.reference_length - index``.

        Returns a list of ``(read_position, ref_position)`` tuples.
        """
        print(path)
        query_indices, ref_indices = path
        positions = []
        for query_index, ref_index in zip(query_indices, ref_indices):
            read_position = query_index + start_event
            if forward:
                ref_position = ref_index
            else:
                ref_position = self.reference_length - ref_index
            positions.append((read_position, ref_position))
        return positions

    def _calculate_start_event(self, events):
        """Return the index of the event following the first sharp drop in mean.

        A drop is an event whose ``mean`` lies more than 40 below the mean of
        either of the two preceding events (both start out as 0).  Returns 0
        when no such drop is found, including for an empty sequence.
        """
        prev1 = 0
        prev2 = 0
        for index, event in enumerate(events):
            if (prev1 - event.mean > 40) or (prev2 - event.mean > 40):
                return index + 1
            prev2 = prev1
            prev1 = event.mean
        return 0

    def _split_events(self, fast5_file):
        """Split the pre-basecalled events of a read at its hairpin.

        fast5_file -- object providing ``get_pre_basecalled_events()`` and
                      ``read_metadata``.

        Returns ``(template_events, complement_events)``; the complement part
        is an empty list when the read has no hairpin.
        """
        events = fast5_file.get_pre_basecalled_events()
        # NOTE(review): assumes 'hairpin_event_index' lives in the same
        # read_metadata[0] record as 'has_hairpin' -- confirm against Fast5File.
        read_metadata = fast5_file.read_metadata[0]
        if read_metadata.get('has_hairpin', False):
            hairpin_event_index = read_metadata['hairpin_event_index']
            template_events = events[:hairpin_event_index]
            complement_events = events[hairpin_event_index:]
        else:
            template_events = events
            complement_events = []
        return template_events, complement_events