def __init__(self, fasta, template_model_filepath, complement_model_filepath):
    """Set up the models, the reference events, the event mapper and the SAM writer.

    Args:
        fasta: Reference FASTA; stored as ``fasta_file`` and also passed
            to ``DtwToSam`` as ``rname``.
        template_model_filepath: File handed to ``EventModel`` for the
            template model.
        complement_model_filepath: File handed to ``EventModel`` for the
            complement model.
    """
    # Plain state first; none of these depend on anything else.
    self.fasta_file = fasta
    self.query_events = []
    self.ref_events_dict = {}
    self.reference_length = 0

    # The reference conversion runs only after both models are loaded.
    self.template_model = EventModel(template_model_filepath)
    self.complement_model = EventModel(complement_model_filepath)
    self._convert_fast5_to_fasta()

    self.event_mapper = EventMapper()

    # The SAM writer receives absolute model paths.
    sam_options = {
        'path_to_template_model_file': os.path.abspath(template_model_filepath),
        'path_to_complement_model_file': os.path.abspath(complement_model_filepath),
        'rname': self.fasta_file,
    }
    self.samfile = DtwToSam(**sam_options)
# Load the complement model, then build the reference lookups for each
# model and the per-sequence lengths from the same FASTA file.
model_kmer_means_complement = process_model_file(model_file_complement)
kmerhashT = process_ref_fasta_raw(fasta_file, model_kmer_means_template)
kmerhashC = process_ref_fasta_raw(fasta_file, model_kmer_means_complement)
seqlengths = get_seq_len(fasta_file)
readprediction = {}

# SAM writer; the reference name is hard-coded here.
SamOutFile = DtwToSam(
    path_to_template_model_file=os.path.abspath(model_file_template),
    path_to_complement_model_file=os.path.abspath(model_file_complement),
    outfile=args.outsam,
    rname="EM_079517",
)

# Walk every FAST5 file currently present in the watch directory.
fast5_pattern = os.path.join(args.watchdir, '*.fast5')
for filename in glob.glob(fast5_pattern):
    print(filename)
    if args.verbose is True:
        print(os.path.abspath(filename))
    hdf = h5py.File(filename, 'r')
    reads_group = hdf['Analyses']['EventDetection_000']['Reads']
    for read in reads_group:
        # [()] reads the whole Events dataset into memory.
        events = reads_group[read]['Events'][()]
        # Field 2 of each event record is collected as a float
        # (presumably the event mean -- TODO confirm against the schema).
        event_collection = [float(event[2]) for event in events]
# Load the complement model, then build the reference lookups for each
# model and the per-sequence lengths from the same FASTA file.
model_kmer_means_complement = process_model_file(model_file_complement)
kmerhashT = process_ref_fasta_raw(fasta_file, model_kmer_means_template)
kmerhashC = process_ref_fasta_raw(fasta_file, model_kmer_means_complement)
seqlengths = get_seq_len(fasta_file)
readprediction = {}

# SAM writer; the reference name is hard-coded here.
SamOutFile = DtwToSam(
    path_to_template_model_file=os.path.abspath(model_file_template),
    path_to_complement_model_file=os.path.abspath(model_file_complement),
    outfile=args.outsam,
    rname="EM_079517",
)

# Walk every FAST5 file currently present in the watch directory.
fast5_pattern = os.path.join(args.watchdir, '*.fast5')
for filename in glob.glob(fast5_pattern):
    print(filename)
    if args.verbose is True:
        print(os.path.abspath(filename))
    hdf = h5py.File(filename, 'r')
    reads_group = hdf['Analyses']['EventDetection_000']['Reads']
    for read in reads_group:
        # [()] reads the whole Events dataset into memory.
        events = reads_group[read]['Events'][()]
        event_collection = []
        for event in events:
            # Field 2 is echoed and stored unconverted (presumably the
            # event mean -- TODO confirm against the schema).
            print(event[2])
            event_collection.append(event[2])
class Fast52FastaMapper(object):
    """Align the events of a FAST5 read against events derived from a FASTA reference.

    At construction time every record of the reference is converted into
    four expected event lists: template and complement model, each for the
    forward sequence and its reverse complement.  ``map`` then aligns a
    read's events against those lists through ``EventMapper``.
    """

    def __init__(self, fasta, template_model_filepath, complement_model_filepath):
        """Load both models, convert the reference and open the SAM writer.

        Args:
            fasta: Reference FASTA; stored as ``fasta_file`` and also passed
                to ``DtwToSam`` as ``rname``.
            template_model_filepath: File handed to ``EventModel`` for the
                template model.
            complement_model_filepath: File handed to ``EventModel`` for the
                complement model.
        """
        self.fasta_file = fasta
        self.ref_events_dict = {}
        self.query_events = []
        self.reference_length = 0
        self.template_model = EventModel(template_model_filepath)
        self.complement_model = EventModel(complement_model_filepath)
        # Needs fasta_file and both models, so it must run after them.
        self._convert_fast5_to_fasta()
        self.event_mapper = EventMapper()
        self.samfile = DtwToSam(
            path_to_template_model_file=os.path.abspath(template_model_filepath),
            path_to_complement_model_file=os.path.abspath(complement_model_filepath),
            rname=self.fasta_file,
        )

    def close(self):
        """Close the underlying SAM writer."""
        self.samfile.close()

    def _convert_fast5_to_fasta(self):
        """Convert the FASTA reference into expected event lists.

        Despite its name, this reads ``self.fasta_file`` (not a FAST5 file).
        It fills ``self.ref_events_dict[model][direction]`` for the models
        ``"template"``/``"complement"`` and the directions
        ``"forward"``/``"reverse"`` (reverse complement), and accumulates
        the total sequence length in ``self.reference_length``.  Events of
        all records are concatenated in file order.
        """
        self.ref_events_dict = {
            "template": {"forward": [], "reverse": []},
            "complement": {"forward": [], "reverse": []},
        }
        # Reset together with the dict so a repeated call cannot double-count.
        self.reference_length = 0
        models = (
            ("template", self.template_model),
            ("complement", self.complement_model),
        )
        for record in SeqIO.parse(self.fasta_file, 'fasta'):
            ref_seq = record.seq
            self.reference_length += len(ref_seq)
            # Computed once per record instead of once per model.
            reverse_seq = ref_seq.reverse_complement()
            for direction, sequence in (("forward", ref_seq), ("reverse", reverse_seq)):
                for model_name, model in models:
                    self.ref_events_dict[model_name][direction].extend(
                        Seq2Events(sequence, model=model).events
                    )

    def map(self, fast5_file):
        """Align one read's template (and, if present, complement) events.

        Args:
            fast5_file: Value handed to ``Fast5File`` to open the read.

        Returns:
            None.  The converted positions are currently not stored or
            returned; only the debug output of the path conversion is
            visible.
        """
        fast5_file = Fast5File(fast5_file)
        template_events, complement_events = self._split_events(fast5_file)
        template_start_event = self._calculate_start_event(template_events)
        complement_start_event = self._calculate_start_event(complement_events)

        self._map_strand("template", template_events, template_start_event)
        if complement_events:
            self._map_strand("complement", complement_events, complement_start_event)

    def _map_strand(self, model_name, events, start_event):
        """Align ``events[start_event:]`` against one model's references.

        Args:
            model_name: ``"template"`` or ``"complement"``; selects the
                entry of ``self.ref_events_dict``.
            events: All events of that strand of the read.
            start_event: Index of the first event to align; also the offset
                added back to the path's read indices.

        Returns:
            ``(forward_positions, reverse_positions)``, each a list of
            ``(read_position, ref_position)`` pairs.
        """
        reference = self.ref_events_dict[model_name]
        query = events[start_event:]
        forward_results = self.event_mapper.map(reference["forward"], query)
        reverse_results = self.event_mapper.map(reference["reverse"], query)
        forward_positions = self._convert_path_to_template_postitions(
            forward_results.path, start_event
        )
        reverse_positions = self._convert_path_to_template_postitions(
            reverse_results.path, start_event, forward=False
        )
        return forward_positions, reverse_positions

    def _convert_path_to_template_postitions(self, path, start_event, forward=True):
        """Translate an alignment path into read/reference positions.

        Args:
            path: Pair ``(read_indices, ref_indices)`` of equally long
                sequences, as found on the mapper result's ``path``.
            start_event: Offset added to every read index.
            forward: When False the reference index is mirrored as
                ``self.reference_length - index``.

        Returns:
            List of ``(read_position, ref_position)`` tuples.
        """
        # Debug output kept from the original implementation.
        print(path)
        read_indices, ref_indices = path
        positions = []
        for read_index, ref_index in zip(read_indices, ref_indices):
            read_position = read_index + start_event
            if forward:
                ref_position = ref_index
            else:
                ref_position = self.reference_length - ref_index
            positions.append((read_position, ref_position))
        return positions

    def _calculate_start_event(self, events, min_drop=40):
        """Return the index just after the first large drop in event mean.

        An event counts as a drop when its ``mean`` is more than
        ``min_drop`` below the mean of either of the two preceding events.

        Args:
            events: Iterable of objects exposing a ``mean`` attribute.
            min_drop: Size of the decrease that marks the start.

        Returns:
            Index of the event following the drop, or 0 when no drop is
            found (including for empty input).
        """
        prev1 = 0
        prev2 = 0
        for index, event in enumerate(events):
            if (prev1 - event.mean > min_drop) or (prev2 - event.mean > min_drop):
                return index + 1
            prev2 = prev1
            prev1 = event.mean
        return 0

    def _split_events(self, fast5_file):
        """Split a read's events into template and complement parts.

        Args:
            fast5_file: Opened read exposing ``get_pre_basecalled_events()``
                and ``read_metadata``.

        Returns:
            ``(template_events, complement_events)``.  Without a hairpin
            all events are template events and the complement is ``[]``.
        """
        events = fast5_file.get_pre_basecalled_events()
        # The hairpin flag is read from read_metadata[0]; the hairpin index
        # is taken from the same record (assumed to live there -- TODO
        # confirm against Fast5File).
        metadata = fast5_file.read_metadata[0]
        if metadata.get('has_hairpin', False):
            hairpin_index = metadata['hairpin_event_index']
            return events[:hairpin_index], events[hairpin_index:]
        return events, []