def load_data_inference(self):
    """Yield fixed-size inference inputs cut from the signal file.

    The signal at ``self.file_path`` is read with ``normalize=True`` and
    everything before ``self.start_index`` is dropped. The remainder is
    walked in strides of ``self.step``; each stride takes a window of at
    most ``self.seq_len`` samples (windows near the end of the signal may
    be shorter) and hands it to ``self.padding`` together with
    ``self.seq_len``.

    Yields:
        ``self.inference_labels(input=..., seq_len=...)`` where ``input``
        is the result of ``self.padding`` as a numpy array and ``seq_len``
        is the un-padded window length as a numpy array.
    """
    signal = read_signal(self.file_path, normalize=True)[self.start_index:]
    for begin in range(0, len(signal), self.step):
        window = signal[begin:begin + self.seq_len]
        # Record the true length before padding so consumers can tell
        # real samples from whatever self.padding adds.
        valid_len = len(window)
        padded_window = self.padding(window, self.seq_len)
        yield self.inference_labels(
            input=np.asarray(padded_window),
            seq_len=np.asarray(valid_len),
        )
def trim_signal(signal_file, label_file, outdir):
    """Write the label-aligned portion of a signal file to ``outdir``.

    The signal is read un-normalized and sliced from the first entry of the
    label's ``start`` sequence up to (not including) its last entry. The
    slice is written as space-separated values.

    Args:
        signal_file: path of the signal file to trim.
        label_file: path of the label file providing ``start`` positions.
        outdir: directory that receives the trimmed file.

    Returns:
        Path of the written file: ``outdir`` joined with the signal file's
        name (text after the last ``/``, cut at its first ``.``) plus
        ``.trim.signal``.
    """
    file_name = signal_file.split('/')[-1]
    stem = file_name.split('.')[0]
    outpath = os.path.join(outdir, stem + ".trim.signal")

    raw_signal = read_signal(signal_file, normalize=False)
    alignment = read_label(label_file)
    first, last = alignment.start[0], alignment.start[-1]
    trimmed = raw_signal[first:last]

    with open(outpath, 'w+') as handle:
        handle.write(" ".join(map(str, trimmed)))
    return outpath
def trim_complement_signal(self, outdir):
    """Write the label-aligned portion of ``self.signal_file`` to ``outdir``.

    The signal is read un-normalized and sliced from the first entry of the
    label's ``start`` sequence up to (not including) its last entry; the
    slice is handed to ``self.write_signal`` along with the output path.

    NOTE(review): despite the "complement" in the name, the slice taken
    here is the plain ``signal[start:end]`` span -- confirm this is the
    intended region.

    Args:
        outdir: existing directory that receives the trimmed file.

    Returns:
        Whatever ``self.write_signal`` returns for the trimmed signal and
        the path ``<outdir>/<name>.trim.signal``, where ``<name>`` is the
        signal file name cut at its first ``.``.

    Raises:
        AssertionError: if ``outdir`` is not a directory or
            ``self.label_file`` is not a file.
    """
    missing_msg = "{} does not exist"
    assert os.path.isdir(outdir), missing_msg.format(outdir)
    assert os.path.isfile(self.label_file), missing_msg.format(self.label_file)

    stem = self.signal_file.split('/')[-1].split('.')[0]
    target = os.path.join(outdir, stem + ".trim.signal")

    raw_signal = read_signal(self.signal_file, normalize=False)
    alignment = read_label(self.label_file)
    first, last = alignment.start[0], alignment.start[-1]
    return self.write_signal(raw_signal[first:last], target)
def load_data(self):
    """Read in data from signal files and create specific motif comparisons.

    For every path in ``self.file_list`` that ends in ``.signal``, the
    signal is read and the sibling ``.label`` file (same path with the
    extension swapped) is wrapped in a ``SignalLabel``. Each item produced
    by its ``trim_to_motif`` for the motifs CCAGG / CCTGG / CEAGG / CETGG
    is passed, together with the signal, to ``read_raw`` and the results
    are accumulated across all files.

    A file whose processing raises ``ValueError`` is reported on stdout and
    skipped.

    Returns:
        ``self.training_labels(input=..., seq_len=..., label=...)`` where
        ``input`` and ``seq_len`` are numpy arrays built from the
        accumulated lists, and ``label`` is a list of arrays right-padded
        with ``-1`` to the longest collected label length.

    Raises:
        ValueError: if nothing was collected (no ``.signal`` entries in
            ``self.file_list``, or every file was skipped).
    """
    motifs = ["CCAGG", "CCTGG", "CEAGG", "CETGG"]
    event = []
    event_length = []
    label = []
    label_length = []
    file_count = 0

    for name in self.file_list:
        if not name.endswith(".signal"):
            continue
        try:
            file_pre = os.path.splitext(name)[0]
            f_signal = read_signal(name)
            label_name = file_pre + '.label'
            trim_signal = SignalLabel(name, label_name)
            motif_generator = trim_signal.trim_to_motif(
                motifs, prefix_length=0, suffix_length=0,
                methyl_index=1, blank=self.blank)
            for motif in motif_generator:
                (tmp_event, tmp_event_length,
                 tmp_label, tmp_label_length) = read_raw(
                    f_signal, motif, self.seq_len, short=True)
                event += tmp_event
                event_length += tmp_event_length
                label += tmp_label
                label_length += tmp_label_length
            # Progress report every 10th successfully processed file; the
            # number printed is the running count of collected entries.
            if file_count % 10 == 0:
                sys.stdout.write("%d lines read. \n" % len(event))
            file_count += 1
        except ValueError:
            print("Error Reading Data from file {}".format(name))
            continue

    # max() on an empty list fails with an unhelpful message; say what
    # actually went wrong while keeping the same exception type.
    if not label_length:
        raise ValueError(
            "No training data could be loaded: no usable .signal files "
            "were found in file_list")

    pad_len = max(label_length)
    # np.pad is the documented public API; the np.lib.pad alias was dropped
    # from the np.lib namespace in NumPy 2.0.
    padded_labels = [
        np.pad(lab, (0, pad_len - lab_len), 'constant', constant_values=-1)
        for lab, lab_len in zip(label, label_length)
    ]
    return self.training_labels(input=np.asarray(event),
                                seq_len=np.asarray(event_length),
                                label=padded_labels)
def load_data(self):
    """Read signal/label file pairs and assemble padded training data.

    For every path in ``self.file_list`` that ends in ``.signal``, the
    signal is read and the sibling ``.label`` file (same path with the
    extension swapped) is read with ``read_label``. Both are passed to
    ``read_raw`` with ``self.seq_len`` and ``self.max_event_len``, and the
    results are accumulated across all files.

    A file whose processing raises ``ValueError`` is reported on stdout and
    skipped.

    Returns:
        ``self.training_labels(input=..., seq_len=..., label=...)`` where
        ``input`` and ``seq_len`` are numpy arrays built from the
        accumulated lists, and ``label`` is a list of arrays right-padded
        with ``-1`` to the longest collected label length.

    Raises:
        ValueError: if nothing was collected (no ``.signal`` entries in
            ``self.file_list``, or every file was skipped).
    """
    event = []
    event_length = []
    label = []
    label_length = []
    file_count = 0

    for name in self.file_list:
        if not name.endswith(".signal"):
            continue
        try:
            file_pre = os.path.splitext(name)[0]
            f_signal = read_signal(name)
            label_name = file_pre + '.label'
            # NOTE(review): under Python 3 this division yields a float
            # (e.g. 2.0); confirm read_label accepts that or switch to //.
            f_label = read_label(label_name,
                                 skip_start=10,
                                 window_n=(self.kmer - 1) / 2,
                                 alphabet=self.len_y)
            (tmp_event, tmp_event_length,
             tmp_label, tmp_label_length) = read_raw(
                f_signal, f_label, self.seq_len,
                max_event_length=self.max_event_len)
            event += tmp_event
            event_length += tmp_event_length
            label += tmp_label
            label_length += tmp_label_length
            # Progress report every 10th successfully processed file. The
            # number printed is the running count of collected entries,
            # not the number of files, despite the message wording.
            if file_count % 10 == 0:
                sys.stdout.write("%d files read. \n" % len(event))
            file_count += 1
        except ValueError:
            print("Error Reading Data from file {}".format(name))
            continue

    # max() on an empty list fails with an unhelpful message; say what
    # actually went wrong while keeping the same exception type.
    if not label_length:
        raise ValueError(
            "No training data could be loaded: no usable .signal files "
            "were found in file_list")

    pad_len = max(label_length)
    # np.pad is the documented public API; the np.lib.pad alias was dropped
    # from the np.lib namespace in NumPy 2.0.
    padded_labels = [
        np.pad(lab, (0, pad_len - lab_len), 'constant', constant_values=-1)
        for lab, lab_len in zip(label, label_length)
    ]
    return self.training_labels(input=np.asarray(event),
                                seq_len=np.asarray(event_length),
                                label=padded_labels)
def read_signal(self, normalize):
    """Read ``self.signal_file`` by delegating to ``read_signal``.

    NOTE(review): this presumably lives in a class body, in which case the
    inner ``read_signal`` call resolves to the function of the same name in
    the enclosing module scope rather than to this method -- confirm.

    Args:
        normalize: forwarded unchanged as the ``normalize`` keyword.

    Returns:
        Whatever the delegated ``read_signal`` call returns.
    """
    signal_path = self.signal_file
    return read_signal(signal_path, normalize=normalize)