예제 #1
0
 def _load_encode_rna(self, class_files):
     """Read RNA records from ``class_files`` into ``self.data``/``self.labels``.

     Both attributes are reset to empty lists first. Every record yielded
     by ``io.parse_fasta(handle, "_")`` is a ``(header, block)`` pair whose
     block is split on ``"_"``:

     * piece 0 is the sequence. It is upper-cased and every character that
       is not in ``self.alpha_coder.alph0`` is replaced by a randomly
       chosen member of that alphabet.
     * if ``self.is_rna_pwm`` is set, pieces ``1..len(alph1)`` are read as
       whitespace-separated floats, one piece per PWM column, and the
       result of ``self._join_seq_pwm(sequence, pwm)`` is stored.
     * otherwise the first space-separated token of piece 1 is the
       structure string. It is upper-cased, characters outside
       ``self.alpha_coder.alph1`` are replaced randomly, and the
       (sequence, structure) pair is passed through
       ``self.alpha_coder.encode`` and ``self.one_hot_encoder.encode``.

     Labels are the comma-separated integers of the header when
     ``self.multilabel`` is set, otherwise ``[class_id]`` where
     ``class_id`` is the position of the file in ``class_files``.

     Args:
         class_files: iterable of file names, one per class.
     """
     self.data, self.labels = [], []

     def random_seq_char(_match):
         # Stand-in for a character outside the sequence alphabet.
         return choice(self.alpha_coder.alph0)

     def random_struct_char(_match):
         # Stand-in for a character outside the structure alphabet.
         return choice(self.alpha_coder.alph1)

     # Negated character classes: match anything NOT in the alphabet.
     pattern_seq = r"[^{}]".format(re.escape(self.alpha_coder.alph0))
     pattern_struct = r"[^{}]".format(re.escape(self.alpha_coder.alph1))
     for class_id, file_name in enumerate(class_files):
         handle = io.get_handle(file_name, "rt")
         # try/finally so the handle is released even when a record is
         # malformed and parsing/encoding raises.
         try:
             for header, block in io.parse_fasta(handle, "_"):
                 lines = block.split("_")
                 sequence = re.sub(pattern_seq, random_seq_char,
                                   lines[0].upper())
                 if self.is_rna_pwm:
                     pwm = np.zeros(
                         (len(sequence), len(self.alpha_coder.alph1)),
                         dtype=np.float32)
                     # Column ``col`` of the PWM is stored on piece
                     # ``col + 1`` (piece 0 is the sequence itself).
                     for col in range(pwm.shape[1]):
                         pwm[:, col] = [float(value)
                                        for value in lines[col + 1].split()]
                     self.data.append(self._join_seq_pwm(sequence, pwm))
                 else:
                     structure = re.sub(pattern_struct, random_struct_char,
                                        lines[1].split(" ")[0].upper())
                     joined = self.alpha_coder.encode((sequence, structure))
                     self.data.append(self.one_hot_encoder.encode(joined))
                 if self.multilabel:
                     self.labels.append(
                         [int(label) for label in header.split(',')])
                 else:
                     self.labels.append([class_id])
         finally:
             handle.close()
예제 #2
0
 def _load_encode_dna(self, class_files):
     """Read DNA records from ``class_files`` into ``self.data``/``self.labels``.

     Both attributes are reset to empty lists first. Every sequence
     yielded by ``io.parse_fasta(handle)`` is upper-cased, each of the
     characters ``N``, ``Y``, ``M``, ``R``, ``W`` and ``K`` is replaced by
     a randomly chosen member of ``self.one_hot_encoder.alphabet``, and
     the result of ``self.one_hot_encoder.encode`` is appended to
     ``self.data``.

     Labels are the comma-separated integers of the header when
     ``self.multilabel`` is set, otherwise ``[class_id]`` where
     ``class_id`` is the position of the file in ``class_files``.

     Args:
         class_files: iterable of file names, one per class.
     """
     self.data, self.labels = [], []

     def random_base(_match):
         # Stand-in for one of the characters matched by the pattern.
         return choice(self.one_hot_encoder.alphabet)

     # Constant pattern: compile once instead of per record.
     ambiguous = re.compile(r"[NYMRWK]")
     for class_id, file_name in enumerate(class_files):
         handle = io.get_handle(file_name, "rt")
         # try/finally so the handle is released even when a record is
         # malformed and parsing/encoding raises.
         try:
             for header, sequence in io.parse_fasta(handle):
                 sequence = ambiguous.sub(random_base, sequence.upper())
                 self.data.append(self.one_hot_encoder.encode(sequence))
                 if self.multilabel:
                     self.labels.append(
                         [int(label) for label in header.split(',')])
                 else:
                     self.labels.append([class_id])
         finally:
             handle.close()
예제 #3
0
 def _load_encode_rna(self, class_files):
     """Read RNA records from ``class_files`` into ``self.data``/``self.labels``.

     Both attributes are reset to empty lists first. Every record yielded
     by ``io.parse_fasta(handle, "_")`` is a ``(header, block)`` pair whose
     block is split on ``"_"``:

     * piece 0 is the sequence. It is upper-cased and each of the
       characters ``N``, ``Y``, ``M``, ``R``, ``W`` and ``K`` is replaced
       by a randomly chosen member of ``self.alpha_coder.alph0``.
     * the structure string is the first space-separated token of piece 2
       when ``self.alpha_coder.alph1 == "HIMS"`` and of piece 1 otherwise.
       It is upper-cased and each ``F`` or ``T`` is replaced by a randomly
       chosen member of ``self.alpha_coder.alph1``.

     The (sequence, structure) pair is passed through
     ``self.alpha_coder.encode`` and ``self.one_hot_encoder.encode`` and
     appended to ``self.data``.

     Labels are the comma-separated integers of the header when
     ``self.multilabel`` is set, otherwise ``[class_id]`` where
     ``class_id`` is the position of the file in ``class_files``.

     Args:
         class_files: iterable of file names, one per class.
     """
     self.data, self.labels = [], []

     def random_seq_char(_match):
         # Stand-in for a matched character in the sequence.
         return choice(self.alpha_coder.alph0)

     def random_struct_char(_match):
         # Stand-in for a matched character in the structure string.
         return choice(self.alpha_coder.alph1)

     # Constant patterns: compile once instead of per record.
     ambiguous_seq = re.compile(r"[NYMRWK]")
     ambiguous_struct = re.compile(r"[FT]")
     # The structure string sits on a different piece of the record when
     # the structure alphabet is "HIMS".
     idx = 2 if self.alpha_coder.alph1 == "HIMS" else 1
     for class_id, file_name in enumerate(class_files):
         handle = io.get_handle(file_name, "rt")
         # try/finally so the handle is released even when a record is
         # malformed and parsing/encoding raises.
         try:
             for header, block in io.parse_fasta(handle, "_"):
                 lines = block.split("_")
                 # Upper-case first so lower-case input is matched by the
                 # upper-case pattern, exactly as is done for the
                 # structure string below.
                 sequence = ambiguous_seq.sub(random_seq_char,
                                              lines[0].upper())
                 structure = ambiguous_struct.sub(
                     random_struct_char,
                     lines[idx].split(" ")[0].upper())
                 joined = self.alpha_coder.encode((sequence, structure))
                 self.data.append(self.one_hot_encoder.encode(joined))
                 if self.multilabel:
                     self.labels.append(
                         [int(label) for label in header.split(',')])
                 else:
                     self.labels.append([class_id])
         finally:
             handle.close()