コード例 #1
0
    def __getitem__(self, idx):
        """Build the feature and label arrays for batch number ``idx``.

        The batch covers rows ``idx * batch_size`` up to (but excluding)
        ``min(size, (idx + 1) * batch_size)`` of ``self.df``, so the last
        batch may contain fewer than ``batch_size`` rows.

        Args:
            idx: Zero-based batch index.

        Returns:
            A ``(data, labels)`` tuple. ``data`` is a float32 array whose
            columns are the GO feature vector (one column per entry of
            ``self.gos_dict``) followed by the 53 expression values of each
            row. ``labels`` is an int32 multi-hot array with one column per
            entry of ``self.terms_dict``.
        """
        batch_index = np.arange(idx * self.batch_size,
                                min(self.size, (idx + 1) * self.batch_size))
        df = self.df.iloc[batch_index]
        n_rows = len(df)
        gos_dict = self.gos_dict
        terms_dict = self.terms_dict
        # The one-hot sequence tensor that used to be built here was never
        # part of the returned data, so it is no longer computed.
        data_gos = np.zeros((n_rows, len(gos_dict)), dtype=np.float32)
        data_exp = np.zeros((n_rows, 53), dtype=np.float32)
        labels = np.zeros((n_rows, len(terms_dict)), dtype=np.int32)
        for i, row in enumerate(df.itertuples()):
            data_exp[i, :] = row.expressions

            # Each item is a "term|score" string; keep the score as the
            # feature value for terms present in gos_dict.
            for item in row.deepgo_annotations:
                t_id, score = item.split('|')
                if t_id in gos_dict:
                    data_gos[i, gos_dict[t_id]] = float(score)

            # These annotations are written after the scored ones, so they
            # overwrite any score for the same term with 1.
            for annotations in (row.iea_annotations, row.go_annotations):
                for t_id in annotations:
                    if t_id in gos_dict:
                        data_gos[i, gos_dict[t_id]] = 1

            for t_id in row.hp_annotations:
                if t_id in terms_dict:
                    labels[i, terms_dict[t_id]] = 1
        data = np.concatenate([data_gos, data_exp], axis=1)
        return (data, labels)
コード例 #2
0
 def __getitem__(self, idx):
     """Build the one-hot inputs and multi-hot labels for batch ``idx``.

     The batch covers rows ``idx * batch_size`` up to (but excluding)
     ``min(size, (idx + 1) * batch_size)`` of ``self.df``, so the last
     batch may contain fewer than ``batch_size`` rows.

     Args:
         idx: Zero-based batch index.

     Returns:
         A ``(data_onehot, labels)`` tuple: a float32 array of shape
         ``(rows, MAXLEN, 21)`` filled from ``to_onehot`` and an int32
         array of shape ``(rows, nb_classes)`` with a 1 for every
         ``prop_annotations`` term found in ``self.terms_dict``.
     """
     batch_index = np.arange(idx * self.batch_size,
                             min(self.size, (idx + 1) * self.batch_size))
     df = self.df.iloc[batch_index]
     data_onehot = np.zeros((len(df), MAXLEN, 21), dtype=np.float32)
     labels = np.zeros((len(df), self.nb_classes), dtype=np.int32)
     for i, row in enumerate(df.itertuples()):
         data_onehot[i, :, :] = to_onehot(row.sequences)
         for t_id in row.prop_annotations:
             if t_id in self.terms_dict:
                 labels[i, self.terms_dict[t_id]] = 1
     # NOTE(review): mutating self.start inside an index-based accessor
     # looks like a leftover; it is kept because callers may rely on it.
     # Confirm whether it can be removed.
     self.start += self.batch_size
     # The debug print of the full arrays was removed: it wrote every batch
     # to stdout on each access.
     return (data_onehot, labels)
コード例 #3
0
 def next(self):
     """Return the next sequential ``(data_onehot, labels)`` batch.

     While ``self.start`` is below ``self.size`` the batch is taken from
     rows ``start`` up to ``min(size, start + batch_size)`` of ``self.df``
     and ``self.start`` is advanced by ``batch_size``. Otherwise
     ``self.reset()`` is called and the request is retried.

     Returns:
         A tuple of two int32 arrays: one-hot data of shape
         ``(rows, MAXLEN, 21)`` and multi-hot labels of shape
         ``(rows, nb_classes)``.
     """
     # Exhausted: reset and serve the batch from the retried call.
     if not self.start < self.size:
         self.reset()
         return self.next()

     stop = min(self.size, self.start + self.batch_size)
     batch = self.df.iloc[np.arange(self.start, stop)]
     n_rows = len(batch)
     data_onehot = np.zeros((n_rows, MAXLEN, 21), dtype=np.int32)
     labels = np.zeros((n_rows, self.nb_classes), dtype=np.int32)
     for pos, record in enumerate(batch.itertuples()):
         data_onehot[pos, :, :] = to_onehot(record.sequences)
         # Only terms present in terms_dict have a label column.
         for term in record.prop_annotations:
             if term in self.terms_dict:
                 labels[pos, self.terms_dict[term]] = 1
     self.start += self.batch_size
     return (data_onehot, labels)
コード例 #4
0
def get_data(sequences):
    """Cut sequences into windows of at most MAXLEN and one-hot encode them.

    A sequence no longer than ``MAXLEN`` is kept whole. A longer one is cut
    into windows of up to ``MAXLEN`` characters whose start positions
    advance by ``MAXLEN - 128``, so consecutive windows overlap by 128.

    Args:
        sequences: Iterable of sequences supporting ``len`` and slicing.

    Returns:
        A tuple ``(ids, data)``. ``ids[k]`` is the position in
        ``sequences`` of the sequence that window ``k`` was taken from, and
        ``data`` is a float32 array of shape ``(n_windows, MAXLEN, 21)``
        whose rows are filled from ``to_onehot``.
    """
    stride = MAXLEN - 128
    ids = []
    pred_seqs = []
    for index, seq in enumerate(sequences):
        if len(seq) <= MAXLEN:
            # Short enough to be used as a single window.
            pred_seqs.append(seq)
            ids.append(index)
            continue
        offset = 0
        while offset < len(seq):
            pred_seqs.append(seq[offset: offset + MAXLEN])
            ids.append(index)
            offset += stride

    data = np.zeros((len(pred_seqs), MAXLEN, 21), dtype=np.float32)
    for row, fragment in enumerate(pred_seqs):
        data[row, :, :] = to_onehot(fragment)
    return ids, data