Beispiel #1
0
    def __missing__(self, key):
        """Build, store and return the validation sequences for ``key``.

        This is the mapping ``__missing__`` hook, so it runs when ``key`` is
        looked up but not yet stored (presumably the enclosing class is a
        ``dict`` subclass; the class header is not visible here).

        Args:
            key: A ``(dataset, cross_fold_idx)`` pair. It selects the
                ``'<dataset>-<cross_fold_idx>-validate'`` entry of
                ``sequence_files`` and the ``dataset`` entry of
                ``self.negative_seq_generators``.

        Returns:
            The value stored under ``key``: a ``(positive_seqs,
            negative_seqs)`` tuple of two equally long lists, where each
            negative sequence was cut to at most the length of the positive
            sequence at the same index before conversion.

        Raises:
            RuntimeError: If the negative source yields fewer sequences than
                there are positives, or if any negative sequence ends up
                shorter than its positive counterpart.
        """
        dataset, cross_fold_idx = key

        validate_name = '%s-%d-validate' % (dataset, cross_fold_idx)
        positives = list(map(seq_to_numpy, convert_seqs(sequence_files[validate_name])))

        # Pair every positive with the next negative and trim the negative to
        # the positive's length. The positives come first in the zip, so once
        # they run out nothing more is pulled from the negative source.
        negatives = []
        for positive, raw_negative in zip(positives, self.negative_seq_generators[dataset]):
            trimmed = raw_negative[:len(positive)]
            negatives.append(seq_to_numpy(trimmed))

        # zip stops silently at the shorter input, so a short negative source
        # only shows up as a count mismatch.
        n_positive, n_negative = len(positives), len(negatives)
        if n_negative != n_positive:
            raise RuntimeError('Not enough sequences in negative set to match positive sequences. %d positive, %d negative sequences' % (n_positive, n_negative))

        # Slicing cannot lengthen a sequence, so a negative that was too short
        # to begin with is still too short here.
        for index, (positive, negative) in enumerate(zip(positives, negatives)):
            if len(negative) >= len(positive):
                continue
            details = (index, positive.shape[0], negative.shape[0])
            raise RuntimeError(
                'Not enough bases in negative sequence %d to match length of positive sequence: positive sequence has %d bases and negative sequence has %d bases' % details
            )

        self[key] = (positives, negatives)
        return self[key]
Beispiel #2
0
 def __missing__(self, k):
     """Load, store and return the converted sequences for key ``k``.

     This is the mapping ``__missing__`` hook, so it runs when ``k`` is
     looked up but not yet stored (presumably the enclosing class is a
     ``dict`` subclass; the class header is not visible here).

     Args:
         k: Key into ``sequence_files`` naming the sequences to load.

     Returns:
         The list stored under ``k``: one ``seq_to_numpy`` result for each
         item produced by ``convert_seqs(sequence_files[k])``.
     """
     converted = []
     for raw_seq in convert_seqs(sequence_files[k]):
         converted.append(seq_to_numpy(raw_seq))

     # Store the list first so later lookups of ``k`` do not reload it.
     self[k] = converted
     return self[k]