def __missing__(self, key):
    """Build, cache and return the validation sequences for ``key``.

    The result is stored in ``self[key]`` before it is returned.
    NOTE(review): presumably the enclosing class is a ``dict`` subclass,
    so this only runs on the first lookup of each key -- confirm.

    Args:
        key: A ``(dataset, cross_fold_idx)`` pair. The positive sequences
            are read from ``sequence_files['<dataset>-<idx>-validate']``
            and the negative sequences are drawn from
            ``self.negative_seq_generators[dataset]``.

    Returns:
        A ``(positive_seqs, negative_seqs)`` tuple of two lists of
        converted sequences, paired by position.

    Raises:
        RuntimeError: If the negative source yields fewer sequences than
            there are positive sequences, or if a converted negative
            sequence is shorter than the positive sequence it is paired
            with.
    """
    dataset, cross_fold_idx = key

    validate_name = '%s-%d-validate' % (dataset, cross_fold_idx)
    positive_seqs = []
    for raw_positive in convert_seqs(sequence_files[validate_name]):
        positive_seqs.append(seq_to_numpy(raw_positive))

    # Draw one negative per positive and trim it to the positive's length.
    # The positives come first in the zip so that no extra negative is
    # pulled from the source once the positives run out.
    negative_source = self.negative_seq_generators[dataset]
    negative_seqs = []
    for positive, raw_negative in zip(positive_seqs, negative_source):
        negative_seqs.append(seq_to_numpy(raw_negative[:len(positive)]))

    # zip stops early if the negative source is exhausted first.
    n_positive = len(positive_seqs)
    n_negative = len(negative_seqs)
    if n_negative != n_positive:
        raise RuntimeError('Not enough sequences in negative set to match positive sequences. %d positive, %d negative sequences' % (n_positive, n_negative))

    # Slicing cannot lengthen a sequence, so a short negative is still
    # short after trimming and is caught here.
    for index, (positive, negative) in enumerate(zip(positive_seqs, negative_seqs)):
        if len(negative) >= len(positive):
            continue
        raise RuntimeError(
            'Not enough bases in negative sequence %d to match length of positive sequence: positive sequence has %d bases and negative sequence has %d bases' % (
                index, positive.shape[0], negative.shape[0]
            )
        )

    self[key] = (positive_seqs, negative_seqs)
    return self[key]
def __missing__(self, k):
    """Load, convert, cache and return the sequences stored under ``k``.

    NOTE(review): presumably the enclosing class is a ``dict`` subclass,
    so this only runs on the first lookup of each key -- confirm.

    Args:
        k: Key into ``sequence_files`` identifying what to load.

    Returns:
        The list of converted sequences, which is also stored in
        ``self[k]``.
    """
    raw_sequences = convert_seqs(sequence_files[k])
    self[k] = list(map(seq_to_numpy, raw_sequences))
    return self[k]