def __init__(self,
             intervals_file,
             fasta_file,
             num_chr_fasta=False,
             label_dtype=None,
             auto_resize_len=None,
             force_upper=True,
             ignore_targets=False):
    """Record the dataset configuration and build the BED interval reader.

    Args:
        intervals_file: passed to `BedDataset` as the intervals source.
        fasta_file: stored on the instance; not opened by this constructor.
        num_chr_fasta: stored on the instance and forwarded to `BedDataset`
            as `num_chr`.
        label_dtype: converted with `parse_dtype` and forwarded to
            `BedDataset` as `label_dtype`.
        auto_resize_len: stored on the instance; not used by this constructor.
        force_upper: stored on the instance; not used by this constructor.
        ignore_targets: forwarded unchanged to `BedDataset`.
    """
    # Plain configuration values kept for later use by the instance.
    self.intervals_file = intervals_file
    self.fasta_file = fasta_file
    self.num_chr_fasta = num_chr_fasta
    self.auto_resize_len = auto_resize_len
    self.force_upper = force_upper

    # NOTE: the `use_strand` and `max_seq_len` options are disabled for now.
    # With strand support a 6-column BED file would be required; without it
    # the reader is always asked for exactly 3 columns.
    targets_dtype = parse_dtype(label_dtype)
    self.bed = BedDataset(
        self.intervals_file,
        num_chr=self.num_chr_fasta,
        bed_columns=3,
        label_dtype=targets_dtype,
        ignore_targets=ignore_targets,
    )

    # Left unset here; presumably populated on first use - TODO confirm.
    self.fasta_extractors = None
def test_parse_type():
    """Check that `parse_dtype` rejects bad specs and resolves valid ones."""
    # Specs that must make parse_dtype raise.
    rejected_specs = ('string', 'np.float322')
    for bad_spec in rejected_specs:
        with pytest.raises(Exception):
            parse_dtype(bad_spec)

    # (input spec, expected resolved type) pairs.
    accepted_specs = (
        ('float', float),
        (float, float),
        ("np.float32", np.float32),
    )
    for spec, expected_type in accepted_specs:
        assert parse_dtype(spec) == expected_type
def __init__(self,
             alphabet=DNA,
             neutral_alphabet='N',
             neutral_value=0.25,
             dtype=None,
             alphabet_axis=1,
             dummy_axis=None):
    """Validate the axis layout and build the one-hot transform pipeline.

    The resulting `self.transform` composes three steps: `OneHot` encoding,
    an optional `DummyAxis` insertion, and a `SwapAxes` that moves the
    alphabet axis to the requested position.

    Args:
        alphabet: alphabet specification; parsed with `parse_alphabet`.
        neutral_alphabet: forwarded to `OneHot` as `neutral_alphabet`.
        neutral_value: forwarded to `OneHot` as `neutral_value`.
        dtype: dtype specification; parsed with `parse_dtype` and forwarded
            to `OneHot`.
        alphabet_axis: requested position of the alphabet axis. Must be 0 or
            1, or 2 when `dummy_axis` is given.
        dummy_axis: position (0, 1 or 2) of an extra axis to insert, or
            `None` to insert none. Must differ from `alphabet_axis`.

    Raises:
        ValueError: if `dummy_axis` equals `alphabet_axis` or is outside 0..2.
        AssertionError: if `alphabet_axis` is outside the allowed range.

    Note:
        `self.alphabet_axis` is reset to `None` when the alphabet axis is
        already in the requested position, i.e. when no swap is needed.
    """
    # Make sure the alphabet axis and the dummy axis are valid.
    if dummy_axis is not None:
        if alphabet_axis == dummy_axis:
            raise ValueError("alphabet_axis can't be the same as dummy_axis")
        if not 0 <= dummy_axis <= 2:
            raise ValueError("dummy_axis can be either 0,1 or 2")
    # Kept as an assert so the exception type seen by callers is unchanged.
    assert alphabet_axis >= 0 and (
        alphabet_axis < 2 or (alphabet_axis <= 2 and dummy_axis is not None)
    ), "alphabet_axis must be 0 or 1 (or 2 when dummy_axis is given)"

    self.alphabet_axis = alphabet_axis
    self.dummy_axis = dummy_axis
    self.alphabet = parse_alphabet(alphabet)
    self.dtype = parse_dtype(dtype)
    self.neutral_alphabet = neutral_alphabet
    self.neutral_value = neutral_value

    # Work out where the alphabet axis sits before any swapping.
    if dummy_axis is not None and dummy_axis < 2:
        # The dummy axis is inserted in front of the alphabet axis, which
        # pushes the alphabet axis to the end.
        existing_alphabet_axis = 2
    else:
        # The alphabet axis stays where the one-hot encoding put it.
        existing_alphabet_axis = 1

    # No swap needed when the axis is already in the requested position;
    # this is signalled to SwapAxes by passing None as the target.
    if existing_alphabet_axis == self.alphabet_axis:
        self.alphabet_axis = None

    # How to transform the input.
    self.transform = Compose([
        # one-hot-encode
        OneHot(self.alphabet,
               neutral_alphabet=self.neutral_alphabet,
               neutral_value=self.neutral_value,
               dtype=self.dtype),
        # optionally inject the dummy axis
        DummyAxis(self.dummy_axis),
        # put the alphabet axis elsewhere
        SwapAxes(existing_alphabet_axis, self.alphabet_axis),
    ])