def setup_featurizer(self, path: str):
    """Load character and syllable vocabularies from *path*.

    Reads ``characters.json`` and ``syllables.json`` from the given
    directory into ``self.ch_dict`` / ``self.sy_dict`` and returns the
    vocabulary sizes the model needs for embedding construction.
    """
    self.ch_dict = utils.load_dict(f"{path}/characters.json")
    self.sy_dict = utils.load_dict(f"{path}/syllables.json")
    sizes = {
        "num_char_tokens": len(self.ch_dict),
        "num_tokens": len(self.sy_dict),
    }
    return sizes
def __init__(self, dir: str = None, dict_dir: str = None, path: str = None, output_scheme=None):
    """Initialize the syllable+character dataset.

    Loads both vocabularies from *dict_dir* (``characters.json`` and
    ``syllables.json``), builds a reverse character lookup for decoding,
    and delegates the rest of the setup to the parent dataset class.
    """
    self.ch_dict = utils.load_dict(f"{dict_dir}/characters.json")
    self.sy_dict = utils.load_dict(f"{dict_dir}/syllables.json")
    self.dict_dir = dict_dir
    print(f"we have {len(self.sy_dict)} syllables from {dict_dir}")
    # Inverse mapping (index -> character) for decoding model output.
    # Dict comprehension replaces the opaque dict(zip(values, keys)) form.
    self.ch_ix_2_ch = {ix: ch for ch, ix in self.ch_dict.items()}
    # Zero-argument super(): file is Python-3-only (it uses f-strings),
    # so the legacy super(SyllableCharacterSeqDataset, self) form is unneeded.
    super().__init__(dir, dict_dir, path, output_scheme)
def main(sampling=10, output_dir="./data"):
    """Build syllable/character sequence data from the configured file lists.

    Reads the training and validation file lists under ``DATA_PATH``,
    resolves each entry to its actual on-disk filename, loads the
    character and syllable vocabularies, and hands everything to
    ``prepare_syllable_charater_seq_data``.

    :param sampling: sampling factor forwarded to the preparation routine
    :param output_dir: directory the prepared data is written to
    """

    def _read_file_list(list_path):
        # One data file per line; resolve each name to its real path.
        with open(list_path, "r") as f:
            return [get_actual_filename(line.strip()) for line in f]

    # f-strings for consistency with the rest of the file (was "%s" % ...).
    training_files = _read_file_list(f"{DATA_PATH}/training.files")
    val_files = _read_file_list(f"{DATA_PATH}/validation.files")

    ch2ix = utils.load_dict(CHARACTER_DICT)
    sy2ix = utils.load_dict(SYLLABLE_DICT)

    prepare_syllable_charater_seq_data(
        (training_files, val_files),
        ch2ix,
        sy2ix,
        sampling=sampling,
        output_dir=output_dir,
    )
def __init__(self, dir: str = None, dict_dir: str = None, path: str = None, output_scheme=None):
    """Initialize the character-sequence dataset.

    Loads the character vocabulary from ``{dict_dir}/characters.json``,
    builds a reverse (index -> character) lookup for decoding, then
    delegates to the parent dataset class.
    """
    self.dict = utils.load_dict(f"{dict_dir}/characters.json")
    # Inverse mapping for decoding; comprehension replaces dict(zip(values, keys)).
    self.ch_ix_2_ch = {ix: ch for ch, ix in self.dict.items()}
    # Zero-argument super(): file is Python-3-only (f-strings), so the
    # legacy super(CharacterSeqDataset, self) form is unnecessary.
    super().__init__(dir, dict_dir, path, output_scheme)
def __init__(self, dir: str = None, dict_dir: str = None, path: str = None, output_scheme=None):
    """Initialize the syllable-sequence dataset.

    Loads the syllable vocabulary from ``{dict_dir}/syllables.json``,
    reports its size, then delegates to the parent dataset class.
    """
    self.sy_dict = utils.load_dict(f"{dict_dir}/syllables.json")
    print(f"we have {len(self.sy_dict)} syllables")
    # Zero-argument super(): file is Python-3-only (f-strings), so the
    # legacy super(SyllableSeqDataset, self) form is unnecessary.
    super().__init__(dir, dict_dir, path, output_scheme)
def setup_featurizer(self, path: str):
    """Load the character vocabulary from *path*.

    Reads ``characters.json`` from the given directory into ``self.dict``
    and returns its size for model construction.
    """
    self.dict = utils.load_dict(f"{path}/characters.json")
    return {"num_tokens": len(self.dict)}