def __init__(self, args):
    """ Initializes a dataset that mixes several sub-datasets for training.

    Args:
        args: A dict of options, containing:
            - data_files: A dict mapping sub-dataset names to their build
              options, or a YAML string that parses to such a dict.
            - data_class: The dataset class used to build each sub-dataset.
            - common_properties: Optional dict (or YAML string parsing to a
              dict) of properties shared by every sub-dataset; `None` means
              no shared properties.

    Raises:
        AssertionError: If `data_files` or `common_properties` does not
            resolve to a dict, or if the sub-datasets report differing
            `status` values.
    """
    super(MixedTrainDataset, self).__init__()
    self._data_files = args["data_files"]
    if isinstance(self._data_files, str):
        # A string is treated as inline YAML describing the mapping.
        self._data_files = yaml.load(args["data_files"], Loader=yaml.FullLoader)
    assert isinstance(self._data_files, dict)
    common_properties = args["common_properties"]
    if common_properties is None:
        common_properties = {}
    elif isinstance(common_properties, str):
        common_properties = yaml.load(common_properties, Loader=yaml.FullLoader)
    assert isinstance(common_properties, dict)
    self._custom_dss = dict()
    self._status = None
    for name, ds in self._data_files.items():
        self._custom_dss[name] = build_dataset(
            args["data_class"], **ds, **common_properties)
        # All sub-datasets must agree on their status; remember the first
        # one and verify the rest against it.
        if self._status is None:
            self._status = self._custom_dss[name].status
        else:
            assert self._status == self._custom_dss[name].status, (
                "Status of each dataset are supposed to be the same.")
    # Build the sampler exactly once. The original code also built it near
    # the top of this method and immediately discarded that instance here.
    self._data_sampler = build_data_sampler(args)
def __init__(self, args):
    """ Initializes a dataset over multiple parallel text corpora.

    Args:
        args: A dict of options, containing:
            - src_lang / trg_lang: Source and target language codes,
              forwarded to the superclass.
            - data_files: A dict of data file definitions, or a YAML
              string that parses to such a dict.
            - data_is_processed: Whether the data is already processed.

    Raises:
        AssertionError: If `data_files` does not resolve to a dict.
    """
    super(MultipleParallelTextDataset, self).__init__(
        src_lang=args["src_lang"], trg_lang=args["trg_lang"])
    data_files = args["data_files"]
    if isinstance(data_files, str):
        # Inline YAML string describing the file mapping.
        data_files = yaml.load(args["data_files"], Loader=yaml.FullLoader)
    assert isinstance(data_files, dict)
    self._data_files = data_files
    self._data_is_processed = args["data_is_processed"]
    self._data_sampler = build_data_sampler(args)
def __init__(self, args):
    """ Initializes the TFRecord-backed multilingual translation dataset.

    Args:
        args: A dict of options, containing:
            - path: The location of the TFRecord data.
            - auto_switch_langs: Option controlling automatic language
              switching (semantics defined by the consumer of this flag).
    """
    super(MultilingualTranslationTFRecordDataset, self).__init__()
    self._path = args["path"]
    self._auto_switch_langs = args["auto_switch_langs"]
    self._data_sampler = build_data_sampler(args)