def __init__(self, args):
     """ Initializes the dataset.

     Args:
         args: A dict-like object of parameters. The keys read here are:
             - "data_files": a dict mapping a dataset name to the keyword
               arguments used to build that dataset, or a YAML string
               that parses to such a dict.
             - "common_properties": None, a dict, or a YAML string that
               parses to a dict; these keyword arguments are passed to
               every dataset in addition to its own.
             - "data_class": forwarded to `build_dataset` as its first
               positional argument.
             The whole `args` object is also handed to
             `build_data_sampler`.

     Raises:
         AssertionError: If "data_files" or "common_properties" does not
             resolve to a dict, or if the built datasets do not all
             report the same `status`.
     """
     super(MixedTrainDataset, self).__init__()
     self._data_files = args["data_files"]
     if isinstance(self._data_files, str):
         # NOTE(review): yaml.load with FullLoader is meant for trusted
         # configuration; if `args` can carry untrusted text, confirm
         # whether yaml.safe_load should be used instead.
         self._data_files = yaml.load(self._data_files,
                                      Loader=yaml.FullLoader)
     assert isinstance(self._data_files, dict), (
         "`data_files` must be a dict or a YAML string describing a dict.")
     # The sampler is built exactly once, before the datasets, so an
     # invalid sampler configuration fails before any dataset is built.
     self._data_sampler = build_data_sampler(args)
     common_properties = args["common_properties"]
     if common_properties is None:
         common_properties = {}
     elif isinstance(common_properties, str):
         common_properties = yaml.load(common_properties,
                                       Loader=yaml.FullLoader)
     assert isinstance(common_properties, dict), (
         "`common_properties` must be None, a dict or a YAML string "
         "describing a dict.")
     self._custom_dss = {}
     self._status = None
     data_class = args["data_class"]
     for name, ds in self._data_files.items():
         # A key present in both `ds` and `common_properties` makes the
         # call below raise a TypeError (duplicate keyword argument).
         dataset = build_dataset(data_class, **ds, **common_properties)
         self._custom_dss[name] = dataset
         if self._status is None:
             # The first non-None status becomes the reference value.
             self._status = dataset.status
         else:
             assert self._status == dataset.status, (
                 "Status of each dataset are supposed to be the same.")
Beispiel #2
0
 def __init__(self, args):
     """ Sets up the dataset from the `args` mapping.

     Args:
         args: A dict-like object of parameters. The keys read here are
             "src_lang" and "trg_lang" (forwarded to the parent
             initializer), "data_files" (a dict, or a YAML string that
             must parse to a dict) and "data_is_processed". The whole
             `args` object is also handed to `build_data_sampler`.

     Raises:
         AssertionError: If "data_files" does not resolve to a dict.
     """
     parent = super(MultipleParallelTextDataset, self)
     parent.__init__(src_lang=args["src_lang"], trg_lang=args["trg_lang"])
     data_files = args["data_files"]
     self._data_files = data_files
     if isinstance(data_files, str):
         # A string value is treated as YAML text and replaced by the
         # object it parses to.
         self._data_files = yaml.load(data_files, Loader=yaml.FullLoader)
     assert isinstance(self._data_files, dict)
     self._data_is_processed = args["data_is_processed"]
     self._data_sampler = build_data_sampler(args)
 def __init__(self, args):
     """ Sets up the dataset from the `args` mapping.

     Args:
         args: A dict-like object of parameters. The keys read here are
             "path" and "auto_switch_langs"; the whole `args` object is
             also handed to `build_data_sampler`.
     """
     super(MultilingualTranslationTFRecordDataset, self).__init__()
     # Resolve every value first (in this order), then store them on the
     # instance.
     path = args["path"]
     sampler = build_data_sampler(args)
     auto_switch_langs = args["auto_switch_langs"]
     self._path = path
     self._data_sampler = sampler
     self._auto_switch_langs = auto_switch_langs