Example #1
0
    def save(self,
             save_on_dir: str,
             take_split: bool = True,
             test_max_size: int = None,
             source_vocab_size: int = None,
             target_vocab_size: int = None,
             shuffle=True,
             prefix=None,
             ext='align',
             **kwargs):
        """Write the source and target sides of the corpus to separate files.

        For every data set to save, one file per language is created inside
        ``save_on_dir``. Files are named ``<prefix>_<language>.<ext>``, or just
        ``<language>.<ext>`` when no prefix applies.

        :param save_on_dir: directory that receives the files.
        :param take_split: when true, the data comes from ``self.split_data``
            (called with ``take_parallel_data=False``) and a train pair and a
            test pair of files are written; otherwise ``self.source.data`` and
            ``self.target.data`` are written as a single pair.
        :param test_max_size: forwarded to ``self.split_data``.
        :param source_vocab_size: forwarded to ``self.split_data``.
        :param target_vocab_size: forwarded to ``self.split_data``.
        :param shuffle: forwarded to ``self.split_data``.
        :param prefix: optional file-name prefix. With ``take_split`` the
            prefixes become ``<prefix>_train`` / ``<prefix>_test``
            (``train`` / ``test`` when ``prefix`` is ``None``).
        :param ext: file extension; leading and trailing dots are stripped.
        :param kwargs: forwarded to the ``save`` call of every created file.
        """
        save_on_dir = Path(save_on_dir)
        # Normalise once so both 'align' and '.align' give the same file names.
        extension = ext.strip(".")

        if take_split:
            x_train, y_train, x_test, y_test = self.split_data(
                test_max_size=test_max_size,
                source_vocab_size=source_vocab_size,
                target_vocab_size=target_vocab_size,
                take_parallel_data=False,
                shuffle=shuffle)
            if prefix is not None:
                train_prefix, test_prefix = f'{prefix}_train', f'{prefix}_test'
            else:
                train_prefix, test_prefix = 'train', 'test'
            data_to_save = ((train_prefix, x_train, y_train),
                            (test_prefix, x_test, y_test))
        else:
            data_to_save = ((prefix, self.source.data, self.target.data), )

        for file_prefix, x, y in data_to_save:
            for language, data in ((self.source_language, x),
                                   (self.target_language, y)):
                # Without a prefix the name is just the language; formatting
                # None into the f-string would yield "None_<language>".
                if file_prefix is None:
                    file_name = f'{language}.{extension}'
                else:
                    file_name = f'{file_prefix}_{language}.{extension}'
                save_on = save_on_dir.join(file_name)
                FileIO.create(save_on, data=data).save(**kwargs)
Example #2
0
    def load_corpus_from_csv(cls, path_: str, src_col_name: str, trg_col_name: str, source_name=None,
                             target_name=None):
        """Build an instance from two columns of a CSV file.

        The data loaded from ``path_`` is read with ``csv.DictReader``. For every
        row, the value under ``src_col_name`` goes to the source list and the
        value under ``trg_col_name`` goes to the target list.

        :param path_: path of the CSV file, loaded through ``FileIO.load``.
        :param src_col_name: name of the column holding the source values.
        :param trg_col_name: name of the column holding the target values.
        :param source_name: passed to ``cls`` as ``source_name``.
        :param target_name: passed to ``cls`` as ``target_name``.
        :return: ``cls`` called with the collected source and target lists.
        :raises ValueError: if a row does not contain one of the two columns.
        """
        required_columns = (src_col_name, trg_col_name)
        sources, targets = [], []

        rows = csv.DictReader(FileIO.load(path_).data)
        for row in rows:
            # Each row is validated before its values are collected.
            for column in required_columns:
                if column in row:
                    continue
                raise ValueError(f"Not found col <{column}> in {list(row.keys())}")
            sources.append(row[src_col_name])
            targets.append(row[trg_col_name])

        return cls(sources, targets, source_name=source_name, target_name=target_name)
Example #3
0
 def save(self, filepath: str):
     """Write the language data to ``filepath``.

     The file is created through ``FileIO.create`` and saved with
     ``exist_ok=True``.

     :param filepath: destination path of the file.
     """
     output_file = FileIO.create(filepath, data=self.__language_data)
     output_file.save(exist_ok=True)
Example #4
0
 def load(self, filepath: str):
     """Replace the language data with the content loaded from ``filepath``.

     The ``_is_model`` flag is set to ``True`` before the file is loaded.

     :param filepath: path of the file to load through ``FileIO.load``.
     :return: this instance, so calls can be chained.
     """
     self._is_model = True
     loaded_file = FileIO.load(filepath)
     self.__language_data = loaded_file.data
     return self
Example #5
0
 def save(self, path_, **kwargs):
     """Write the stored source code to a ``.py`` file.

     :param path_: destination path; its suffix must be exactly ``.py``.
     :param kwargs: forwarded to the ``save`` call of the created file.
     :raises AssertionError: if ``path_`` does not have the ``.py`` suffix.
     """
     from cereja import FileIO, Path
     # Raised explicitly instead of via ``assert`` so the check still runs
     # under ``python -O``; the exception type and message are unchanged.
     if Path(path_).suffix != '.py':
         raise AssertionError("Only python source code.")
     FileIO.create(path_, self._source_code).save(**kwargs)