Esempio n. 1
0
def archive_path(path: str) -> None:
    """Move ``path`` and its last-modification marker file into the archive directory.

    Both entries are renamed into ``DEFAULT_PREP_DATASETS_DIR`` and get the
    suffix ``.<ARCHIVED_EXT>.<timestamp>`` appended to their base names, where
    ``timestamp`` is the value returned by ``get_timestamp(path)``.

    :param path: path of the entry to archive.
    :raises OSError: if the archive directory cannot be created or either
        rename fails. The two renames are not atomic as a pair: if the second
        one fails, ``path`` has already been moved.
    """
    modif_file = _get_last_modif_file_path_for_dir(path)
    # Take the timestamp before anything is moved, while `path` still exists.
    timestamp = get_timestamp(path)
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test: another process may create the directory in between.
    os.makedirs(DEFAULT_PREP_DATASETS_DIR, exist_ok=True)
    archive_suffix = f'{ARCHIVED_EXT}.{timestamp}'
    for source in (path, modif_file):
        archived_name = f'{os.path.basename(source)}.{archive_suffix}'
        os.rename(source, os.path.join(DEFAULT_PREP_DATASETS_DIR, archived_name))
Esempio n. 2
0
def is_path_outdated(path: str) -> bool:
    """Tell whether ``path`` changed since its marker file was last written.

    The timestamp stored in the last-modification marker file of ``path`` is
    compared, as an exact string, with the current ``get_timestamp(path)``.

    :param path: path whose recorded timestamp should be checked.
    :return: ``True`` if the stored and the current timestamps differ,
        ``False`` if they are equal.
    :raises FileNotFoundError: if the marker file does not exist.
    """
    modif_file = _get_last_modif_file_path_for_dir(path)
    if not os.path.exists(modif_file):
        # Name the missing file so the caller can tell which marker is absent.
        raise FileNotFoundError(f'Last-modification file not found: {modif_file}')
    # Hold the file handle only for as long as the read takes.
    with open(modif_file) as f:
        expected_timestamp = f.read()
    actual_timestamp = get_timestamp(path)
    return expected_timestamp != actual_timestamp
Esempio n. 3
0
    def __init__(
            self,
            path: str,
            prep_config: PrepConfig,
            normalized_extension_list: Optional[List[str]],
            custom_bpe_config: Optional[CustomBpeConfig],
            bpe_config: Optional[BpeConfig],
            overridden_path_to_prep_dataset):
        """Store the dataset settings and create its three sub-datasets.

        :param path: path of the dataset; also passed to ``get_timestamp`` to
            record the dataset's last-modified value.
        :param prep_config: stored as ``_prep_config``.
        :param normalized_extension_list: stored as ``_normalized_extension_list``.
        :param custom_bpe_config: stored as ``_custom_bpe_config``.
        :param bpe_config: stored as ``_bpe_config``.
        :param overridden_path_to_prep_dataset: forwarded unchanged to
            ``_get_path_to_prep_dataset`` when locating the preprocessed
            sub-dataset.
        """
        # Plain settings first: the path helpers called below are methods of
        # this instance and may read them.
        self._path = path
        self._prep_config = prep_config
        self._normalized_extension_list = normalized_extension_list
        self._custom_bpe_config = custom_bpe_config
        self._bpe_config = bpe_config
        self._dataset_last_modified = get_timestamp(path)

        # Sub-datasets are created in the order original -> parsed -> preprocessed.
        self._original = SubDataset(self, self.path)

        parsed_location = self._get_path_to_parsed_dataset()
        self._parsed = SubDataset(self, parsed_location, suffix=PARSED_EXTENSION)

        prep_location = self._get_path_to_prep_dataset(overridden_path_to_prep_dataset)
        self._preprocessed = SubDataset(self, prep_location, suffix=PREPROCESSED_EXTENSION)
Esempio n. 4
0
def set_path_ready(path: str) -> None:
    """Record the current timestamp of ``path`` in its last-modification marker file.

    The marker file is opened for writing (created or truncated) and the value
    of ``get_timestamp(path)`` is written into it.

    :param path: path whose current timestamp should be stored.
    """
    marker_path = _get_last_modif_file_path_for_dir(path)
    with open(marker_path, 'w') as marker:
        # The timestamp is taken after the marker file has been opened.
        current_timestamp = get_timestamp(path)
        marker.write(current_timestamp)