def cache_from(self, src: Queryable, cnt=None) -> None:
    """
    Caches data from a given queryable (for instance, from one produced by
    DataSource::get_data).

    The data is first written to ``<path>.tmp``. Only after the queryable has
    been fed through the writing pipeline is that file moved over the file at
    ``self.path``. The value produced by the ``cnt`` aggregator
    (``agg.Count()``) is then stored as text in ``self.length_path``.

    Args:
        src: Queryable to cache from
        cnt: amount of objects to cache; ``None`` means no ``take`` is applied
    """
    source = src if cnt is None else src.take(cnt)

    target = str(self.path)
    os.makedirs(str(Path(target).parent), exist_ok=True)
    temporary = target + '.tmp'

    writer = self._get_writing_aggregator(temporary)
    pipeline = Query.push().split_pipelines(file=writer, cnt=agg.Count())
    outcome = source.feed(pipeline)

    # An existing cache file is removed before the temporary one is moved in.
    if os.path.isfile(target):
        os.remove(target)
    shutil.move(temporary, target)

    FileIO.write_text(str(outcome['cnt']), self.length_path)
def make_container(task: ContaineringTask):
    """
    Creates a package of the current training grounds, and then uses the
    package to build a local docker image that trains the provided model.

    The package, the entry file and the ``Dockerfile`` are written to a
    temporary ``release/container`` folder, ``docker build`` is invoked on
    that folder, and the folder is removed afterwards.

    Args:
        task: description of the container to build (packaging task,
            templates for the entry file and the Dockerfile, image name/tag)
    """
    release = Loc.temp_path.joinpath('release/container')  # type: Path
    os.makedirs(str(release), exist_ok=True)

    packaging_info = make_package(task.packaging_task, release)

    # One "RUN pip install ..." instruction per dependency list.
    install_libraries = ''.join(
        'RUN pip install ' + ' '.join(dep_list.dependencies) + "\n\n"
        for dep_list in task.packaging_task.dependencies
    )

    template_values = {
        'module': packaging_info.module_name,
        'tg_name': Loc.tg_name,
        'install_libraries': install_libraries,
        'package_filename': packaging_info.path.name,
    }

    entry_file = task.entry_file_template.format(**template_values)
    FileIO.write_text(entry_file, release.joinpath(task.entry_file_name))

    docker_file = task.dockerfile_template.format(**template_values)
    FileIO.write_text(docker_file, release.joinpath('Dockerfile'))

    image = task.image_name + ":" + task.image_tag
    call(['docker', 'build', '-t', image, str(release)])

    shutil.rmtree(release)
def store_artifact(self, path: List[Any], name: Any, object: Any):
    """
    Pickles ``object`` into a file below ``self.model_folder``.

    Args:
        path: items whose string forms become the nested sub-folders,
            in order, under ``self.model_folder``
        name: its string form is used as the file name
        object: the value to pickle
    """
    folder = self.model_folder.joinpath(*(str(part) for part in path))
    os.makedirs(str(folder), exist_ok=True)

    destination = folder / str(name)
    FileIO.write_pickle(object, destination)
    self.log(f"Saved artifact {destination}")
def unpickle(self, subpath) -> Any:
    """
    Loads a pickled object stored at ``subpath`` relative to ``self.path``.

    When ``self.hacked_unpickling`` is falsy, the file is read with
    ``FileIO.read_pickle``. Otherwise ``package.json`` from ``self.path`` is
    read and its ``tg_module_name`` / ``original_tg_module_name`` entries are
    handed to ``HackedUnpicker``, which performs the loading
    (presumably remapping module names while unpickling -- confirm against
    the HackedUnpicker implementation).

    Args:
        subpath: location of the pickle file relative to ``self.path``

    Returns:
        The unpickled object.
    """
    if not self.hacked_unpickling:
        return FileIO.read_pickle(self.path.joinpath(subpath))

    package = FileIO.read_jsonpickle(self.path.joinpath('package.json'))
    with open(str(self.path.joinpath(subpath)), 'rb') as stream:
        loader = HackedUnpicker(
            stream,
            package['tg_module_name'],
            package['original_tg_module_name'],
        )
        return loader.load()
def test_ordinary_unpickling(self):
    """
    Pickles a ``class_a.TestClass`` and checks two ways of reading it back:
    plain unpickling yields ``class_a.TestClass``, while ``HackedUnpicker``
    given the ``class_a`` and ``class_b`` module names yields
    ``class_b.TestClass``.
    """
    pickle_path = 'test.pkl'
    try:
        FileIO.write_pickle(class_a.TestClass(), pickle_path)

        ta = FileIO.read_pickle(pickle_path)
        self.assertIsInstance(ta, class_a.TestClass)
        self.assertEqual('A', ta.get_value())

        with open(pickle_path, 'rb') as file_obj:
            tb = HackedUnpicker(
                file_obj,
                'tg.common.test_common.test_delivery.test_packaging.class_a',
                'tg.common.test_common.test_delivery.test_packaging.class_b',
            ).load()
        self.assertIsInstance(tb, class_b.TestClass)
        self.assertEqual('B', tb.get_value())
    finally:
        # Remove the temporary pickle even when an assertion above fails,
        # so a failing run does not leave the file in the working directory.
        if os.path.isfile(pickle_path):
            os.remove(pickle_path)
def get_data(self):
    """
    Returns a ``Queryable`` over the stored data.

    The length is read from ``self.length_path`` and parsed as an integer.
    If it cannot be read or parsed, the queryable is created with
    ``None`` as its length.

    Returns:
        ``Queryable`` built from ``self._self_get_reading_query()`` and the
        length (or ``None``).
    """
    try:
        length = int(FileIO.read_text(self.length_path))
    except Exception:
        # Best effort: a missing or malformed length file only means the
        # length is unknown. ``Exception`` (rather than a bare ``except``)
        # lets KeyboardInterrupt / SystemExit propagate.
        length = None
    query = self._self_get_reading_query()
    return Queryable(query, length)
def execute(entry: EntryPoint):
    """
    Runs the training of the model provided by ``entry`` inside a container.

    Reads the hyperparameters from
    ``/opt/ml/input/config/hyperparameters.json``, initializes the model via
    ``FileCacheTrainingEnvironment.common_initialization`` and runs it on
    ``/opt/ml/input/data/training/``. Output goes to ``/opt/ml/model``.
    NOTE(review): the ``/opt/ml`` layout and the ``_tuning_objective_metric``
    key look like the Amazon SageMaker training-container convention --
    confirm.

    Args:
        entry: entry point of the package; supplies the package properties
            and the ``'model'`` resource.
    """
    folder = Path('/opt/ml/model')
    env = FileCacheTrainingEnvironment(_myprint, folder)

    hyperparams = FileIO.read_json('/opt/ml/input/config/hyperparameters.json')
    # This key is removed so it is not applied to the model as a hyperparameter.
    hyperparams.pop('_tuning_objective_metric', None)

    # The hyperparameters read above are handed to the initialization
    # (passing an empty dict here would silently discard them).
    model = env.common_initialization(
        entry,
        Path('/opt/ml/code/package.tar.gz'),
        hyperparams,
        'model',
    )

    env.log('Starting training now...')
    model.run_with_environment('/opt/ml/input/data/training/', env)
def _parse_base(self):
    """
    Splits the source text into blocks and yields them one by one.

    The text is ``self.mock`` when it is set, otherwise the content of
    ``self.file_path``. Each line is passed through
    ``InterFormatParser._circumvent_separator_problems`` and shown to a
    ``HeaderParser``; the parser's response decides whether the line is
    skipped, starts a new block, or continues the current one.

    Yields:
        Tuples ``(lines, header_tags)``: the list of lines of one block and
        the header tags reported by the parser when the block was opened.
    """
    text = self.mock
    if text is None:
        text = FileIO.read_text(self.file_path)

    header_parser = HeaderParser()
    block = None
    for raw_line in text.split('\n'):
        line = InterFormatParser._circumvent_separator_problems(raw_line)
        response = header_parser.observe(line)

        if response == HeaderParseResponse.Ignore:
            continue

        # A new text block closes the one collected so far, if any.
        if response == HeaderParseResponse.NewTextBlock and block is not None:
            yield block
            block = None

        if block is None:
            block = ([], header_parser.get_header_tags())
        block[0].append(line)

    # Flush the block that was still open when the text ended.
    if block is not None:
        yield block
def test_zip_file(self):
    """
    Caches the first 7 of 10 integers with ``ZippedFileDataSource``
    (``buffer_size=4``) and checks the stored length file, the two stored
    buffers and the data read back from the cache.
    """
    src = Query.en(range(10))
    path = Path(__file__).parent.joinpath('test_cache')
    zip_path = path.__str__() + '.pkllines.zip'
    length_path = path.__str__() + '.pkllines.zip.length'
    cache = ZippedFileDataSource(path, buffer_size=4)
    try:
        self.assertEqual(False, cache.is_available())
        cache.cache_from(src, 7)
        self.assertEqual(True, cache.is_available())

        self.assertEqual("7", FileIO.read_text(length_path))

        stored = Query.file.zipped_folder(zip_path).to_dictionary()
        self.assertEqual(2, len(stored))
        self.assertListEqual([0, 1, 2, 3], stored['0'])
        self.assertListEqual([4, 5, 6], stored['1'])

        result = cache.get_data().to_list()
        self.assertListEqual(list(range(7)), result)
    finally:
        # Always remove the cache files: leftovers from a failed run would
        # make the initial ``is_available() == False`` check fail next time.
        for leftover in (length_path, zip_path):
            if os.path.isfile(leftover):
                os.unlink(leftover)
def common_initialization(self,
                          entry: EntryPoint,
                          package_location: Optional[Path] = None,
                          hyperparams: Optional[Dict] = None,
                          resource_name='model') -> AbstractTrainingTask:
    """
    Prepares the environment's files and loads the model to be trained.

    Steps, each of them logged via ``self.log``:

    * the package properties of ``entry`` are stored as ``package.json``;
    * the package file, if given, is copied into the environment;
    * the model is loaded from the resource ``resource_name`` of ``entry``;
    * the hyperparameters, if given, are stored as ``hyperparameters.json``
      and applied to the model;
    * the jsonpickled model is stored as ``task.json``.

    Args:
        entry: entry point of the package
        package_location: location of the package file; ``None`` skips the copy
        hyperparams: hyperparameters to apply; ``None`` skips this step
        resource_name: name of the resource holding the model

    Returns:
        The loaded (and, if requested, hyperparameterized) model.
    """
    self.log('Common Training Initialization')

    self.log('Preparing package properties...')
    props_str = json.dumps(entry.get_properties())
    self.log(props_str)
    FileIO.write_text(props_str, self.get_file_name('package.json'))

    if package_location is not None:
        self.log('Preparing package file...')
        # NOTE(review): 'package.tag.gz' looks like a typo of 'package.tar.gz';
        # kept as is because other code may rely on this file name -- confirm.
        destination = self.get_file_name('package.tag.gz')
        shutil.copy(str(package_location), str(destination))

    self.log('Loading model from package...')
    model = entry.load_resource(resource_name)

    if hyperparams is not None:
        self.log("Hyperparameters are provided")
        self.log(hyperparams)
        self.log("Storing Hyperparameters in file")
        FileIO.write_json(hyperparams, self.get_file_name('hyperparameters.json'))
        self.log('Applying hyperparams')
        model.apply_hyperparams(hyperparams)
    else:
        self.log("No hyperparameters are provided")

    self.log("Model initialized. Jsonpickling...")
    # Round-trip through json to get an indented, human-readable dump.
    pickled = jsonpickle.dumps(model)
    model_state = json.dumps(json.loads(pickled), indent=1)
    FileIO.write_text(model_state, self.get_file_name('task.json'))

    self.log("Common Training Initialization completed")
    return model
def make_package(
        task: PackagingTask,
        dst_location: Optional[Union[Path, str]] = None) -> PackageInfo:
    """
    Creates the package out of the :class:`PackagingTask`, and returns
    :class:`PackageInfo` describing this package.

    The package is assembled in a temporary ``release/package_tmp`` folder:
    the training grounds sources are copied there, the payload is pickled
    into ``resources``, the manifest/setup/init files are rendered from the
    templates, and ``setup.py sdist`` is run. The resulting archive is copied
    to ``dst_location`` and the temporary folder is removed.

    Args:
        task: description of the package (name, version, dependencies, payload)
        dst_location: folder to place the archive in; defaults to
            ``release/package`` under ``Loc.temp_path``

    Returns:
        :class:`PackageInfo` with the task, the full module name and the path
        of the produced archive.

    Raises:
        ValueError: if ``dst_location`` is neither ``None``, ``str`` nor ``Path``.
    """
    if dst_location is None:
        dst_location = Loc.temp_path.joinpath('release/package')
    elif isinstance(dst_location, str):
        dst_location = Path(dst_location)
    elif not isinstance(dst_location, Path):
        raise ValueError(
            f'dst_location was {dst_location}, while str or Path is expected')
    os.makedirs(dst_location, exist_ok=True)

    root = Loc.tg_path  # type: Path
    release = Loc.temp_path.joinpath('release/package_tmp')  # type: Path

    # Best-effort removal of leftovers from a previous run; the folder may
    # legitimately not exist.
    shutil.rmtree(str(release), ignore_errors=True)
    os.makedirs(str(release))

    full_module_name = _full_module_name(task.name, task.version)
    lib = release.joinpath(full_module_name)
    shutil.copytree(str(root), str(lib.joinpath(Loc.tg_name)))

    resources = lib.joinpath('resources')  # type: Path
    os.makedirs(str(resources))

    props = dict(
        module_name=task.name,
        version=task.version,
        full_module_name=full_module_name,
        dependencies=','.join(
            f"'{z}'"
            for dep_list in task.dependencies
            for z in dep_list.dependencies
        ),
        tg_name=Loc.tg_name,
        full_tg_name=full_module_name + '.' + Loc.tg_name,
    )

    for key, value in task.payload.items():
        FileIO.write_pickle(value, resources.joinpath(key))

    FileIO.write_text(_MANIFEST_TEMPLATE.format(**props), release.joinpath('MANIFEST.in'))
    FileIO.write_text(_SETUP_TEMPLATE.format(**props), release.joinpath('setup.py'))
    FileIO.write_json(props, release.joinpath('properties.json'))
    FileIO.write_text(_INIT_TEMPLATE.format(**props), lib.joinpath('__init__.py'))

    # Run the build inside the release folder via ``cwd`` instead of changing
    # the working directory of this process, which would stay changed if the
    # call raised.
    subprocess.call([sys.executable, 'setup.py', 'sdist'], cwd=str(release))

    archive = Query.folder(release.joinpath('dist')).single()
    dst_location = dst_location.joinpath(
        f'{full_module_name}-{task.version}.tar.gz')
    shutil.copy(str(archive), str(dst_location))

    shutil.rmtree(str(release))
    return PackageInfo(task, full_module_name, dst_location)
def _replace_file(suffix):
    """
    Rewrites ``class_hmr.py`` (located next to this file) with ``_TEMPLATE``
    formatted with ``suffix``.
    """
    target = Path(__file__).parent / 'class_hmr.py'
    content = _TEMPLATE.format(suffix)
    FileIO.write_text(content, target)