Beispiel #1
0
    def cache_from(self, src: Queryable, cnt=None) -> None:
        """
        Stores the content of a queryable in this cache (for instance, of a
        queryable produced by DataSource::get_data).

        The data is first written to ``<path>.tmp`` and only moved over the
        cache file after the whole queryable has been fed through the writing
        pipeline. The value reported by the ``agg.Count`` aggregator is then
        written as text to ``self.length_path``.

        Args:
            src: Queryable to cache from
            cnt: if not None, ``src`` is limited with ``take(cnt)`` before caching
        """
        source = src if cnt is None else src.take(cnt)

        target = str(self.path)
        os.makedirs(str(Path(target).parent), exist_ok=True)

        # Write into a staging file so the existing cache file is only
        # replaced once the new one has been produced.
        staging = target + '.tmp'
        writer = self._get_writing_aggregator(staging)
        pipeline = Query.push().split_pipelines(file=writer, cnt=agg.Count())
        outcome = source.feed(pipeline)

        if os.path.isfile(target):
            os.remove(target)
        shutil.move(staging, target)

        FileIO.write_text(str(outcome['cnt']), self.length_path)
Beispiel #2
0
def make_container(task: ContaineringTask):
    """
    Creates a package of the current training grounds, and then uses the package
    to create a local container that trains the provided model.

    The entry file and the Dockerfile are rendered from the templates stored in
    ``task`` into a temporary release folder, ``docker build`` is run on that
    folder, and the folder is removed afterwards.
    """
    release = Loc.temp_path.joinpath('release/container')  # type:Path
    release_dir = str(release)
    os.makedirs(release_dir, exist_ok=True)

    packaging_info = make_package(task.packaging_task, release)

    # One `RUN pip install ...` instruction per dependency list
    install_libraries = ''.join(
        'RUN pip install ' + ' '.join(dep_list.dependencies) + "\n\n"
        for dep_list in task.packaging_task.dependencies
    )

    props = {
        'module': packaging_info.module_name,
        'tg_name': Loc.tg_name,
        'install_libraries': install_libraries,
        'package_filename': packaging_info.path.name,
    }

    FileIO.write_text(task.entry_file_template.format(**props),
                      release.joinpath(task.entry_file_name))
    FileIO.write_text(task.dockerfile_template.format(**props),
                      release.joinpath('Dockerfile'))

    image = task.image_name + ":" + task.image_tag
    call(['docker', 'build', '-t', image, release_dir])

    shutil.rmtree(release)
 def store_artifact(self, path: List[Any], name: Any, object: Any):
     """
     Pickles ``object`` into a file under ``self.model_folder``.

     Args:
         path: items whose string forms become nested subfolders of ``self.model_folder``
         name: its string form is used as the file name inside the innermost folder
         object: the value passed to ``FileIO.write_pickle``
     """
     folder = self.model_folder
     for part in path:
         folder = folder / str(part)
     os.makedirs(str(folder), exist_ok=True)

     destination = folder / str(name)
     FileIO.write_pickle(object, destination)
     self.log(f"Saved artifact {destination}")
 def unpickle(self, subpath) -> Any:
     """
     Loads a pickled object stored at ``subpath`` relative to ``self.path``.

     When ``self.hacked_unpickling`` is falsy, the file is read with
     ``FileIO.read_pickle``. Otherwise the module names stored in
     ``package.json`` are handed to ``HackedUnpicker``, which performs the
     loading (presumably remapping one module name to the other -- confirm
     against ``HackedUnpicker``).

     NOTE: unpickling executes code from the file; only use on trusted data.
     """
     if not self.hacked_unpickling:
         return FileIO.read_pickle(self.path.joinpath(subpath))

     package = FileIO.read_jsonpickle(self.path.joinpath('package.json'))
     pickle_location = str(self.path.joinpath(subpath))
     with open(pickle_location, 'rb') as stream:
         loader = HackedUnpicker(stream,
                                 package['tg_module_name'],
                                 package['original_tg_module_name'])
         return loader.load()
    def test_ordinary_unpickling(self):
        """
        Checks that a pickled ``class_a.TestClass`` is restored as-is by
        ``FileIO.read_pickle``, and as ``class_b.TestClass`` when loaded through
        ``HackedUnpicker`` with the class_a -> class_b module names.
        """
        FileIO.write_pickle(class_a.TestClass(), 'test.pkl')
        try:
            ta = FileIO.read_pickle('test.pkl')
            self.assertIsInstance(ta, class_a.TestClass)
            self.assertEqual('A', ta.get_value())

            with open('test.pkl', 'rb') as file_obj:
                tb = HackedUnpicker(
                    file_obj,
                    'tg.common.test_common.test_delivery.test_packaging.class_a',
                    'tg.common.test_common.test_delivery.test_packaging.class_b'
                ).load()
            self.assertIsInstance(tb, class_b.TestClass)
            self.assertEqual('B', tb.get_value())
        finally:
            # Remove the temporary pickle even when an assertion above fails,
            # so a failing run does not leave test.pkl behind.
            os.remove('test.pkl')
Beispiel #6
0
 def get_data(self):
     """
     Returns a ``Queryable`` built from ``self._self_get_reading_query()``.

     The length is read as an integer from the text file at
     ``self.length_path``; if it cannot be read or parsed, the ``Queryable``
     is created with ``None`` as its length.
     """
     try:
         length = int(FileIO.read_text(self.length_path))
     except Exception:
         # Best effort: a missing or malformed length file must not prevent
         # reading. `Exception` (rather than a bare except) lets
         # KeyboardInterrupt and SystemExit propagate.
         length = None
     q = self._self_get_reading_query()
     return Queryable(q, length)
def execute(entry: EntryPoint):
    """
    Initializes a training environment over ``/opt/ml/model``, loads the
    ``'model'`` resource of ``entry`` and runs it on
    ``/opt/ml/input/data/training/``.
    """
    env = FileCacheTrainingEnvironment(_myprint, Path('/opt/ml/model'))

    hyperparams = FileIO.read_json('/opt/ml/input/config/hyperparameters.json')
    hyperparams.pop('_tuning_objective_metric', None)

    # NOTE(review): `hyperparams` is loaded and cleaned above but an empty
    # dict is passed below, so the values from hyperparameters.json are never
    # applied to the model. Confirm whether this is intentional.
    package_location = Path('/opt/ml/code/package.tar.gz')
    model = env.common_initialization(entry, package_location, {}, 'model')

    env.log('Starting training now...')
    model.run_with_environment('/opt/ml/input/data/training/', env)
 def _parse_base(self):
     """
     Splits the source text into blocks and yields them one by one.

     The text is ``self.mock`` when it is set, otherwise the content of
     ``self.file_path``. Each line is passed to a ``HeaderParser``; lines
     answered with ``Ignore`` are skipped, and ``NewTextBlock`` closes the
     block collected so far. Every yielded item is a tuple
     ``(lines, header_tags)``, where ``header_tags`` is what the parser
     reported when the block was opened.
     """
     source = self.mock
     if source is None:
         source = FileIO.read_text(self.file_path)

     header_parser = HeaderParser()
     block = None
     for raw_line in source.split('\n'):
         line = InterFormatParser._circumvent_separator_problems(raw_line)
         response = header_parser.observe(line)
         if response == HeaderParseResponse.Ignore:
             continue
         if response == HeaderParseResponse.NewTextBlock and block is not None:
             # The previous block is complete; hand it out and start afresh
             yield block
             block = None
         if block is None:
             block = ([], header_parser.get_header_tags())
         block[0].append(line)

     # Emit the trailing block, if any line was collected at all
     if block is not None:
         yield block
    def test_zip_file(self):
        """
        Caches the first 7 of 10 integers into a ``ZippedFileDataSource`` with
        ``buffer_size=4`` and checks the length file, the two stored buffers
        and the data read back from the cache.
        """
        src = Query.en(range(10))
        path = Path(__file__).parent.joinpath('test_cache')
        zip_name = str(path) + '.pkllines.zip'
        length_name = zip_name + '.length'
        cache = ZippedFileDataSource(path, buffer_size=4)

        self.assertEqual(False, cache.is_available())
        cache.cache_from(src, 7)
        self.assertEqual(True, cache.is_available())

        self.assertEqual("7", FileIO.read_text(length_name))

        # 7 items with a buffer of 4 are expected as entries '0' and '1'
        stored = Query.file.zipped_folder(zip_name).to_dictionary()
        self.assertEqual(2, len(stored))
        self.assertListEqual([0, 1, 2, 3], stored['0'])
        self.assertListEqual([4, 5, 6], stored['1'])

        self.assertListEqual(list(range(7)), cache.get_data().to_list())

        os.unlink(length_name)
        os.unlink(zip_name)
    def common_initialization(self,
                              entry: EntryPoint,
                              package_location: Optional[Path] = None,
                              hyperparams: Optional[Dict] = None,
                              resource_name='model') -> AbstractTrainingTask:
        """
        Prepares the environment for training and returns the loaded model.

        Steps, each of them logged via ``self.log``:
          * the properties of ``entry`` are stored as ``package.json``;
          * if ``package_location`` is given, the package file is copied into
            the environment;
          * the resource ``resource_name`` is loaded from ``entry`` as the model;
          * if ``hyperparams`` is given, it is stored as ``hyperparameters.json``
            and applied to the model;
          * the jsonpickled model is stored as ``task.json``.

        Args:
            entry: entry point providing the package properties and the resource
            package_location: location of the package file to copy, or None
            hyperparams: hyperparameters to apply to the model, or None
            resource_name: name of the resource to load from ``entry``

        Returns:
            the model loaded from ``entry``
        """
        self.log('Common Training Initialization')

        self.log('Preparing package properties...')
        serialized_props = json.dumps(entry.get_properties())
        self.log(serialized_props)
        FileIO.write_text(serialized_props, self.get_file_name('package.json'))

        if package_location is not None:
            self.log('Preparing package file...')
            copy_source = str(package_location)
            # NOTE(review): 'package.tag.gz' looks like a typo for
            # 'package.tar.gz'; kept as is, since other code may rely on it.
            copy_target = str(self.get_file_name('package.tag.gz'))
            shutil.copy(copy_source, copy_target)

        self.log('Loading model from package...')
        model = entry.load_resource(resource_name)

        if hyperparams is not None:
            self.log("Hyperparameters are provided")
            self.log(hyperparams)
            self.log("Storing Hyperparameters in file")
            hyperparams_file = self.get_file_name('hyperparameters.json')
            FileIO.write_json(hyperparams, hyperparams_file)
            self.log('Applying hyperparams')
            model.apply_hyperparams(hyperparams)
        else:
            self.log("No hyperparameters are provided")

        self.log("Model initialized. Jsonpickling...")
        # Round-trip through json to get an indented representation
        pickled_model = jsonpickle.dumps(model)
        model_state = json.dumps(json.loads(pickled_model), indent=1)
        FileIO.write_text(model_state, self.get_file_name('task.json'))

        self.log("Common Training Initialization completed")
        return model
Beispiel #11
0
def make_package(
        task: PackagingTask,
        dst_location: Optional[Union[Path, str]] = None) -> PackageInfo:
    """
    Creates the package out of the :class:``PackagingTask``, and returns
    :class:``PackageInfo`` describing this package.

    The package is assembled in a temporary staging folder
    (``release/package_tmp`` under ``Loc.temp_path``), built there with
    ``setup.py sdist``, copied to ``dst_location`` and the staging folder is
    removed afterwards.

    Args:
        task: description of the package: name, version, dependencies and
            the payload that is pickled into the ``resources`` folder
        dst_location: folder to place the resulting ``.tar.gz`` into;
            defaults to ``release/package`` under ``Loc.temp_path``

    Returns:
        ``PackageInfo`` built from the task, the full module name and the
        path of the created file

    Raises:
        ValueError: if ``dst_location`` is neither None, ``str`` nor ``Path``
    """
    if dst_location is None:
        dst_location = Loc.temp_path.joinpath('release/package')
    elif isinstance(dst_location, str):
        dst_location = Path(dst_location)
    elif not isinstance(dst_location, Path):
        raise ValueError(
            f'dst_location was {dst_location}, while str or Path is expected')
    os.makedirs(str(dst_location), exist_ok=True)

    root = Loc.tg_path  # type:Path
    release = Loc.temp_path.joinpath('release/package_tmp')  # type:Path
    release_dir = str(release)
    # Start from a clean staging folder; it is fine if it does not exist yet
    shutil.rmtree(release_dir, ignore_errors=True)
    os.makedirs(release_dir)

    full_module_name = _full_module_name(task.name, task.version)
    lib = release.joinpath(full_module_name)

    shutil.copytree(str(root), str(lib.joinpath(Loc.tg_name)))

    resources = lib.joinpath('resources')  # type: Path
    os.makedirs(str(resources))

    props = dict(
        module_name=task.name,
        version=task.version,
        full_module_name=full_module_name,
        dependencies=','.join(f"'{z}'" for dep_list in task.dependencies
                              for z in dep_list.dependencies),
        tg_name=Loc.tg_name,
        full_tg_name=full_module_name + '.' + Loc.tg_name,
    )

    for key, value in task.payload.items():
        FileIO.write_pickle(value, resources.joinpath(key))

    FileIO.write_text(_MANIFEST_TEMPLATE.format(**props),
                      release.joinpath('MANIFEST.in'))
    FileIO.write_text(_SETUP_TEMPLATE.format(**props),
                      release.joinpath('setup.py'))
    FileIO.write_json(props, release.joinpath('properties.json'))

    FileIO.write_text(_INIT_TEMPLATE.format(**props),
                      lib.joinpath('__init__.py'))

    # Run the build inside the staging folder via `cwd` instead of os.chdir,
    # so the working directory of this process is never changed (and cannot
    # be left changed if the call raises).
    subprocess.call([sys.executable, 'setup.py', 'sdist'], cwd=release_dir)

    file = Query.folder(release.joinpath('dist')).single()

    dst_location = dst_location.joinpath(
        f'{full_module_name}-{task.version}.tar.gz')

    shutil.copy(str(file), str(dst_location))
    shutil.rmtree(release_dir)
    return PackageInfo(task, full_module_name, dst_location)
def _replace_file(suffix):
    """
    Overwrites ``class_hmr.py`` located next to this file with ``_TEMPLATE``
    formatted with ``suffix``.
    """
    target = Path(__file__).parent / 'class_hmr.py'
    content = _TEMPLATE.format(suffix)
    FileIO.write_text(content, target)