class TestCreatePipeline(HypergolCreateTestCase):
    """Tests for the ``create_pipeline`` CLI function."""

    def __init__(self, methodName):
        super().__init__(projectName='TestProject', methodName=methodName)
        projectRoot = self.projectDirectory
        # Files come first, then their containing directories, then the project root.
        self.allPaths = [
            Path(projectRoot, 'test_pipeline.sh'),
            Path(projectRoot, 'pipelines', 'test_pipeline.py'),
            Path(projectRoot, 'pipelines'),
            Path(projectRoot),
        ]
        self.project = None

    def setUp(self):
        """Create the project object plus its project and pipelines directories."""
        super().setUp()
        repoManager = TestRepoManager(raiseIfDirty=False)
        self.project = HypergolProject(
            projectDirectory=self.projectDirectory,
            repoManager=repoManager
        )
        self.project.create_project_directory()
        self.project.create_pipelines_directory()

    def test_create_pipeline_creates_files(self):
        """Every path in ``allPaths`` must exist after the pipeline is created."""
        create_pipeline(pipeLineName='TestPipeline', projectDirectory=self.projectDirectory)
        for expectedPath in self.allPaths:
            pathExists = os.path.exists(expectedPath)
            self.assertEqual(pathExists, True)

    # Decorators apply bottom-up, so the mocks arrive in the order
    # is_task_class, is_data_model_class, check_dependencies.
    @mock.patch('hypergol.cli.create_pipeline.HypergolProject.check_dependencies')
    @mock.patch(
        'hypergol.cli.create_pipeline.HypergolProject.is_data_model_class',
        side_effect=lambda x: x.asClass in ['DataModelTestClass']
    )
    @mock.patch(
        'hypergol.cli.create_pipeline.HypergolProject.is_task_class',
        side_effect=lambda x: x.asClass in ['OtherTask', 'ExampleSource']
    )
    def test_create_pipeline_creates_content(self, mock_is_task_class, mock_is_data_model_class, mock_check_dependencies):
        """A dry run must return the expected pipeline code and shell script."""
        generated = create_pipeline(
            'TestPipeline', 'DataModelTestClass', 'ExampleSource', 'OtherTask',
            projectDirectory='test_project',
            dryrun=True
        )
        content, scriptContent = generated
        self.assertEqual(content, TEST_CONTENT)
        self.assertEqual(scriptContent, TEST_SHELL)
def setUp(self):
    """Create the project object plus its project and models directories."""
    super().setUp()
    repoManager = TestRepoManager(raiseIfDirty=False)
    self.project = HypergolProject(
        projectDirectory=self.projectDirectory,
        repoManager=repoManager
    )
    self.project.create_project_directory()
    self.project.create_models_directory()
def create_old_data_model(commit, *args, projectDirectory='.', dryrun=None, force=None):
    """Create an older version of a data model class from git

    Thin wrapper: builds a :class:`HypergolProject` from ``projectDirectory``,
    ``dryrun`` and ``force`` and forwards ``commit`` and ``*args`` to it.

    Please see :func:`~hypergol.hypergol_project.HypergolProject.create_old_data_model` in
    :class:`HypergolProject` for details.
    """
    HypergolProject(
        projectDirectory=projectDirectory,
        dryrun=dryrun,
        force=force
    ).create_old_data_model(commit, *args)
def diff_data_model(commit, *args, projectDirectory='.', dryrun=None, force=None):
    """Convenience function to compare old data model class definitions to the current one

    Thin wrapper: builds a :class:`HypergolProject` from ``projectDirectory``,
    ``dryrun`` and ``force`` and forwards ``commit`` and ``*args`` to it.

    Please see :func:`~hypergol.hypergol_project.HypergolProject.diff_data_model` in
    :class:`HypergolProject` for details.
    """
    HypergolProject(
        projectDirectory=projectDirectory,
        dryrun=dryrun,
        force=force
    ).diff_data_model(commit, *args)
def setUp(self):
    """Create a dry-run project rooted at the current directory."""
    super().setUp()
    repoManager = TestRepoManager(raiseIfDirty=False)
    self.project = HypergolProject(
        projectDirectory='.',
        dryrun=True,
        force=None,
        repoManager=repoManager
    )
def setUp(self):
    """Build the datasets, model, project and model manager, then start the manager.

    Reads ``self.batchSize``, ``self.datasetFactory``, ``self.location``,
    ``self.projectName`` and ``self.branch``, which are defined outside this method.
    """
    super().setUp()

    # Input dataset holds one DataClass1 record per index in range(batchSize).
    self.inputDataset = self.create_test_dataset(
        dataset=self.datasetFactory.get(dataType=DataClass1, name='data1'),
        content=[DataClass1(id_=k, value1=k + 1) for k in range(self.batchSize)]
    )
    self.outputDataset = self.datasetFactory.get(
        dataType=ExampleOutputDataClass,
        name='exampleOutputDataset'
    )
    self.batchProcessor = ExampleTensorflowBatchProcessor(
        inputDataset=self.inputDataset,
        inputBatchSize=self.batchSize,
        outputDataset=self.outputDataset
    )

    self.modelBlock = ExampleTrainableBlock(requiredOutputSize=1)
    self.model = TensorflowModelExample(exampleBlock=self.modelBlock)

    self.project = HypergolProject(
        projectDirectory='DOESNOTEXIST',
        dataDirectory=self.location,
        repoManager=TestRepoManager(raiseIfDirty=False)
    )
    self.modelManager = TensorflowModelManager(
        model=self.model,
        # ``learning_rate`` replaces the deprecated ``lr`` alias, which newer
        # Keras releases no longer accept.
        optimizer=tf.keras.optimizers.Adam(learning_rate=1),
        batchProcessor=self.batchProcessor,
        project=self.project,
        restoreWeightsPath=None
    )

    # Override the manager's project output paths with locations under self.location.
    self.modelManager.project.tensorboardPath = Path(
        self.location, self.projectName, 'tensorboard', self.branch
    )
    self.modelManager.project.modelDataPath = Path(
        self.location, self.projectName, self.branch, 'models'
    )
    self.modelManager.start()
def list_datasets(dataDirectory, projectDirectory='.', pattern=None):
    """Convenience function to list existing datasets in the project

    The project is always created with ``dryrun=False`` and ``force=False`` and
    the listing is requested with ``asCode=True``. Nothing is returned.

    Parameters
    ----------
    dataDirectory : string
        location of the project data, passed to :class:`HypergolProject` as ``dataDirectory``
    projectDirectory : string (default ``.``)
        location of the project directory
    pattern : (default=None)
        forwarded unchanged to ``HypergolProject.list_datasets``

    Please see :func:`~hypergol.hypergol_project.HypergolProject.list_datasets` in
    :class:`HypergolProject` for details.
    """
    HypergolProject(
        projectDirectory=projectDirectory,
        dataDirectory=dataDirectory,
        dryrun=False,
        force=False
    ).list_datasets(pattern=pattern, asCode=True)
def create_model_block(className, projectDirectory='.', dryrun=None, force=None):
    """Generates a Model Block class.

    The file will be located in ``project_name/models/blocks/block_name.py``

    Parameters
    ----------
    className : string (CamelCase)
        Name of the class to be created
    projectDirectory : string (default='.')
        Location of the project directory
    dryrun : bool (default=None)
        Passed to :class:`HypergolProject`
    force : bool (default=None)
        Passed to :class:`HypergolProject`

    Returns
    -------
    The result of ``project.cli_final_message`` called with a one-element
    tuple holding the rendered content.
    """
    project = HypergolProject(projectDirectory=projectDirectory, dryrun=dryrun, force=force)
    className = NameString(className)
    targetPath = Path(projectDirectory, 'models', 'blocks', className.asFileName)
    content = project.render(
        templateName='model_block.py.j2',
        templateData={'className': className},
        filePath=targetPath
    )
    return project.cli_final_message(creationType='ModelBlock', name=className, content=(content, ))
class TestCreateModelBlock(HypergolCreateTestCase):
    """Tests for the ``create_model_block`` CLI function."""

    def __init__(self, methodName):
        super().__init__(projectName='TestProject', methodName=methodName)
        projectRoot = self.projectDirectory
        # Files come first, then their containing directories, then the project root.
        self.allPaths = [
            Path(projectRoot, 'models', 'blocks', 'test_model_block.py'),
            Path(projectRoot, 'models', 'blocks'),
            Path(projectRoot, 'models'),
            Path(projectRoot),
        ]
        self.project = None

    def setUp(self):
        """Create the project object plus its project, models and blocks directories."""
        super().setUp()
        repoManager = TestRepoManager(raiseIfDirty=False)
        self.project = HypergolProject(
            projectDirectory=self.projectDirectory,
            repoManager=repoManager
        )
        self.project.create_project_directory()
        self.project.create_models_directory()
        self.project.create_blocks_directory()

    def test_create_model_block_creates_files(self):
        """Every path in ``allPaths`` must exist after the block is created."""
        create_model_block(className='TestModelBlock', projectDirectory=self.projectDirectory)
        for expectedPath in self.allPaths:
            pathExists = os.path.exists(expectedPath)
            self.assertEqual(pathExists, True)

    def test_create_model_block_creates_content(self):
        """A dry run must return the expected block code as its first element."""
        generated = create_model_block(
            className='TestModelBlock',
            projectDirectory=self.projectDirectory,
            dryrun=True
        )
        self.assertEqual(generated[0], TEST_MODEL_BLOCK)
class TestCreateTask(HypergolCreateTestCase):
    """Tests for the ``create_task`` CLI function."""

    def __init__(self, methodName):
        super().__init__(projectName='TestProject', methodName=methodName)
        projectRoot = self.projectDirectory
        # Files come first, then their containing directories, then the project root.
        self.allPaths = [
            Path(projectRoot, 'tasks', 'test_task.py'),
            Path(projectRoot, 'tasks'),
            Path(projectRoot),
        ]
        self.project = None
        # Raised so unittest prints full diffs for the long generated sources.
        self.maxDiff = 10000

    def setUp(self):
        """Create the project object plus its project and tasks directories."""
        super().setUp()
        repoManager = TestRepoManager(raiseIfDirty=False)
        self.project = HypergolProject(
            projectDirectory=self.projectDirectory,
            repoManager=repoManager
        )
        self.project.create_project_directory()
        self.project.create_tasks_directory()

    def test_create_task_creates_files(self):
        """Every path in ``allPaths`` must exist after the task is created."""
        create_task(className='TestTask', projectDirectory=self.projectDirectory)
        for expectedPath in self.allPaths:
            pathExists = os.path.exists(expectedPath)
            self.assertEqual(pathExists, True)

    def test_create_task_creates_content(self):
        """A dry run must return the expected task code as its first element."""
        generated = create_task(
            className='TestTask',
            projectDirectory=self.projectDirectory,
            dryrun=True
        )
        self.assertEqual(generated[0], TEST_TASK)

    def test_create_task_creates_content_source(self):
        """A dry run with ``source=True`` must return the expected source code."""
        generated = create_task(
            className='TestSource',
            source=True,
            projectDirectory=self.projectDirectory,
            dryrun=True
        )
        self.assertEqual(generated[0], TEST_SOURCE)
class TestCreateModel(HypergolCreateTestCase):
    """Tests for the ``create_model`` CLI function."""

    def __init__(self, methodName):
        super().__init__(projectName='TestProject', methodName=methodName)
        projectRoot = self.projectDirectory
        # Files come first, then their containing directories, then the project root.
        self.allPaths = [
            Path(projectRoot, 'models', 'test_model', 'test_model.py'),
            Path(projectRoot, 'models', 'test_model', 'test_model_batch_processor.py'),
            Path(projectRoot, 'models', 'test_model', 'train_test_model.py'),
            Path(projectRoot, 'models', 'test_model', 'serve_test_model.py'),
            Path(projectRoot, 'models', 'test_model'),
            Path(projectRoot, 'models'),
            Path(projectRoot, 'train_test_model.sh'),
            Path(projectRoot, 'serve_test_model.sh'),
            Path(projectRoot),
        ]
        self.project = None
        # Raised so unittest prints full diffs for the long generated sources.
        self.maxDiff = 30000

    def setUp(self):
        """Create the project object plus its project and models directories."""
        super().setUp()
        repoManager = TestRepoManager(raiseIfDirty=False)
        self.project = HypergolProject(
            projectDirectory=self.projectDirectory,
            repoManager=repoManager
        )
        self.project.create_project_directory()
        self.project.create_models_directory()

    @mock.patch('hypergol.cli.create_pipeline.HypergolProject.check_dependencies')
    def test_create_model_creates_files(self, mock_check_dependencies):
        """Every path in ``allPaths`` must exist after the model is created."""
        create_model(
            modelName='TestModel',
            trainingClass='TestTrainingClass',
            evaluationClass='TestEvaluationClass',
            inputClass='TestInput',
            outputClass='TestOutput',
            projectDirectory=self.projectDirectory
        )
        for expectedPath in self.allPaths:
            pathExists = os.path.exists(expectedPath)
            self.assertEqual(pathExists, True)

    # Decorators apply bottom-up, so the mocks arrive in the order
    # is_model_block_class, check_dependencies.
    @mock.patch('hypergol.cli.create_pipeline.HypergolProject.check_dependencies')
    @mock.patch(
        'hypergol.cli.create_pipeline.HypergolProject.is_model_block_class',
        side_effect=lambda x: x.asClass in ['TestBlock1', 'TestBlock2']
    )
    def test_create_model_creates_content(self, mock_is_model_block_class, mock_check_dependencies):
        """A dry run must return the six expected generated contents, in order."""
        generated = create_model(
            'TestModel', 'TestTrainingClass', 'TestEvaluationClass',
            'TestInput', 'TestOutput', 'TestBlock1', 'TestBlock2',
            projectDirectory=self.projectDirectory,
            dryrun=True
        )
        (content, batchProcessorContent, trainModelContent,
         scriptContent, serveContent, serveScriptContent) = generated
        self.assertEqual(content, TEST_CONTENT)
        self.assertEqual(batchProcessorContent, TEST_BATCH_PROCESSOR)
        self.assertEqual(trainModelContent, TEST_TRAIN_MODEL)
        self.assertEqual(scriptContent, TEST_SCRIPT)
        self.assertEqual(serveContent, TEST_SERVE)
        self.assertEqual(serveScriptContent, TEST_SERVE_SCRIPT)
def create_model(modelName, trainingClass, evaluationClass, inputClass, outputClass, *args, projectDirectory='.', dryrun=None, force=None):
    """Generates stubs for the Tensorflow model, its batch processor, the training and
    serving scripts and the shell scripts that run them from the command line.

    Shell scripts will be located in the project main directory (which should be the
    current directory when running them) and model files will be located in
    ``project_name/models/model_name/*.py``.

    After creation the user must implement the ``process_training_batch()``,
    ``process_evaluation_batch()``, ``process_input_batch()`` and ``process_output_batch``
    member functions that take ``trainingClass``, ``evaluationClass``, ``inputClass`` and
    ``outputClass`` respectively.

    The model must implement the ``get_loss()``, ``produce_metrics()`` and ``get_outputs()``
    functions (see documentation of :class:`.BaseTensorflowModel` and the ``Tutorial`` for
    more detailed instructions)

    The training script is generated with example stubs that should be modified to align
    with the created model.

    Parameters
    ----------
    modelName : string
        Name of the model
    trainingClass : BaseData
        Datamodel class (must exist) of the Dataset that contains the training data
    evaluationClass : BaseData
        Datamodel class (must exist) that will contain the evaluation data
    inputClass : BaseData
        Datamodel class (must exist) that will be used as the input when serving the model
    outputClass : BaseData
        Datamodel class (must exist) that will be returned as output when serving the model
    *args : BaseTensorflowModelBlock
        Names of blocks that will build up the model
    projectDirectory : string (default='.')
        Location of the project directory
    dryrun : bool (default=None)
        Passed to :class:`HypergolProject`
    force : bool (default=None)
        Passed to :class:`HypergolProject`

    Returns
    -------
    The result of ``project.cli_final_message`` called with the six rendered contents in
    the order: model, batch processor, training script, training shell script, serving
    script, serving shell script.
    """
    project = HypergolProject(projectDirectory=projectDirectory, dryrun=dryrun, force=force)

    modelName = NameString(modelName)
    trainingClass = NameString(trainingClass)
    evaluationClass = NameString(evaluationClass)
    inputClass = NameString(inputClass)
    outputClass = NameString(outputClass)
    blocks = [NameString(value) for value in args]
    dataClasses = [trainingClass, evaluationClass, inputClass, outputClass]
    project.check_dependencies(dataClasses + blocks)

    project.create_model_directory(modelName=modelName)
    project.render_simple(
        templateName='__init__.py.j2',
        filePath=Path(project.modelsPath, modelName.asSnake, '__init__.py')
    )

    # All python files of the model share this directory.
    modelDirectory = Path(projectDirectory, 'models', modelName.asSnake)

    content = project.render(
        templateName='model.py.j2',
        templateData={'name': modelName},
        filePath=Path(modelDirectory, modelName.asFileName)
    )

    batchProcessorContent = project.render(
        templateName='batch_processor.py.j2',
        templateData={
            'name': modelName,
            'evaluationClass': evaluationClass,
            'outputClass': outputClass,
        },
        filePath=Path(modelDirectory, f'{modelName.asSnake}_batch_processor.py')
    )

    # Only the arguments the project recognises as model blocks reach the training script.
    blockDependencies = [name for name in blocks if project.is_model_block_class(name)]
    trainModelContent = project.render(
        templateName='train_model.py.j2',
        templateData={
            'modelName': modelName,
            'trainingClass': trainingClass,
            'evaluationClass': evaluationClass,
            'blockDependencies': blockDependencies,
        },
        filePath=Path(modelDirectory, f'train_{modelName.asFileName}')
    )

    scriptContent = project.render_executable(
        templateName='train_model.sh.j2',
        templateData={'snakeName': modelName.asSnake},
        filePath=Path(projectDirectory, f'train_{modelName.asSnake}.sh')
    )

    serveContent = project.render(
        templateName='serve_model.py.j2',
        templateData={
            'modelName': modelName,
            'inputClass': inputClass,
            'outputClass': outputClass
        },
        filePath=Path(modelDirectory, f'serve_{modelName.asFileName}')
    )

    serveScriptContent = project.render_executable(
        templateName='serve_model.sh.j2',
        templateData={'snakeName': modelName.asSnake},
        filePath=Path(projectDirectory, f'serve_{modelName.asSnake}.sh')
    )

    allContent = (
        content,
        batchProcessorContent,
        trainModelContent,
        scriptContent,
        serveContent,
        serveScriptContent
    )
    return project.cli_final_message(creationType='Model', name=modelName, content=allContent)
def create_data_model(className, *args, projectDirectory='.', dryrun=None, force=None, project=None):
    """Generates domain class from the parameters derived from :class:`.BaseData`

    Fails if the target file already exists unless ``force=True`` or ``--force`` in CLI is set.

    Besides the class file in ``project.dataModelsPath`` this also renders a test file
    (``test_<class file name>``) into ``project.testsPath`` from ``test_data_models.py.j2``.

    Parameters
    ----------
    className : string (CamelCase)
        Name of the class to be created
    projectDirectory : string (default='.')
        Location of the project directory, the code will be created in
        ``projectDirectory/data_models/class_name.py``.
    dryrun : bool (default=None)
        If set to ``True`` it returns the generated code as a string
    force : bool (default=None)
        If set to ``True`` it overwrites the target file
    project : HypergolProject (default=None)
        Existing project to use; when ``None`` a new one is built from
        ``projectDirectory``, ``dryrun`` and ``force``
    *args : List of strings
        member variables string representation of the member variable in "name:type",
        "name:List[type]" or "name:type:id" format

    Returns
    -------
    content : string
        The generated code if ``dryrun`` is specified
    """
    # Build a project only when the caller did not supply one.
    if project is None:
        project = HypergolProject(projectDirectory=projectDirectory, dryrun=dryrun, force=force)

    dataModel = DataModel(className=NameString(className), project=project)
    # Each positional argument describes one member variable.
    for value in args:
        dataModel.process_inputs(value)

    # The set removes duplicate temporal types; sorting gives them a stable order.
    temporalDependencies = sorted(
        list({m.type_ for m in dataModel.conversions if m.isTemporal}))
    # Conversions that are neither temporal nor object typed are imported from data_models.
    dataModelDependencies = [{
        'snake': m.type_.asSnake,
        'name': m.type_
    } for m in dataModel.conversions if not m.isTemporal and not m.isObject]

    # NOTE(review): the second positional argument of ``add`` is either a condition or a
    # list of format values; presumably the line is emitted only when the condition is
    # truthy and once per list item -- confirm against DataModelRenderer.
    content = (
        DataModelRenderer()
        # --- imports ---
        .add('from typing import List ', dataModel.isListDependent)
        .add('from datetime import {0} ', temporalDependencies)
        .add(' ', dataModel.isListDependent or len(temporalDependencies) > 0)
        .add('from hypergol import BaseData ')
        .add(' ', len(dataModelDependencies) > 0)
        .add('from data_models.{snake} import {name}', dataModelDependencies)
        .add(' ')
        .add(' ')
        # --- class header and constructor ---
        .add('class {className}(BaseData): ', className=dataModel.className)
        .add(' ')
        .add(' def __init__(self, {arguments}): ', arguments=', '.join(dataModel.arguments))
        .add(' self.{0} = {0} ', dataModel.names)
        # --- get_id(), only when id members were declared ---
        .add(' ', len(dataModel.ids) > 0)
        .add(' def get_id(self): ', len(dataModel.ids) > 0)
        .add(' return ({idString}, ) ', len(dataModel.ids) > 0, idString=', '.join(dataModel.ids))
        # --- to_data(), only when at least one member needs conversion ---
        .add(' ', len(dataModel.conversions) > 0)
        .add(' def to_data(self): ', len(dataModel.conversions) > 0)
        .add(' data = self.__dict__.copy() ', len(dataModel.conversions) > 0)
        .add(" data['{name}'] = BaseData.to_string(data['{name}']) ", [{
            'name': m.name
        } for m in dataModel.conversions if m.isObject])
        .add(" data['{name}'] = data['{name}'].{conv}() ", [{
            'name': m.name,
            'conv': m.to_
        } for m in dataModel.conversions if not m.isList and not m.isObject])
        .add(" data['{name}'] = [v.{conv}() for v in data['{name}']] ", [{
            'name': m.name,
            'conv': m.to_
        } for m in dataModel.conversions if m.isList])
        .add(' return data ', len(dataModel.conversions) > 0)
        # --- from_data(), mirrors to_data() ---
        .add(' ', len(dataModel.conversions) > 0)
        .add(' @classmethod ', len(dataModel.conversions) > 0)
        .add(' def from_data(cls, data): ', len(dataModel.conversions) > 0)
        .add(" data['{name}'] = BaseData.from_string(data['{name}']) ", [{
            'name': m.name
        } for m in dataModel.conversions if m.isObject])
        .add(" data['{name}'] = {type_}.{conv}(data['{name}']) ", [{
            'name': m.name,
            'type_': str(m.type_),
            'conv': m.from_
        } for m in dataModel.conversions if not m.isList and not m.isObject])
        .add(" data['{name}'] = [{type_}.{conv}(v) for v in data['{name}']] ", [{
            'name': m.name,
            'type_': str(m.type_),
            'conv': m.from_
        } for m in dataModel.conversions if m.isList])
        .add(' return cls(**data) ', len(dataModel.conversions) > 0)).get()

    # Write the generated class file.
    project.create_text_file(content=content,
                             filePath=Path(project.dataModelsPath,
                                           dataModel.className.asFileName))
    # Render the accompanying test file for the new class.
    project.render(templateName='test_data_models.py.j2',
                   templateData={
                       'name': dataModel.className,
                       'initialisations': ', '.join(dataModel.initialisations)
                   },
                   filePath=Path(project.testsPath,
                                 f'test_{dataModel.className.asFileName}'))
    return project.cli_final_message(creationType='Class',
                                     name=dataModel.className,
                                     content=(content, ))
def create_project(projectName, dryrun=None, force=None):
    """Generates the project directories and files

    Fails if the target directory already exists unless ``force=True`` or ``--force`` in CLI is set.

    Directories:
        - ``data_models`` with ``__init__.py``
        - ``pipelines`` with ``__init__.py``
        - ``tasks`` with ``__init__.py``
        - ``models`` with ``__init__.py``
        - ``models/blocks`` with ``__init__.py``
        - ``tests``

    Executables:
        - ``make_venv.sh`` to create a virtual environment
        - ``run_tests.sh`` to run tests
        - ``run_pylint.sh`` to run linting

    Misc:
        - ``requirements.txt``
        - ``.gitignore``
        - ``README.md``
        - ``LICENSE`` <- Don't forget to add current year and your name or change it to the one you want
        - ``pylintrc``

    Parameters
    ----------
    projectName : string (CamelCase)
        Name of the project to be created
    dryrun : bool (default=None)
        If set to ``True`` it returns the generated code as a string
    force : bool (default=None)
        If set to ``True`` it overwrites the target file

    Returns
    -------
    The result of ``project.cli_final_message`` called with the rendered contents of the
    three executables followed by the five miscellaneous files.
    """
    projectName = NameString(projectName)
    # The project lives in a directory named after the snake_case project name.
    project = HypergolProject(projectDirectory=projectName.asSnake, dryrun=dryrun, force=force)
    project.create_project_directory()

    # Package directories: each one is created and then given an ``__init__.py``.
    project.create_data_models_directory()
    project.render_simple(templateName='__init__.py.j2', filePath=Path(project.dataModelsPath, '__init__.py'))
    project.create_tasks_directory()
    project.render_simple(templateName='__init__.py.j2', filePath=Path(project.tasksPath, '__init__.py'))
    project.create_pipelines_directory()
    project.render_simple(templateName='__init__.py.j2', filePath=Path(project.pipelinesPath, '__init__.py'))
    project.create_models_directory()
    project.render_simple(templateName='__init__.py.j2', filePath=Path(project.modelsPath, '__init__.py'))
    project.create_blocks_directory()
    project.render_simple(templateName='__init__.py.j2', filePath=Path(project.blocksPath, '__init__.py'))
    # The tests directory is created without an ``__init__.py``.
    project.create_tests_directory()

    # Executable helper scripts in the project root.
    makeVenvScript = project.render_executable(
        templateName='make_venv.sh.j2',
        templateData={},
        filePath=Path(project.projectDirectory, 'make_venv.sh')
    )
    runTestScript = project.render_executable(
        templateName='run_tests.sh.j2',
        templateData={},
        filePath=Path(project.projectDirectory, 'run_tests.sh')
    )
    runPylintScript = project.render_executable(
        templateName='run_pylint.sh.j2',
        templateData={},
        filePath=Path(project.projectDirectory, 'run_pylint.sh')
    )

    # Plain files in the project root.
    requirementsContent = project.render_simple(
        templateName='requirements.txt.j2',
        filePath=Path(project.projectDirectory, 'requirements.txt')
    )
    gitignoreContent = project.render_simple(
        templateName='.gitignore.j2',
        filePath=Path(project.projectDirectory, '.gitignore')
    )
    readmeContent = project.render_simple(
        templateName='README.md.j2',
        filePath=Path(project.projectDirectory, 'README.md')
    )
    licenseContent = project.render_simple(
        templateName='LICENSE.j2',
        filePath=Path(project.projectDirectory, 'LICENSE')
    )
    pylintrcContent = project.render_simple(
        templateName='pylintrc.j2',
        filePath=Path(project.projectDirectory, 'pylintrc')
    )

    allContent = (
        makeVenvScript,
        runTestScript,
        runPylintScript,
        requirementsContent,
        gitignoreContent,
        readmeContent,
        licenseContent,
        pylintrcContent
    )
    return project.cli_final_message(creationType='Project', name=projectName, content=allContent)
def create_pipeline(pipeLineName, *args, projectDirectory='.', dryrun=None, force=None, project=None):
    """Generates a pipeline script from the parameters

    Fails if the target file already exists unless ``force=True`` or ``--force`` in CLI is set.

    Generates pipe_line_name.py in pipelines, imports all the classes listed in ``*args``
    and creates stubs for them to be filled. Also creates the executable
    ``pipe_line_name.sh`` in the project directory with examples how to pass parameters
    from the shell.

    Parameters
    ----------
    pipeLineName : string (CamelCase)
        Name of the pipeline to be created
    projectDirectory : string (default='.')
        Location of the project directory, the code will be created in
        ``projectDirectory/pipelines/pipe_line_name.py``.
    dryrun : bool (default=None)
        If set to ``True`` it returns the generated code as a string
    force : bool (default=None)
        If set to ``True`` it overwrites the target file
    project : HypergolProject (default=None)
        Existing project to use; when ``None`` a new one is built from
        ``projectDirectory``, ``dryrun`` and ``force``. The output paths are built from
        ``projectDirectory`` even when a project is supplied.
    *args : List of strings (CamelCase)
        Classes to be imported into the generated code from the data model, fails if class
        not found in either ``data_models`` or ``tasks``

    Returns
    -------
    content : string
        The generated code if ``dryrun`` is specified
    scriptContent : string
        The generated shell script to run the pipeline if ``dryrun`` is specified
    """
    if project is None:
        project = HypergolProject(projectDirectory=projectDirectory, dryrun=dryrun, force=force)

    pipeLineName = NameString(pipeLineName)
    dependencies = [NameString(value) for value in args]
    project.check_dependencies(dependencies)

    # Split the requested classes into tasks and data models for the template.
    templateData = {
        'snakeName': pipeLineName.asSnake,
        'taskDependencies': [name for name in dependencies if project.is_task_class(name)],
        'dataModelDependencies': [name for name in dependencies if project.is_data_model_class(name)]
    }
    content = project.render(
        templateName='pipeline.py.j2',
        templateData=templateData,
        filePath=Path(projectDirectory, 'pipelines', pipeLineName.asFileName)
    )

    scriptContent = project.render_executable(
        templateName='pipeline.sh.j2',
        templateData={'snakeName': pipeLineName.asSnake},
        filePath=Path(projectDirectory, f'{pipeLineName.asSnake}.sh')
    )

    return project.cli_final_message(creationType='PipeLine', name=pipeLineName, content=(content, scriptContent))