def run_workflow(self, datastore):
    """Execute a Python script that creates a dataset containing unicode
    characters and verify the columns of the resulting dataset.

    Uploads CSV_FILE to the test filestore, loads it into the given
    datastore, runs PYTHON_SCRIPT through a PyCellTaskProcessor with the
    loaded dataset registered under DATASET_NAME, and checks the column
    names of the dataset that the script wrote under DATASET_NAME.

    Parameters
    ----------
    datastore:
        Datastore used to load the input dataset and to read back the
        dataset written by the script.
    """
    f_handle = self.filestore.upload_file(CSV_FILE)
    ds = datastore.load_dataset(f_handle)
    # Run the Python script with the loaded dataset in its context
    cmd = python_cell(
        source=PYTHON_SCRIPT,
        validate=True
    )
    result = PyCellTaskProcessor().compute(
        command_id=cmd.command_id,
        arguments=cmd.arguments,
        context=TaskContext(
            datastore=datastore,
            filestore=self.filestore,
            datasets={DATASET_NAME: ds.identifier}
        )
    )
    self.assertTrue(result.is_success)
    # Fetch the dataset version that the script wrote
    ds = datastore.get_dataset(
        result.provenance.write[DATASET_NAME].identifier
    )
    # Normalize names (upper case, underscores as blanks) before comparing
    names = set(c.name.upper().replace('_', ' ') for c in ds.columns)
    # assertEqual, not assertTrue: assertTrue(len(names), 4) would treat 4 as
    # the failure message and pass for any non-empty set of names.
    self.assertEqual(len(names), 4)
    for name in ['DATE', 'IMO CODE', 'PORT OF DEPARTURE', 'PLACE OF RECEIPT']:
        self.assertIn(name, names)
def setUp(self):
    """Prepare a synchronous task engine backed by an empty server
    directory.

    Recreates SERVER_DIR, creates a single project in a fresh project cache
    (its identifier is stored in self.PROJECT_ID), and registers the Python
    cell and vizual processors with the backend stored in self.backend.
    """
    # Start from a clean server directory
    if os.path.isdir(SERVER_DIR):
        shutil.rmtree(SERVER_DIR)
    os.makedirs(SERVER_DIR)
    vizual_processor = VizualTaskProcessor(api=DefaultVizualApi())
    python_processor = PyCellTaskProcessor()
    project_cache = CommonProjectCache(
        datastores=FileSystemDatastoreFactory(DATASTORES_DIR),
        filestores=FileSystemFilestoreFactory(FILESTORES_DIR),
        viztrails=OSViztrailRepository(base_path=VIZTRAILS_DIR)
    )
    new_project = project_cache.create_project()
    self.PROJECT_ID = new_project.identifier
    # Map each package to the processors that handle its commands
    command_registry = {
        PACKAGE_PYTHON: {
            PYTHON_CODE: python_processor
        },
        PACKAGE_VIZUAL: {
            VIZUAL_LOAD: vizual_processor,
            VIZUAL_UPD_CELL: vizual_processor
        }
    }
    self.backend = SynchronousTaskEngine(
        commands=command_registry,
        projects=project_cache
    )
def test_simple_script(self):
    """Run a one-line Python script and check its standard output."""
    cmd = python_cell(source='print 2+2', validate=True)
    processor = PyCellTaskProcessor()
    # The script does not touch any datasets, so the context has none
    result = processor.compute(
        command_id=cmd.command_id,
        arguments=cmd.arguments,
        context=TaskContext(
            datastore=self.datastore,
            filestore=self.filestore,
            datasets={}
        )
    )
    self.assertTrue(result.is_success)
    first_output = result.outputs.stdout[0]
    self.assertEqual(first_output.value, '4')
def test_unknown_dataset_script(self):
    """Test running a script that accesses an unknown dataset.

    The dataset loaded from CSV_FILE is registered in the task context under
    the name 'people'. The first script is expected to fail without any
    provenance information. The second script is expected to succeed and
    to report 'employees' in its read provenance with a None value.
    """
    fh = self.filestore.upload_file(CSV_FILE)
    ds = self.datastore.load_dataset(fh)

    def run_script(source):
        """Execute the given script source with ds available as 'people'."""
        cmd = python_cell(source=source, validate=True)
        return PyCellTaskProcessor().compute(
            command_id=cmd.command_id,
            arguments=cmd.arguments,
            context=TaskContext(
                datastore=self.datastore,
                filestore=self.filestore,
                project_id=6,
                artifacts={'people': ds}
            )
        )

    result = run_script(PRINT_UNKNOWN_DATASET_PY)
    self.assertFalse(result.is_success)
    # assertEqual reports the actual value on failure, unlike
    # assertTrue(x == {})
    self.assertEqual(result.provenance.read, {})
    self.assertEqual(result.provenance.write, {})
    self.assertEqual(len(result.outputs.stdout), 0)
    self.assertEqual(len(result.outputs.stderr), 1)
    # Running a similar script that catches the error should be a success
    # and the access to the dataset should be recorded in the resulting
    # read provenance
    result = run_script(PRINT_UNKNOWN_DATASET_PY_WITH_TRY_CATCH)
    self.assertTrue(result.is_success)
    self.assertIsNotNone(result.provenance.read)
    self.assertIsNotNone(result.provenance.write)
    self.assertEqual(len(result.provenance.read), 1)
    self.assertEqual(len(result.provenance.write), 0)
    self.assertIn('employees', result.provenance.read)
    self.assertIsNone(result.provenance.read['employees'])
    self.assertEqual(len(result.outputs.stdout), 1)
    self.assertEqual(len(result.outputs.stderr), 0)
def test_create_dataset_script(self):
    """Run a script that creates a new dataset and check the provenance
    and output of the result.
    """
    cmd = python_cell(source=CREATE_DATASET_PY, validate=True)
    processor = PyCellTaskProcessor()
    result = processor.compute(
        command_id=cmd.command_id,
        arguments=cmd.arguments,
        context=TaskContext(
            datastore=self.datastore,
            filestore=self.filestore
        )
    )
    self.assertTrue(result.is_success)
    # Nothing is read; exactly one dataset named 'people' is written
    self.assertIsNotNone(result.provenance.read)
    self.assertIsNotNone(result.provenance.write)
    self.assertEqual(len(result.provenance.read), 0)
    self.assertEqual(len(result.provenance.write), 1)
    self.assertTrue('people' in result.provenance.write)
    self.assertIsNotNone(result.provenance.write['people'])
    # A single stdout entry and no errors
    self.assertEqual(len(result.outputs.stdout), 1)
    self.assertEqual(len(result.outputs.stderr), 0)
    self.assertEqual(result.outputs.stdout[0].value, 'Alice\nBob')
def test_print_dataset_script(self):
    """Run a script that prints rows of an existing dataset and check the
    provenance and output of the result.
    """
    file_handle = self.filestore.upload_file(CSV_FILE)
    dataset = self.datastore.load_dataset(file_handle)
    cmd = python_cell(source=PRINT_DATASET_PY, validate=True)
    processor = PyCellTaskProcessor()
    # Make the loaded dataset available to the script as 'people'
    result = processor.compute(
        command_id=cmd.command_id,
        arguments=cmd.arguments,
        context=TaskContext(
            datastore=self.datastore,
            filestore=self.filestore,
            datasets={'people': dataset.identifier}
        )
    )
    self.assertTrue(result.is_success)
    # Exactly one dataset named 'people' is read; nothing is written
    self.assertIsNotNone(result.provenance.read)
    self.assertIsNotNone(result.provenance.write)
    self.assertEqual(len(result.provenance.read), 1)
    self.assertEqual(len(result.provenance.write), 0)
    self.assertTrue('people' in result.provenance.read)
    self.assertIsNotNone(result.provenance.read['people'])
    # A single stdout entry and no errors
    self.assertEqual(len(result.outputs.stdout), 1)
    self.assertEqual(len(result.outputs.stderr), 0)
    self.assertEqual(result.outputs.stdout[0].value, 'Alice\nBob')
def setUp(self):
    """Prepare a multi-process backend backed by an empty server directory.

    Recreates SERVER_DIR, creates a single project in a fresh project cache
    (its identifier is stored in self.PROJECT_ID), and stores a
    MultiProcessBackend with Python, vizual and 'error' processors in
    self.backend.
    """
    # Start from a clean server directory
    if os.path.isdir(SERVER_DIR):
        shutil.rmtree(SERVER_DIR)
    os.makedirs(SERVER_DIR)
    project_cache = CommonProjectCache(
        datastores=FileSystemDatastoreFactory(DATASTORES_DIR),
        filestores=FileSystemFilestoreFactory(FILESTORES_DIR),
        viztrails=OSViztrailRepository(base_path=VIZTRAILS_DIR)
    )
    new_project = project_cache.create_project()
    self.PROJECT_ID = new_project.identifier
    # One processor per package identifier
    package_processors = {
        PACKAGE_PYTHON: PyCellTaskProcessor(),
        PACKAGE_VIZUAL: VizualTaskProcessor(api=MimirVizualApi()),
        'error': FakeTaskProcessor()
    }
    self.backend = MultiProcessBackend(
        processors=package_processors,
        projects=project_cache
    )