def run_workflow(self, datastore):
     """Run a Python cell that creates a dataset containing unicode
     characters and verify the columns of the resulting dataset.

     Uploads CSV_FILE to the test filestore, loads it into the given
     datastore, executes PYTHON_SCRIPT with the loaded dataset bound to
     DATASET_NAME, and checks the column names of the dataset that the
     script wrote under DATASET_NAME.

     Parameters
     ----------
     datastore:
         Datastore that is used to load the input dataset and to read the
         dataset written by the script.
     """
     f_handle = self.filestore.upload_file(CSV_FILE)
     ds = datastore.load_dataset(f_handle)
     # Run the Python script
     cmd = python_cell(
         source=PYTHON_SCRIPT,
         validate=True
     )
     result = PyCellTaskProcessor().compute(
         command_id=cmd.command_id,
         arguments=cmd.arguments,
         context=TaskContext(
             datastore=datastore,
             filestore=self.filestore,
             datasets={DATASET_NAME: ds.identifier}
         )
     )
     self.assertTrue(result.is_success)
     # Fetch the dataset version that the script wrote
     ds = datastore.get_dataset(
         result.provenance.write[DATASET_NAME].identifier
     )
     # Normalize the names so that the comparison ignores case and treats
     # underscores like blanks
     names = {c.name.upper().replace('_', ' ') for c in ds.columns}
     # The second argument of assertTrue is the failure message, so
     # assertTrue(len(names), 4) passed for every non-empty set. Compare the
     # number of distinct column names explicitly instead.
     self.assertEqual(len(names), 4)
     for name in ['DATE', 'IMO CODE', 'PORT OF DEPARTURE', 'PLACE OF RECEIPT']:
         self.assertIn(name, names)
 def setUp(self):
     """Recreate an empty server directory, create a project, and set up a
     synchronous task engine with processors for Python and Vizual commands.
     """
     # Always start from an empty server directory
     if os.path.isdir(SERVER_DIR):
         shutil.rmtree(SERVER_DIR)
     os.makedirs(SERVER_DIR)
     vizual_processor = VizualTaskProcessor(api=DefaultVizualApi())
     python_processor = PyCellTaskProcessor()
     project_cache = CommonProjectCache(
         datastores=FileSystemDatastoreFactory(DATASTORES_DIR),
         filestores=FileSystemFilestoreFactory(FILESTORES_DIR),
         viztrails=OSViztrailRepository(base_path=VIZTRAILS_DIR)
     )
     project = project_cache.create_project()
     self.PROJECT_ID = project.identifier
     # Processor lookup: package identifier -> command identifier -> processor
     commands = {
         PACKAGE_PYTHON: {PYTHON_CODE: python_processor},
         PACKAGE_VIZUAL: {
             VIZUAL_LOAD: vizual_processor,
             VIZUAL_UPD_CELL: vizual_processor
         }
     }
     self.backend = SynchronousTaskEngine(
         commands=commands,
         projects=project_cache
     )
 def test_simple_script(self):
     """Execute a minimal Python cell without any datasets and check the
     value that it prints to standard output.
     """
     command = python_cell(source='print 2+2', validate=True)
     processor = PyCellTaskProcessor()
     # The script does not access datasets, so the mapping is empty
     context = TaskContext(
         datastore=self.datastore,
         filestore=self.filestore,
         datasets={}
     )
     outcome = processor.compute(
         command_id=command.command_id,
         arguments=command.arguments,
         context=context
     )
     self.assertTrue(outcome.is_success)
     first_output = outcome.outputs.stdout[0]
     self.assertEqual(first_output.value, '4')
# Example #4
# 0
 def test_unknown_dataset_script(self):
     """Run scripts that access a dataset which is not part of the task
     context, once without and once with error handling in the script.
     """
     file_handle = self.filestore.upload_file(CSV_FILE)
     dataset = self.datastore.load_dataset(file_handle)

     def execute(source):
         """Run the given script source in a fresh processor and context
         where the loaded dataset is the only artifact ('people').
         """
         command = python_cell(source=source, validate=True)
         processor = PyCellTaskProcessor()
         context = TaskContext(
             datastore=self.datastore,
             filestore=self.filestore,
             project_id=6,
             artifacts={'people': dataset}
         )
         return processor.compute(
             command_id=command.command_id,
             arguments=command.arguments,
             context=context
         )

     # The script that does not catch the error is expected to fail without
     # provenance information and with a single message on standard error
     failed = execute(PRINT_UNKNOWN_DATASET_PY)
     self.assertFalse(failed.is_success)
     self.assertEqual(failed.provenance.read, {})
     self.assertEqual(failed.provenance.write, {})
     self.assertEqual(len(failed.outputs.stdout), 0)
     self.assertEqual(len(failed.outputs.stderr), 1)
     # Running a similar script that catches the error should be a success
     # and the access to the dataset should be recorded in the resulting
     # read provenance
     caught = execute(PRINT_UNKNOWN_DATASET_PY_WITH_TRY_CATCH)
     self.assertTrue(caught.is_success)
     read_prov = caught.provenance.read
     write_prov = caught.provenance.write
     self.assertIsNotNone(read_prov)
     self.assertIsNotNone(write_prov)
     self.assertEqual(len(read_prov), 1)
     self.assertEqual(len(write_prov), 0)
     self.assertIn('employees', read_prov)
     self.assertIsNone(read_prov['employees'])
     self.assertEqual(len(caught.outputs.stdout), 1)
     self.assertEqual(len(caught.outputs.stderr), 0)
 def test_create_dataset_script(self):
     """Run a script that creates a new dataset and check the recorded
     provenance and the script output.
     """
     command = python_cell(source=CREATE_DATASET_PY, validate=True)
     processor = PyCellTaskProcessor()
     context = TaskContext(
         datastore=self.datastore,
         filestore=self.filestore
     )
     outcome = processor.compute(
         command_id=command.command_id,
         arguments=command.arguments,
         context=context
     )
     self.assertTrue(outcome.is_success)
     # Nothing is read; the dataset 'people' is the only thing written
     read_prov = outcome.provenance.read
     write_prov = outcome.provenance.write
     self.assertIsNotNone(read_prov)
     self.assertIsNotNone(write_prov)
     self.assertEqual(len(read_prov), 0)
     self.assertEqual(len(write_prov), 1)
     self.assertIn('people', write_prov)
     self.assertIsNotNone(write_prov['people'])
     # A single output on standard output and nothing on standard error
     stdout = outcome.outputs.stdout
     self.assertEqual(len(stdout), 1)
     self.assertEqual(len(outcome.outputs.stderr), 0)
     self.assertEqual(stdout[0].value, 'Alice\nBob')
 def test_print_dataset_script(self):
     """Run a script that prints rows of an existing dataset and check the
     recorded provenance and the script output.
     """
     file_handle = self.filestore.upload_file(CSV_FILE)
     dataset = self.datastore.load_dataset(file_handle)
     command = python_cell(source=PRINT_DATASET_PY, validate=True)
     processor = PyCellTaskProcessor()
     # Make the loaded dataset available to the script as 'people'
     context = TaskContext(
         datastore=self.datastore,
         filestore=self.filestore,
         datasets={'people': dataset.identifier}
     )
     outcome = processor.compute(
         command_id=command.command_id,
         arguments=command.arguments,
         context=context
     )
     self.assertTrue(outcome.is_success)
     # The dataset 'people' is the only thing read; nothing is written
     read_prov = outcome.provenance.read
     write_prov = outcome.provenance.write
     self.assertIsNotNone(read_prov)
     self.assertIsNotNone(write_prov)
     self.assertEqual(len(read_prov), 1)
     self.assertEqual(len(write_prov), 0)
     self.assertIn('people', read_prov)
     self.assertIsNotNone(read_prov['people'])
     # A single output on standard output and nothing on standard error
     stdout = outcome.outputs.stdout
     self.assertEqual(len(stdout), 1)
     self.assertEqual(len(outcome.outputs.stderr), 0)
     self.assertEqual(stdout[0].value, 'Alice\nBob')
# Example #7
# 0
 def setUp(self):
     """Recreate an empty server directory, create a project, and set up a
     MultiProcessBackend with processors for the Python and Vizual packages
     and a fake processor registered under 'error'.
     """
     # Always start from an empty server directory
     if os.path.isdir(SERVER_DIR):
         shutil.rmtree(SERVER_DIR)
     os.makedirs(SERVER_DIR)
     project_cache = CommonProjectCache(
         datastores=FileSystemDatastoreFactory(DATASTORES_DIR),
         filestores=FileSystemFilestoreFactory(FILESTORES_DIR),
         viztrails=OSViztrailRepository(base_path=VIZTRAILS_DIR)
     )
     project = project_cache.create_project()
     self.PROJECT_ID = project.identifier
     # Processor lookup: package identifier -> processor
     processors = {
         PACKAGE_PYTHON: PyCellTaskProcessor(),
         PACKAGE_VIZUAL: VizualTaskProcessor(api=MimirVizualApi()),
         'error': FakeTaskProcessor()
     }
     self.backend = MultiProcessBackend(
         processors=processors,
         projects=project_cache
     )