Esempio n. 1
0
 def test_viztrail_workflow(self):
     """Test basic functionality of retrieving a workflow.

     A newly created viztrail is expected to have a workflow without any
     modules at the head of the default branch. Lookups for an unknown
     viztrail, an unknown branch, or an unknown workflow version are
     expected to return None. All checks run once against the repository
     instance that created the viztrail and once against an instance that
     was re-loaded from the same directory.
     """
     repo = FileSystemViztrailRepository(VIZTRAIL_DIR, repos)
     viztrail = repo.create_viztrail(ENGINEENV_DEFAULT, {'name': 'Name A'})
     vt_id = viztrail.identifier
     for reload_repository in (False, True):
         if reload_repository:
             # Re-load repository to verify what has been persisted
             repo = FileSystemViztrailRepository(VIZTRAIL_DIR, repos)
         head = repo.get_workflow(vt_id, DEFAULT_BRANCH)
         self.assertEqual(len(head.modules), 0)
         self.assertIsNone(repo.get_workflow(vt_id, 'unknown'))
         self.assertIsNone(repo.get_workflow('unknown', DEFAULT_BRANCH))
         self.assertIsNone(
             repo.get_workflow(
                 viztrail_id=vt_id,
                 branch_id=DEFAULT_BRANCH,
                 workflow_version=10
             )
         )
 def setUp(self):
     """Start each test with an empty viztrails repository."""
     # Drop anything a previous test left in the viztrails directory
     if os.path.isdir(VIZTRAILS_DIRECTORY):
         shutil.rmtree(VIZTRAILS_DIRECTORY)
     # The repository is given a single execution environment, keyed by
     # the environment's own identifier
     environments = {ENV.identifier: ENV}
     self.db = FileSystemViztrailRepository(
         VIZTRAILS_DIRECTORY,
         environments
     )
Esempio n. 3
0
 def test_viztrail_repository(self):
     """Test basic functionality of managing viztrails in the FS repository.

     Creates two viztrails for different execution environments, verifies
     their names and the packages in their command repositories, and checks
     that the same information is available after the repository has been
     re-loaded. Finally deletes the first viztrail and verifies that only
     the second one remains and that a second delete returns False.
     """
     def assert_viztrail(vt, name, has_mimir):
         """Check the name and the available packages of a viztrail."""
         self.assertEqual(vt.properties['name'], name)
         self.assertIn(PACKAGE_VIZUAL, vt.command_repository)
         self.assertIn(PACKAGE_PYTHON, vt.command_repository)
         if has_mimir:
             self.assertIn(PACKAGE_MIMIR, vt.command_repository)
         else:
             self.assertNotIn(PACKAGE_MIMIR, vt.command_repository)

     repo = FileSystemViztrailRepository(VIZTRAIL_DIR, repos)
     self.assertEqual(len(repo.list_viztrails()), 0)
     # Create two viztrails
     vt1 = repo.create_viztrail(ENGINEENV_DEFAULT, {'name': 'Name A'})
     assert_viztrail(vt1, 'Name A', has_mimir=False)
     vt2 = repo.create_viztrail(ENGINEENV_MIMIR, {'name': 'Name B'})
     assert_viztrail(vt2, 'Name B', has_mimir=True)
     self.assertEqual(len(repo.list_viztrails()), 2)
     # Re-load the repository
     repo = FileSystemViztrailRepository(VIZTRAIL_DIR, repos)
     self.assertEqual(len(repo.list_viztrails()), 2)
     vt1 = repo.get_viztrail(vt1.identifier)
     assert_viztrail(vt1, 'Name A', has_mimir=False)
     vt2 = repo.get_viztrail(vt2.identifier)
     assert_viztrail(vt2, 'Name B', has_mimir=True)
     # Delete the first viztrail
     self.assertTrue(repo.delete_viztrail(vt1.identifier))
     # Re-load the repository
     repo = FileSystemViztrailRepository(VIZTRAIL_DIR, repos)
     self.assertEqual(len(repo.list_viztrails()), 1)
     self.assertIsNone(repo.get_viztrail(vt1.identifier))
     self.assertIsNotNone(repo.get_viztrail(vt2.identifier))
     vt2 = repo.list_viztrails()[0]
     assert_viztrail(vt2, 'Name B', has_mimir=True)
     # Deleting an already deleted viztrail is expected to return False
     self.assertFalse(repo.delete_viztrail(vt1.identifier))
Esempio n. 4
0
 def test_viztrail_branches(self):
     """Test basic functionality of creating a branch.

     Branching off an unknown viztrail is expected to return None, and
     branching off the (still empty) default branch of a new viztrail is
     expected to raise a ValueError. Both expectations are checked before
     and after re-loading the repository. The provenance of the default
     branch is expected to carry no source branch and negative workflow
     version and module identifier.
     """
     repo = FileSystemViztrailRepository(VIZTRAIL_DIR, repos)
     viztrail = repo.create_viztrail(ENGINEENV_DEFAULT, {'name': 'Name A'})
     for reload_repository in (False, True):
         if reload_repository:
             # Re-load repository and repeat previous assertions
             repo = FileSystemViztrailRepository(VIZTRAIL_DIR, repos)
         # Branching off an unknown viztrail will return None
         self.assertIsNone(
             repo.create_branch(
                 'unknown',
                 DEFAULT_BRANCH,
                 {'name': 'My Branch'}
             )
         )
         # Branching off an empty branch raises a ValueError
         with self.assertRaises(ValueError):
             repo.create_branch(
                 viztrail.identifier,
                 DEFAULT_BRANCH,
                 {'name': 'My Branch'}
             )
     # The master branch provenance does not contain any information
     master = repo.get_viztrail(viztrail.identifier).branches[DEFAULT_BRANCH]
     prov = master.provenance
     self.assertIsNone(prov.source_branch)
     self.assertLess(prov.workflow_version, 0)
     self.assertLess(prov.module_id, 0)
Esempio n. 5
0
 def setUp(self):
     """Reset the working directories and create the test fixtures."""
     # Every test starts with freshly created, empty directories
     for directory in (DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR):
         if os.path.isdir(directory):
             shutil.rmtree(directory)
         os.mkdir(directory)
     self.datastore = MimirDataStore(DATASTORE_DIR)
     self.fileserver = DefaultFileServer(FILESERVER_DIR)
     # NOTE(review): the engine object is not referenced again in this
     # method; it is still constructed here in case the constructor has
     # side effects - confirm before removing.
     vizual = MimirVizualEngine(self.datastore, self.fileserver)
     environments = {ENV.identifier: ENV}
     self.db = FileSystemViztrailRepository(VIZTRAILS_DIR, environments)
Esempio n. 6
0
 def set_up_default(self):
     """Configure the test fixtures for the default Vizual engine."""
     # Execution environment offering the VizUAL and Python packages
     fs_config = FileServerConfig().from_dict({'directory': FILESERVER_DIR})
     env = ExecEnv(fs_config, packages=[PACKAGE_VIZUAL, PACKAGE_PYTHON])
     env = env.from_dict({'datastore': {'directory': DATASTORE_DIR}})
     self.ENGINE_ID = env.identifier
     # Shared set-up step (defined outside this method); it is invoked after
     # the engine identifier has been assigned and before the fixtures below
     # are created
     self.set_up()
     self.datastore = FileSystemDataStore(DATASTORE_DIR)
     self.fileserver = DefaultFileServer(FILESERVER_DIR)
     environments = {env.identifier: env}
     self.db = FileSystemViztrailRepository(VIZTRAILS_DIR, environments)
Esempio n. 7
0
 def setUp(self):
     """Create the configuration, repository and Web Service API fixtures."""
     self.config = AppConfig()
     # Execution environment offering the VizUAL and plot packages
     fs_config = FileServerConfig().from_dict({'directory': FILESERVER_DIR})
     env = ExecEnv(fs_config, packages=[PACKAGE_VIZUAL, PACKAGE_PLOT])
     env = env.from_dict({'datastore': {'directory': DATASTORE_DIR}})
     # Register the environment with the application configuration
     self.ENGINE_ID = env.identifier
     self.config.envs[self.ENGINE_ID] = env
     self.config.fileserver = env.fileserver
     # Create fresh set of directories
     for directory in (DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR):
         if os.path.isdir(directory):
             shutil.rmtree(directory)
         os.mkdir(directory)
     self.datastore = FileSystemDataStore(DATASTORE_DIR)
     self.fileserver = DefaultFileServer(FILESERVER_DIR)
     environments = {env.identifier: env}
     self.db = FileSystemViztrailRepository(VIZTRAILS_DIR, environments)
     self.api = VizierWebService(
         self.db,
         self.datastore,
         self.fileserver,
         self.config
     )
Esempio n. 8
0
 def setUp(self):
     """Create a new Web Service API on top of empty directories."""
     # Start from freshly created, empty directories
     for directory in (WORKTRAILS_DIR, DATASTORE_DIR, FILESERVER_DIR):
         if os.path.isdir(directory):
             shutil.rmtree(directory)
         os.mkdir(directory)
     # Set up the configuration with a single execution environment that
     # offers the VizUAL and Python packages
     self.config = AppConfig()
     fs_config = FileServerConfig().from_dict({'directory': FILESERVER_DIR})
     environment = ExecEnv(
         fs_config,
         packages=[PACKAGE_VIZUAL, PACKAGE_PYTHON]
     )
     self.ENV = environment.from_dict(
         {'datastore': {'directory': DATASTORE_DIR}}
     )
     self.ENGINE_ID = self.ENV.identifier
     self.config.envs[self.ENGINE_ID] = self.ENV
     self.config.fileserver = self.ENV.fileserver
     # Set up datastore, file server and API
     self.datastore = FileSystemDataStore(DATASTORE_DIR)
     self.fileserver = DefaultFileServer(FILESERVER_DIR)
     repository = FileSystemViztrailRepository(
         WORKTRAILS_DIR,
         {self.ENV.identifier: self.ENV}
     )
     self.api = VizierWebService(
         repository,
         self.datastore,
         self.fileserver,
         self.config
     )
Esempio n. 9
0
 def setUp(self):
     """Create a new Web Service API using the test configuration file."""
     # Start from freshly created, empty directories
     directories = (
         WORKTRAILS_DIRECTORY,
         DATASTORE_DIRECTORY,
         FILESERVER_DIR
     )
     for directory in directories:
         if os.path.isdir(directory):
             shutil.rmtree(directory)
         os.mkdir(directory)
     # Set up configuration and file server
     self.config = AppConfig(configuration_file=CONFIG_FILE)
     self.fileserver = DefaultFileServer(FILESERVER_DIR)
     # Replace the configured environments: 'default' maps to a TestEnv
     # instance and 'blocked' to the environment that the configuration
     # file registered under ENGINEENV_DEFAULT
     environments = {}
     environments['default'] = TestEnv()
     environments['blocked'] = self.config.envs[ENGINEENV_DEFAULT]
     self.config.envs = environments
     # Set up datastore and API
     self.datastore = FileSystemDataStore(DATASTORE_DIRECTORY)
     repository = FileSystemViztrailRepository(
         WORKTRAILS_DIRECTORY,
         self.config.envs
     )
     self.api = VizierWebService(
         repository,
         self.datastore,
         self.fileserver,
         self.config
     )
 def test_append_module(self):
     """Test appending modules.

     Appends modules to the head of the default branch and to an earlier
     workflow version. Verifies the number of workflow versions, the modules
     in each version, and the version number of the branch head, both on the
     repository instance that performed the changes and on instances that
     were re-loaded from disk.
     """
     def assert_two_versions(vt):
         """Check the default branch state after the first two appends."""
         workflows = vt.branches[DEFAULT_BRANCH].workflows
         # The default branch should have two versions. The first version
         # contains one module and the second version contains two modules
         self.assertEqual(len(workflows), 2)
         v1 = self.db.get_workflow(
             viztrail_id=vt.identifier,
             workflow_version=workflows[0].version
         )
         v2 = self.db.get_workflow(
             viztrail_id=vt.identifier,
             workflow_version=workflows[1].version
         )
         head = self.db.get_workflow(
             viztrail_id=vt.identifier,
             branch_id=DEFAULT_BRANCH
         )
         self.assertEqual(len(v1.modules), 1)
         self.assertEqual(len(v2.modules), 2)
         self.assertEqual(len(head.modules), 2)
         # Ensure that all modules have non-negative identifier
         for m in head.modules:
             self.assertGreaterEqual(m.identifier, 0)
         self.assertEqual(
             head.modules[0].command.module_type,
             PACKAGE_PYTHON
         )
         self.assertEqual(
             head.modules[1].command.module_type,
             PACKAGE_VIZUAL
         )
         self.assertEqual(head.version, 1)

     def assert_head(vt, module_types, version):
         """Check modules, module output and version of the branch head."""
         wf = self.db.get_workflow(viztrail_id=vt.identifier)
         self.assertEqual(len(wf.modules), len(module_types))
         for m in wf.modules:
             self.assertGreaterEqual(m.identifier, 0)
             self.assertEqual(
                 m.stdout[0]['data'],
                 'SUCCESS ' + str(m.identifier)
             )
         self.assertEqual(
             [m.command.module_type for m in wf.modules],
             module_types
         )
         self.assertEqual(wf.version, version)

     # Create new viztrail.
     vt = self.db.create_viztrail(ENV.identifier, {'name': 'My Project'})
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=python_cell('abc')
     )
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=load_dataset('file', 'name')
     )
     assert_two_versions(vt)
     # Re-load the viztrails to ensure that all information has been
     # persisted properly
     self.db = FileSystemViztrailRepository(
         VIZTRAILS_DIRECTORY,
         {ENV.identifier: ENV}
     )
     vt = self.db.get_viztrail(vt.identifier)
     assert_two_versions(vt)
     # Append a third module to the head of the default branch
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=python_cell('def')
     )
     assert_head(
         vt,
         [PACKAGE_PYTHON, PACKAGE_VIZUAL, PACKAGE_PYTHON],
         version=2
     )
     # Append a module to the first version in the branch. The resulting new
     # branch HEAD is expected to contain only two modules then.
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         workflow_version=0,
         command=python_cell('def')
     )
     self.db = FileSystemViztrailRepository(
         VIZTRAILS_DIRECTORY,
         {ENV.identifier: ENV}
     )
     vt = self.db.get_viztrail(vt.identifier)
     assert_head(vt, [PACKAGE_PYTHON, PACKAGE_PYTHON], version=3)
 def test_branching(self):
     """Test creating branches and modifying them independently.

     Covers branching at the head and at a given module of the default
     branch, persistence of a branch across a repository re-load, appending
     and replacing modules on individual branches without affecting the
     other branches, errors raised for invalid branch requests, and the
     provenance information recorded for each new branch.
     """
     def assert_module_types(workflow, expected_types):
         """Check the number and the package types of workflow modules."""
         self.assertEqual(
             [m.command.module_type for m in workflow.modules],
             expected_types
         )

     # Create new viztrail and ensure that it contains exactly one branch
     vt = self.db.create_viztrail(ENV.identifier, {'name': 'My Project'})
     self.assertEqual(len(vt.branches), 1)
     self.assertIn(DEFAULT_BRANCH, vt.branches)
     self.assertEqual(
         vt.branches[DEFAULT_BRANCH].identifier,
         DEFAULT_BRANCH
     )
     # Append two modules to the default branch
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=python_cell('abc')
     )
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=load_dataset('file', 'name')
     )
     # Create a branch at the end of the default branch. The new branch
     # contains one workflow with two modules the version number is 2
     newbranch = self.db.create_branch(
         viztrail_id=vt.identifier,
         properties={'name': 'New Branch'}
     )
     self.assertEqual(len(newbranch.workflows), 1)
     self.assertEqual(newbranch.workflows[-1].version, 2)
     wf = vt.get_workflow(branch_id=newbranch.identifier)
     self.assertEqual(wf.version, 2)
     self.assertEqual(len(wf.modules), 2)
     self.assertIn(newbranch.identifier, vt.branches)
     # Ensure that everything has been persisted properly
     self.db = FileSystemViztrailRepository(
         VIZTRAILS_DIRECTORY,
         {ENV.identifier: ENV}
     )
     vt = self.db.get_viztrail(vt.identifier)
     newbranch = vt.branches[newbranch.identifier]
     self.assertEqual(len(newbranch.workflows), 1)
     self.assertEqual(newbranch.workflows[-1].version, 2)
     wf = vt.get_workflow(branch_id=newbranch.identifier)
     self.assertEqual(wf.version, 2)
     self.assertEqual(len(wf.modules), 2)
     self.assertIn(newbranch.identifier, vt.branches)
     self.assertEqual(
         newbranch.properties.get_properties()['name'],
         'New Branch'
     )
     # Create a third branch from the start of the master branch
     thirdbranch = self.db.create_branch(
         viztrail_id=vt.identifier,
         properties={'name': 'Next Branch'},
         module_id=0
     )
     wf = vt.get_workflow(branch_id=thirdbranch.identifier)
     self.assertEqual(wf.version, 3)
     self.assertEqual(len(wf.modules), 1)
     # Append modules at end of master and at beginning of thirdbranch
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         command=python_cell('abc')
     )
     self.db.append_workflow_module(
         viztrail_id=vt.identifier,
         branch_id=thirdbranch.identifier,
         command=python_cell('def'),
         before_id=0
     )
     master_head = vt.get_workflow()
     assert_module_types(
         master_head,
         [PACKAGE_PYTHON, PACKAGE_VIZUAL, PACKAGE_PYTHON]
     )
     b2_head = vt.get_workflow(branch_id=newbranch.identifier)
     assert_module_types(b2_head, [PACKAGE_PYTHON, PACKAGE_VIZUAL])
     b3_head = vt.get_workflow(branch_id=thirdbranch.identifier)
     assert_module_types(b3_head, [PACKAGE_PYTHON, PACKAGE_PYTHON])
     # Replace second module of third branch
     self.db.replace_workflow_module(
         viztrail_id=vt.identifier,
         branch_id=thirdbranch.identifier,
         module_id=b3_head.modules[1].identifier,
         command=load_dataset('file', 'name')
     )
     b3_head = vt.get_workflow(branch_id=thirdbranch.identifier)
     assert_module_types(b3_head, [PACKAGE_PYTHON, PACKAGE_VIZUAL])
     # The other two branches are expected to be unchanged
     master_head = vt.get_workflow()
     assert_module_types(
         master_head,
         [PACKAGE_PYTHON, PACKAGE_VIZUAL, PACKAGE_PYTHON]
     )
     b2_head = vt.get_workflow(branch_id=newbranch.identifier)
     assert_module_types(b2_head, [PACKAGE_PYTHON, PACKAGE_VIZUAL])
     # Ensure there are exceptions raised when branching off an unknown
     # branch or module
     with self.assertRaises(ValueError):
         self.db.create_branch(
             viztrail_id=vt.identifier,
             source_branch='unknonw-branch',
             properties={'name': 'New Branch'}
         )
     with self.assertRaises(ValueError):
         self.db.create_branch(
             viztrail_id=vt.identifier,
             properties={'name': 'New Branch'},
             module_id=100
         )
     # Creating a branch without properties also raises a ValueError
     with self.assertRaises(ValueError):
         self.db.create_branch(viztrail_id=vt.identifier)
     # Test branch provenance
     self.assertEqual(newbranch.provenance.source_branch, DEFAULT_BRANCH)
     self.assertEqual(newbranch.provenance.workflow_version, 1)
     self.assertEqual(newbranch.provenance.module_id, 1)
     self.assertEqual(thirdbranch.provenance.source_branch, DEFAULT_BRANCH)
     self.assertEqual(thirdbranch.provenance.workflow_version, 1)
     self.assertEqual(thirdbranch.provenance.module_id, 0)
Esempio n. 12
0
        row = rows[i]
        print row.values


# NOTE(review): cleanUp() is defined outside this view; presumably it resets
# the working directories used below - confirm.
cleanUp()

# Execution environment registered under the Mimir engine identifier. The
# file server and the data store are configured with their local directories.
ENV = ExecEnv(FileServerConfig().from_dict({'directory': FILESERVER_DIR}),
              identifier=ENGINEENV_MIMIR).from_dict(
                  {'datastore': {
                      'directory': DATASTORE_DIR
                  }})

# Data store, file server, VizUAL engine and the viztrail repository that
# knows the single environment defined above.
datastore = MimirDataStore(DATASTORE_DIR)
fileserver = DefaultFileServer(FILESERVER_DIR)
vizual = MimirVizualEngine(datastore, fileserver)
db = FileSystemViztrailRepository(VIZTRAILS_DIR, {ENV.identifier: ENV})

# NOTE(review): presumably sets up the connection to Mimir; it is called
# before the first workflow module is appended - confirm.
mimir.initialize()

# Create the viztrail that the following commands are appended to
vt = db.create_viztrail(ENV.identifier, {'name': 'My Project'})

#
# LOAD DATASET
#
# Upload the CSV file and append a load-dataset module that refers to the
# uploaded file. Then print the dataset that the last module of the resulting
# workflow has registered under DS_NAME.
f_handle = fileserver.upload_file(CSV_FILE)
db.append_workflow_module(viztrail_id=vt.identifier,
                          command=cmd.load_dataset(f_handle.identifier,
                                                   DS_NAME))
wf = db.get_workflow(viztrail_id=vt.identifier)
ds = datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
print_dataset(ds)
Esempio n. 13
0
# Create one data store for each configured execution environment. Only the
# default and the Mimir environment are supported.
datastores = []
for env_id in config.envs:
    env_conf = config.envs[env_id]
    if env_id == ENGINEENV_DEFAULT:
        datastores.append(FileSystemDataStore(env_conf.datastore.directory))
    elif env_id == ENGINEENV_MIMIR:
        datastores.append(MimirDataStore(env_conf.datastore.directory))
    else:
        raise RuntimeError('unknown execution environment \'' + env_id + '\'')
# Fail with a descriptive error instead of an IndexError further down when
# the configuration does not contain any execution environment
if not datastores:
    raise RuntimeError('no execution environment configured')
# Federate data stores if more than one was given
if len(datastores) > 1:
    datastore = FederatedDataStore(datastores)
else:
    datastore = datastores[0]

# The viztrail repository is given all configured execution environments
viztrails = FileSystemViztrailRepository(config.viztrails.directory,
                                         config.envs)

# Initialize the Web Service API.
api = VizierWebService(viztrails, datastore, fileserver, config)

# ------------------------------------------------------------------------------
#
# Routes
#
# ------------------------------------------------------------------------------


# ------------------------------------------------------------------------------
# Service
# ------------------------------------------------------------------------------
@app.route('/')