def setUp(self): """Create an empty work trails repository.""" # Cleanup first self.cleanUp() self.datastore = MimirDataStore(DATASTORE_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) self.vizual = MimirVizualEngine(self.datastore, self.fileserver)
def setUp(self): """Create an empty file server repository.""" # Drop project descriptor directory if os.path.isdir(SERVER_DIR): shutil.rmtree(SERVER_DIR) # Setup project repository self.db = DefaultFileServer(SERVER_DIR)
def test_mem_client(self):
    """Run tests for default engine and in-memory data store."""
    self.fs = DefaultFileServer(SERVER_DIR)
    self.ds = InMemDataStore()
    self.run_client_tests(
        VizierDBClient(self.ds, dict(), DefaultVizualEngine(self.ds, self.fs))
    )
def test_fs_client(self):
    """Run tests for default engine and file server data store."""
    self.fs = DefaultFileServer(SERVER_DIR)
    self.ds = FileSystemDataStore(DATASTORE_DIR)
    self.run_client_tests(
        VizierDBClient(self.ds, dict(), DefaultVizualEngine(self.ds, self.fs))
    )
def test_mimir_client(self):
    """Run tests for default engine and Mimir data store."""
    mimir.initialize()
    self.fs = DefaultFileServer(SERVER_DIR)
    self.ds = MimirDataStore(DATASTORE_DIR)
    self.run_client_tests(
        VizierDBClient(self.ds, dict(), DefaultVizualEngine(self.ds, self.fs))
    )
    mimir.finalize()
def setUp(self): """Create an empty work trails repository.""" # Create fresh set of directories for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]: if os.path.isdir(d): shutil.rmtree(d) os.mkdir(d) self.datastore = MimirDataStore(DATASTORE_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) vizual = MimirVizualEngine(self.datastore, self.fileserver) self.db = FileSystemViztrailRepository(VIZTRAILS_DIR, {ENV.identifier: ENV})
def setUp(self): """Create empty data store directory.""" # Setup file server and upload file if os.path.isdir(FILESERVER_DIR): shutil.rmtree(FILESERVER_DIR) os.mkdir(FILESERVER_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) # Remove directory if it exists if os.path.isdir(DATASTORE_DIRECTORY): shutil.rmtree(DATASTORE_DIRECTORY) os.mkdir(DATASTORE_DIRECTORY) self.db = FileSystemDataStore(DATASTORE_DIRECTORY)
def set_up(self, engine):
    """Create an empty file server repository."""
    # Drop project descriptor directory
    if os.path.isdir(FILESERVER_DIR):
        shutil.rmtree(FILESERVER_DIR)
    # Setup project repository
    self.fs = DefaultFileServer(FILESERVER_DIR)
    if engine == ENGINEENV_DEFAULT:
        self.datastore = FileSystemDataStore(DATASTORE_DIR)
        self.vizual = DefaultVizualEngine(self.datastore, self.fs)
    elif engine == ENGINEENV_MIMIR:
        self.datastore = MimirDataStore(DATASTORE_DIR)
        self.vizual = MimirVizualEngine(self.datastore, self.fs)
def set_up_default(self):
    """Setup configuration using default Vizual engine."""
    env = ExecEnv(
        FileServerConfig().from_dict({'directory': FILESERVER_DIR}),
        packages=[PACKAGE_VIZUAL, PACKAGE_PYTHON]
    ).from_dict({'datastore': {'directory': DATASTORE_DIR}})
    self.ENGINE_ID = env.identifier
    self.set_up()
    self.datastore = FileSystemDataStore(DATASTORE_DIR)
    self.fileserver = DefaultFileServer(FILESERVER_DIR)
    self.db = FileSystemViztrailRepository(
        VIZTRAILS_DIR,
        {env.identifier: env}
    )
class TestDataStore(unittest.TestCase):

    def setUp(self):
        """Create empty data store directory."""
        # Setup file server and upload file
        if os.path.isdir(FILESERVER_DIR):
            shutil.rmtree(FILESERVER_DIR)
        os.mkdir(FILESERVER_DIR)
        self.fileserver = DefaultFileServer(FILESERVER_DIR)
        # Remove directory if it exists
        if os.path.isdir(DATASTORE_DIRECTORY):
            shutil.rmtree(DATASTORE_DIRECTORY)
        os.mkdir(DATASTORE_DIRECTORY)
        self.db = FileSystemDataStore(DATASTORE_DIRECTORY)

    def tearDown(self):
        """Delete data store directory."""
        for d in [DATASTORE_DIRECTORY, FILESERVER_DIR]:
            if os.path.isdir(d):
                shutil.rmtree(d)

    def test_datastore(self):
        """Test functionality of the file server data store."""
        ds = self.db.load_dataset(self.fileserver.upload_file(CSV_FILE))
        self.assertEquals(ds.column_counter, 3)
        self.assertEquals(ds.row_counter, 2)
        self.assertEquals(ds.row_count, 2)
        ds = self.db.get_dataset(ds.identifier)
        names = ['Name', 'Age', 'Salary']
        for i in range(3):
            col = ds.columns[i]
            self.assertEquals(col.identifier, i)
            self.assertEquals(col.name, names[i])
        rows = ds.fetch_rows()
        self.assertEquals(len(rows), ds.row_count)
        for i in range(len(rows)):
            row = rows[i]
            self.assertEquals(row.identifier, i)
        rows[0].values[0] = 'Jane'
        ds = self.db.create_dataset(columns=ds.columns, rows=rows)
        ds = self.db.get_dataset(ds.identifier)
        for i in range(3):
            col = ds.columns[i]
            self.assertEquals(col.identifier, i)
            self.assertEquals(col.name, names[i])
        rows = ds.fetch_rows()
        self.assertEquals(len(rows), ds.row_count)
        for i in range(len(rows)):
            row = rows[i]
            self.assertEquals(row.identifier, i)
        self.assertEquals(rows[0].values[0], 'Jane')
def setUp(self): """Create an empty work trails repository.""" # Create fresh set of directories self.config = AppConfig() env = ExecEnv( FileServerConfig().from_dict({'directory': FILESERVER_DIR}), packages=[PACKAGE_VIZUAL, PACKAGE_PLOT ]).from_dict({'datastore': { 'directory': DATASTORE_DIR }}) self.ENGINE_ID = env.identifier self.config.envs[self.ENGINE_ID] = env self.config.fileserver = env.fileserver for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]: if os.path.isdir(d): shutil.rmtree(d) os.mkdir(d) self.datastore = FileSystemDataStore(DATASTORE_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) self.db = FileSystemViztrailRepository(VIZTRAILS_DIR, {env.identifier: env}) self.api = VizierWebService(self.db, self.datastore, self.fileserver, self.config)
class TestLoadMimirDataset(unittest.TestCase):

    def setUp(self):
        """Create an empty work trails repository."""
        # Cleanup first
        self.cleanUp()
        self.datastore = MimirDataStore(DATASTORE_DIR)
        self.fileserver = DefaultFileServer(FILESERVER_DIR)

    def tearDown(self):
        """Clean-up by deleting directories."""
        self.cleanUp()

    def cleanUp(self):
        """Remove datastore and fileserver directory."""
        # Delete directories
        for d in [DATASTORE_DIR, FILESERVER_DIR]:
            if os.path.isdir(d):
                shutil.rmtree(d)

    def test_load(self):
        """Load every file in LOAD_DIR and validate the resulting datasets."""
        # Ignore files that raised errors (or are taking too much time to load)
        ignore_files = ['JSONOUTPUTWIDE.csv']
        data_types = set()
        mimir.initialize()
        for filename in os.listdir(LOAD_DIR):
            if filename in ignore_files:
                continue
            print 'LOAD ' + filename
            filename = os.path.join(LOAD_DIR, filename)
            f_handle = self.fileserver.upload_file(filename)
            ds = self.datastore.load_dataset(f_handle)
            ds_load = self.datastore.get_dataset(ds.identifier)
            for col in ds_load.columns:
                data_types.add(col.data_type)
                print '\t' + col.name_in_rdb + ' AS ' + col.name + '(' + col.data_type + ')'
            print '\t' + str(ds.row_count) + ' row(s)'
            self.assertEquals(len(ds.columns), len(ds_load.columns))
            self.assertEquals(ds.column_counter, ds_load.column_counter)
            self.assertEquals(ds.row_counter, ds_load.row_counter)
            rows = ds.fetch_rows()
            self.assertEquals(ds.row_counter, len(rows))
            self.assertEquals(ds.row_count, len(rows))
            for i in range(len(rows)):
                row = rows[i]
                self.assertEquals(row.identifier, i)
                self.assertEquals(len(row.values), len(ds.columns))
        mimir.finalize()
        print data_types
def setUp(self): """Create an new Web Service API.""" # Clear various directories for d in [WORKTRAILS_DIR, DATASTORE_DIR, FILESERVER_DIR]: if os.path.isdir(d): shutil.rmtree(d) os.mkdir(d) # Setup datastore and API self.config = AppConfig() self.ENV = ExecEnv( FileServerConfig().from_dict({'directory': FILESERVER_DIR}), packages=[PACKAGE_VIZUAL, PACKAGE_PYTHON ]).from_dict({'datastore': { 'directory': DATASTORE_DIR }}) self.ENGINE_ID = self.ENV.identifier self.config.envs[self.ENGINE_ID] = self.ENV self.config.fileserver = self.ENV.fileserver self.datastore = FileSystemDataStore(DATASTORE_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) self.api = VizierWebService( FileSystemViztrailRepository(WORKTRAILS_DIR, {self.ENV.identifier: self.ENV}), self.datastore, self.fileserver, self.config)
class TestMimirAnnotations(unittest.TestCase):

    def setUp(self):
        """Create an empty work trails repository."""
        # Create fresh set of directories
        for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]:
            if os.path.isdir(d):
                shutil.rmtree(d)
            os.mkdir(d)
        self.datastore = MimirDataStore(DATASTORE_DIR)
        self.fileserver = DefaultFileServer(FILESERVER_DIR)
        vizual = MimirVizualEngine(self.datastore, self.fileserver)
        self.db = FileSystemViztrailRepository(VIZTRAILS_DIR, {ENV.identifier: ENV})

    def tearDown(self):
        """Clean-up by deleting the directories created by the engine."""
        # Delete directories
        for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]:
            if os.path.isdir(d):
                shutil.rmtree(d)

    def test_annotations(self):
        """Test cell annotations produced by a missing value lens."""
        # Create new work trail and create dataset from CSV file
        mimir.initialize()
        f_handle = self.fileserver.upload_file(CSV_FILE)
        vt = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
        self.db.append_workflow_module(
            viztrail_id=vt.identifier,
            command=cmd.load_dataset(f_handle.identifier, DS_NAME)
        )
        wf = self.db.get_workflow(viztrail_id=vt.identifier)
        ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
        # Missing Value Lens
        self.db.append_workflow_module(
            viztrail_id=vt.identifier,
            command=cmd.mimir_missing_value(
                DS_NAME,
                ds.column_by_name('AGE').identifier
            )
        )
        wf = self.db.get_workflow(viztrail_id=vt.identifier)
        ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
        annos = ds.get_annotations(column_id=1, row_id=2)
        self.assertEquals(len(annos), 2)
        for anno in annos:
            self.assertEquals(anno.key, ANNO_UNCERTAIN)
        mimir.finalize()
def setUp(self): """Create an new Web Service API.""" # Clear various directories for d in [WORKTRAILS_DIRECTORY, DATASTORE_DIRECTORY, FILESERVER_DIR]: if os.path.isdir(d): shutil.rmtree(d) os.mkdir(d) # Setup datastore and API self.config = AppConfig(configuration_file=CONFIG_FILE) self.fileserver = DefaultFileServer(FILESERVER_DIR) self.config.envs = { 'default': TestEnv(), 'blocked': self.config.envs[ENGINEENV_DEFAULT] } self.datastore = FileSystemDataStore(DATASTORE_DIRECTORY) self.api = VizierWebService( FileSystemViztrailRepository( WORKTRAILS_DIRECTORY, self.config.envs ), self.datastore, self.fileserver, self.config )
configuration is used.
"""
config = AppConfig()
# Create the app and enable cross-origin resource sharing
app = Flask(__name__)
app.config['APPLICATION_ROOT'] = config.api.app_path
app.config['DEBUG'] = config.debug
# Set size limit for uploaded files
app.config['MAX_CONTENT_LENGTH'] = config.fileserver.max_file_size
CORS(app)
# Currently uses the default file server
fileserver = DefaultFileServer(config.fileserver.directory)
# Create datastore for the API. Different execution environments may use
# different data stores. The API needs to be able to serve datasets from all
# of them. Thus, if more than one execution environment is specified we need
# to use a federated datastore. Individual viztrails will create their own
# instances of their respective data store.
datastores = list()
for env_id in config.envs:
    env_conf = config.envs[env_id]
    if env_id == ENGINEENV_DEFAULT:
        datastores.append(FileSystemDataStore(env_conf.datastore.directory))
    elif env_id == ENGINEENV_MIMIR:
        datastores.append(MimirDataStore(env_conf.datastore.directory))
    else:
        raise RuntimeError('unknown execution environment \'' + env_id + '\'')
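# --- Sketch (assumption, not confirmed project code) -------------------------
# The comment above explains that, when more than one execution environment is
# configured, the API should serve datasets through a federated datastore. The
# snippet stops after collecting the per-environment stores; a plausible
# continuation could combine them as shown below. The wrapper name
# 'FederatedDataStore' and the single-store shortcut are assumptions.
if len(datastores) == 1:
    # Only one execution environment is configured; use its store directly.
    datastore = datastores[0]
else:
    # Serve datasets from all configured environments through one interface.
    datastore = FederatedDataStore(datastores)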
class TestVizualEngine(unittest.TestCase): def set_up(self, engine): """Create an empty file server repository.""" # Drop project descriptor directory if os.path.isdir(FILESERVER_DIR): shutil.rmtree(FILESERVER_DIR) # Setup project repository self.fs = DefaultFileServer(FILESERVER_DIR) if engine == ENGINEENV_DEFAULT: self.datastore = FileSystemDataStore(DATASTORE_DIR) self.vizual = DefaultVizualEngine(self.datastore, self.fs) elif engine == ENGINEENV_MIMIR: self.datastore = MimirDataStore(DATASTORE_DIR) self.vizual = MimirVizualEngine(self.datastore, self.fs) self.file = self.fs.upload_file(CSV_FILE) def tear_down(self, engine): """Clean-up by dropping file server directory. """ # Drop data store directory if os.path.isdir(DATASTORE_DIR): shutil.rmtree(DATASTORE_DIR) # Drop project descriptor directory if os.path.isdir(FILESERVER_DIR): shutil.rmtree(FILESERVER_DIR) def test_default_engine(self): """Test functionality if the default VizUAL engine.""" self.run_engine_tests(ENGINEENV_DEFAULT) def test_mimir_engine(self): """Test functionality if the Mimir VizUAL engine.""" import vistrails.packages.mimir.init as mimir mimir.initialize() self.run_engine_tests(ENGINEENV_MIMIR) mimir.finalize() def run_engine_tests(self, engine): """Run sequence of tests for given engine.""" self.load_dataset(engine) self.insert_column(engine) self.insert_row(engine) self.delete_column(engine) self.delete_row(engine) self.move_column(engine) self.move_row(engine) self.rename_column(engine) self.update_cell(engine) self.filter_columns(engine) self.sort_dataset(engine) self.sequence_of_steps(engine) def delete_column(self, engine): """Test functionality to delete a column.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() # Keep track of column and row identifier col_ids = [col.identifier for col in ds.columns] row_ids = [row.identifier for row in ds_rows] # Delete Age column col_id = ds.column_by_name('AGE').identifier col_count, id1 = self.vizual.delete_column(ds.identifier, col_id) del col_ids[1] # Result should indicate that one column was deleted. 
The identifier of # the resulting dataset should differ from the identifier of the # original dataset self.assertEquals(col_count, 1) self.assertNotEquals(id1, ds.identifier) # Retrieve modified dataset and ensure that it cobtains the following # # Name, Salary # ------------ # Alice, 35K # Bob, 30K ds = self.datastore.get_dataset(id1) ds_rows = ds.fetch_rows() # Schema is Name, Salary self.assertEquals(len(ds.columns), 2) self.assertEquals(ds.columns[0].name.upper(), 'NAME') self.assertEquals(ds.columns[1].name.upper(), 'SALARY') # Make sure column identifier haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Make sure that all rows only have two columns row = ds_rows[0] self.assertEquals(len(row.values), 2) self.assertEquals(len(row.values), 2) self.assertEquals(row.values[0], 'Alice') self.assertEquals(row.values[1], '35K') row = ds_rows[1] self.assertEquals(len(row.values), 2) self.assertEquals(len(row.values), 2) self.assertEquals(row.values[0], 'Bob') self.assertEquals(row.values[1], '30K') # Ensure that row identifier haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Ensure exception is thrown if dataset identifier is unknown with self.assertRaises(ValueError): self.vizual.delete_column('unknown:uri', 0) self.tear_down(engine) def delete_row(self, engine): """Test functionality to delete a row.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() # Keep track of column and row identifier col_ids = [col.identifier for col in ds.columns] row_ids = [row.identifier for row in ds_rows] # Delete second row row_count, id1 = self.vizual.delete_row(ds.identifier, 1) del row_ids[1] # Result should indicate that one row was deleted. 
The identifier of the # resulting dataset should differ from the identifier of the original # dataset self.assertEquals(row_count, 1) self.assertNotEquals(id1, ds.identifier) # Retrieve modified dataset and ensure that it contains the following # data: # # Name, Age, Salary # ------------ # Alice, 23, 35K ds = self.datastore.get_dataset(id1) ds_rows = ds.fetch_rows() # Schema is Name, Salary col_names = ['Name', 'Age', 'Salary'] self.assertEquals(len(ds.columns), len(col_names)) for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].name.upper(), col_names[i].upper()) # Make sure column identifier haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # There should only be one row self.assertEquals(len(ds_rows), 1) # Ensure that row identifier haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Ensure exception is thrown if dataset identifier is unknown with self.assertRaises(ValueError): self.vizual.delete_row('unknown:uri', 1) # Ensure exception is thrown if row index is out of bounds with self.assertRaises(ValueError): self.vizual.delete_row(ds.identifier, 100) self.tear_down(engine) def filter_columns(self, engine): """Test projection of a dataset.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) count, ds_id = self.vizual.filter_columns(ds.identifier, [2, 0], ['BD', None]) ds = self.datastore.get_dataset(ds_id) self.assertEquals(len(ds.columns), 2) self.assertEquals(ds.columns[0].identifier, 2) self.assertEquals(ds.columns[0].name.upper(), 'BD') self.assertEquals(ds.columns[1].identifier, 0) self.assertEquals(ds.columns[1].name.upper(), 'NAME') rows = ds.fetch_rows() self.assertEquals(rows[0].values, ['35K', 'Alice']) self.assertEquals(rows[1].values, ['30K', 'Bob']) with self.assertRaises(ValueError): self.vizual.filter_columns(ds.identifier, [0, 1], ['BD', None]) self.tear_down(engine) def insert_column(self, engine): """Test functionality to insert a columns.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() # Keep track of column and row identifier col_ids = [col.identifier for col in ds.columns] row_ids = [row.identifier for row in ds_rows] # Insert columns at position 1 col_ids.insert(1, ds.column_counter) col_count, id1 = self.vizual.insert_column(ds.identifier, 1, 'Height') # Result should indicate that one column was inserted. The identifier of # the resulting dataset should differ from the identifier of the # original dataset self.assertEquals(col_count, 1) self.assertNotEquals(id1, ds.identifier) # Retrieve dataset and ensure that it has the following schema: # Name, Height, Age, Salary ds = self.datastore.get_dataset(id1) col_names = ['Name', 'Height', 'Age', 'Salary'] # Ensure that there are four rows self.assertEquals(len(ds.columns), len(col_names)) for i in range(len(col_names)): col = ds.columns[i] self.assertEquals(col.identifier, col_ids[i]) self.assertEquals(col.name.upper(), col_names[i].upper()) # Insert columns at last position col_ids.append(ds.column_counter) col_names.append('Weight') col_count, id2 = self.vizual.insert_column(id1, 4, 'Weight') # Result should indicate that one column was deleted. 
The identifier of # the resulting dataset should differ from the identifier of the # previous dataset self.assertEquals(col_count, 1) self.assertNotEquals(id1, id2) # Retrieve dataset and ensure that it has the following schema: # Name, Height, Age, Salary, Weight ds = self.datastore.get_dataset(id2) ds_rows = ds.fetch_rows() # Ensure that there are five rows self.assertEquals(len(ds.columns), len(col_names)) for i in range(len(col_names)): col = ds.columns[i] self.assertEquals(col.identifier, col_ids[i]) self.assertEquals(col.name.upper(), col_names[i].upper()) # The cell values for new columns are None all other values are not None for row in ds_rows: for i in range(len(ds.columns)): if i == 1 or i == 4: self.assertTrue(is_null(row.values[i])) else: self.assertFalse(is_null(row.values[i])) # Ensure that row identifier haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Ensure exception is thrown if dataset identifier is unknown with self.assertRaises(ValueError): self.vizual.insert_column('unknown:uri', 1, 'Height') # Ensure exception is thrown if column name is invalid self.vizual.insert_column(ds.identifier, 1, 'Height from ground') with self.assertRaises(ValueError): self.vizual.insert_column(ds.identifier, 1, 'Height from ground!@#') # Ensure exception is thrown if column position is out of bounds with self.assertRaises(ValueError): self.vizual.insert_column(ds.identifier, 100, 'Height') self.tear_down(engine) def insert_row(self, engine): """Test functionality to insert a row.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() # Keep track of column and row identifier col_ids = [col.identifier for col in ds.columns] row_ids = [row.identifier for row in ds_rows] # Insert row at index position 1 row_ids.insert(1, ds.row_counter) # Result should indicate that one row was inserted. 
The identifier of # the resulting dataset should differ from the identifier of the # original dataset row_count, id1 = self.vizual.insert_row(ds.identifier, 1) self.assertEquals(row_count, 1) self.assertNotEquals(id1, ds.identifier) # Retrieve modified dataset ds = self.datastore.get_dataset(id1) ds_rows = ds.fetch_rows() # Ensure that there are three rows self.assertEquals(len(ds_rows), 3) # The second row has empty values for each column row = ds_rows[1] self.assertEquals(len(row.values), len(ds.columns)) for i in range(len(ds.columns)): self.assertTrue(is_null(row.values[i])) # Append row at end current dataset row_ids.append(ds.row_counter) row_count, id2 = self.vizual.insert_row(id1, 3) self.assertEquals(row_count, 1) self.assertNotEquals(id1, id2) ds = self.datastore.get_dataset(id2) ds_rows = ds.fetch_rows() # Ensure that there are three rows self.assertEquals(len(ds_rows), 4) # The next to last row has non-empty values for each column row = ds_rows[2] self.assertEquals(len(row.values), len(ds.columns)) for i in range(len(ds.columns)): self.assertFalse(is_null(row.values[i])) # The last row has empty values for each column row = ds_rows[3] self.assertEquals(len(row.values), len(ds.columns)) for i in range(len(ds.columns)): self.assertTrue(is_null(row.values[i])) # Ensure that row ids haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Make sure column identifier haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Ensure exception is thrown if dataset identifier is unknown with self.assertRaises(ValueError): self.vizual.insert_row('unknown:uri', 1) # Ensure exception is thrown if row index is out of bounds with self.assertRaises(ValueError): self.vizual.insert_row(ds.identifier, 5) # Ensure no exception is raised self.vizual.insert_row(ds.identifier, 4) self.tear_down(engine) def load_dataset(self, engine): """Test functionality to load a dataset.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() self.assertEquals(len(ds.columns), 3) self.assertEquals(len(ds_rows), 2) for row in ds_rows: self.assertTrue(isinstance(row.values[1], int)) # Ensure exception is thrown if dataset identifier is unknown with self.assertRaises(ValueError): self.vizual.load_dataset('unknown:uri') self.tear_down(engine) def move_column(self, engine): """Test functionality to move a column.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() # Keep track of column and row identifier col_ids = [col.identifier for col in ds.columns] row_ids = [row.identifier for row in ds_rows] # Swap first two columns c = col_ids[0] del col_ids[0] col_ids.insert(1, c) col_count, id1 = self.vizual.move_column( ds.identifier, ds.column_by_name('Name').identifier, 1) self.assertEquals(col_count, 1) self.assertNotEquals(id1, ds.identifier) ds = self.datastore.get_dataset(id1) ds_rows = ds.fetch_rows() self.assertEquals(ds.columns[0].name.upper(), 'Age'.upper()) self.assertEquals(ds.columns[1].name.upper(), 'Name'.upper()) self.assertEquals(ds.columns[2].name.upper(), 'Salary'.upper()) row = ds_rows[0] self.assertEquals(row.values[0], 23) self.assertEquals(row.values[1], 'Alice') self.assertEquals(row.values[2], '35K') row = ds_rows[1] self.assertEquals(row.values[0], 32) self.assertEquals(row.values[1], 'Bob') self.assertEquals(row.values[2], '30K') # Ensure that row ids haven't changed 
for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Make sure column identifier haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Swap last two columns c = col_ids[1] del col_ids[1] col_ids.append(c) col_count, id2 = self.vizual.move_column( id1, ds.column_by_name('Salary').identifier, 1) ds = self.datastore.get_dataset(id2) ds_rows = ds.fetch_rows() self.assertEquals(ds.columns[0].name.upper(), 'Age'.upper()) self.assertEquals(ds.columns[1].name.upper(), 'Salary'.upper()) self.assertEquals(ds.columns[2].name.upper(), 'Name'.upper()) row = ds_rows[0] self.assertEquals(row.values[0], 23) self.assertEquals(row.values[1], '35K') self.assertEquals(row.values[2], 'Alice') row = ds_rows[1] self.assertEquals(row.values[0], 32) self.assertEquals(row.values[1], '30K') self.assertEquals(row.values[2], 'Bob') # Ensure that row ids haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Make sure column identifier haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Raise error if source column is out of bounds with self.assertRaises(ValueError): self.vizual.move_column(id2, 40, 1) # Raise error if target position is out of bounds with self.assertRaises(ValueError): self.vizual.move_column(id2, ds.column_by_name('Name').identifier, -1) with self.assertRaises(ValueError): self.vizual.move_column(id2, ds.column_by_name('Name').identifier, 4) self.tear_down(engine) def move_row(self, engine): """Test functionality to move a row.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() # Keep track of column and row identifier col_ids = [col.identifier for col in ds.columns] row_ids = [row.identifier for row in ds_rows] # Swap first two rows row_ids = [row for row in reversed(row_ids)] row_count, id1 = self.vizual.move_row(ds.identifier, 0, 1) self.assertEquals(row_count, 1) self.assertNotEquals(id1, ds.identifier) ds = self.datastore.get_dataset(id1) ds_rows = ds.fetch_rows() self.assertEquals(ds.columns[0].name.upper(), 'Name'.upper()) self.assertEquals(ds.columns[1].name.upper(), 'Age'.upper()) self.assertEquals(ds.columns[2].name.upper(), 'Salary'.upper()) row = ds_rows[0] self.assertEquals(row.values[0], 'Bob') self.assertEquals(row.values[1], 32) self.assertEquals(row.values[2], '30K') row = ds_rows[1] self.assertEquals(row.values[0], 'Alice') self.assertEquals(row.values[1], 23) self.assertEquals(row.values[2], '35K') # Ensure that row ids haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Make sure column identifier haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Swap last two rows row_ids = [row for row in reversed(row_ids)] row_count, id2 = self.vizual.move_row(id1, 1, 0) ds = self.datastore.get_dataset(id2) ds_rows = ds.fetch_rows() self.assertEquals(ds.columns[0].name.upper(), 'Name'.upper()) self.assertEquals(ds.columns[1].name.upper(), 'Age'.upper()) self.assertEquals(ds.columns[2].name.upper(), 'Salary'.upper()) row = ds_rows[0] self.assertEquals(row.values[0], 'Alice') self.assertEquals(row.values[1], 23) self.assertEquals(row.values[2], '35K') row = ds_rows[1] self.assertEquals(row.values[0], 'Bob') self.assertEquals(row.values[1], 32) self.assertEquals(row.values[2], '30K') # Ensure that row ids haven't changed for i in 
range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Make sure column identifier haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Move first row to the end row_count, id3 = self.vizual.move_row(id2, 0, 2) row_ids = [row for row in reversed(row_ids)] ds = self.datastore.get_dataset(id3) ds_rows = ds.fetch_rows() row = ds_rows[0] self.assertEquals(row.values[0], 'Bob') self.assertEquals(row.values[1], 32) self.assertEquals(row.values[2], '30K') row = ds_rows[1] self.assertEquals(row.values[0], 'Alice') self.assertEquals(row.values[1], 23) self.assertEquals(row.values[2], '35K') # Ensure that row ids haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Make sure column identifier haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Raise error if source row is out of bounds with self.assertRaises(ValueError): self.vizual.move_row(id2, 3, 1) # Raise error if target position is out of bounds with self.assertRaises(ValueError): self.vizual.move_row(id2, 0, -1) with self.assertRaises(ValueError): self.vizual.move_row(id2, 1, 4) self.tear_down(engine) def rename_column(self, engine): """Test functionality to rename a column.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() # Keep track of column and row identifier col_ids = [col.identifier for col in ds.columns] row_ids = [row.identifier for row in ds_rows] # Rename first column to Firstname col_count, id1 = self.vizual.rename_column( ds.identifier, ds.column_by_name('Name').identifier, 'Firstname') self.assertEquals(col_count, 1) self.assertNotEquals(id1, ds.identifier) ds = self.datastore.get_dataset(id1) self.assertEquals(ds.columns[0].name.upper(), 'Firstname'.upper()) self.assertEquals(ds.columns[1].name.upper(), 'Age'.upper()) self.assertEquals(ds.columns[2].name.upper(), 'Salary'.upper()) col_count, id2 = self.vizual.rename_column( id1, ds.column_by_name('Age').identifier, 'BDate') ds = self.datastore.get_dataset(id2) ds_rows = ds.fetch_rows() self.assertEquals(ds.columns[0].name.upper(), 'Firstname'.upper()) self.assertEquals(ds.columns[1].name, 'BDate') self.assertEquals(ds.columns[2].name.upper(), 'Salary'.upper()) # Ensure that row ids haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Make sure column identifier haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Ensure exception is thrown if dataset identifier is unknown with self.assertRaises(ValueError): self.vizual.rename_column('unknown:uri', 0, 'Firstname') # Ensure exception is thrown for invalid column id with self.assertRaises(ValueError): self.vizual.rename_column(id2, 500, 'BDate') self.tear_down(engine) def sequence_of_steps(self, engine): """Test sequence of calls that modify a dataset.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) count, ds_id = self.vizual.insert_row(ds.identifier, 1) ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.insert_column(ds_id, 3, 'HDate') ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.update_cell( ds_id, ds.column_by_name('HDate').identifier, 0, '180') ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.update_cell( ds_id, ds.column_by_name('HDate').identifier, 1, '160') ds = 
self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.rename_column( ds_id, ds.column_by_name('HDate').identifier, 'Height') ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.update_cell( ds_id, ds.column_by_name('Height').identifier, 2, '170') ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.move_row(ds_id, 1, 2) ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.update_cell( ds_id, ds.column_by_name('Name').identifier, 2, 'Carla') ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.update_cell( ds_id, ds.column_by_name('Age').identifier, 2, '45') ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.update_cell( ds_id, ds.column_by_name('Salary').identifier, 2, '56K') ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.move_column( ds_id, ds.column_by_name('Salary').identifier, 4) ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.delete_column( ds_id, ds.column_by_name('Age').identifier) ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.delete_row(ds_id, 0) ds = self.datastore.get_dataset(ds_id) count, ds_id = self.vizual.delete_row(ds_id, 0) ds = self.datastore.get_dataset(ds_id) ds_rows = ds.fetch_rows() names = ['Name', 'Height', 'Salary'] self.assertEquals(len(ds.columns), len(names)) for i in range(len(names)): col = ds.columns[i] self.assertEquals(col.name.upper(), names[i].upper()) self.assertEquals([col.identifier for col in ds.columns], [0, 3, 2]) self.assertEquals(len(ds_rows), 1) self.assertEquals(ds_rows[0].values, ['Carla', '160', '56K']) self.assertEquals(ds_rows[0].identifier, 2) self.tear_down(engine) def sort_dataset(self, engine): """Test sorting a dataset.""" self.set_up(engine) # Create a new dataset fh = self.fs.upload_file(SORT_FILE) ds = self.vizual.load_dataset(fh.identifier) count, ds_id = self.vizual.sort_dataset(ds.identifier, [1, 2, 0], [False, False, True]) ds = self.datastore.get_dataset(ds_id) rows = ds.fetch_rows() names = ['Alice', 'Bob', 'Dave', 'Gertrud', 'Frank'] result = list() for row in rows: name = row.values[0] if name in names: result.append(name) for i in range(len(names)): self.assertEquals(names[i], result[i]) count, ds_id = self.vizual.sort_dataset(ds.identifier, [2, 1, 0], [True, False, True]) ds = self.datastore.get_dataset(ds_id) rows = ds.fetch_rows() names = ['Gertrud', 'Frank', 'Bob', 'Alice', 'Dave'] result = list() for row in rows: name = row.values[0] if name in names: result.append(name) for i in range(len(names)): self.assertEquals(names[i], result[i]) self.tear_down(engine) def update_cell(self, engine): """Test functionality to update a dataset cell.""" self.set_up(engine) # Create a new dataset ds = self.vizual.load_dataset(self.file.identifier) ds_rows = ds.fetch_rows() # Keep track of column and row identifier col_ids = [col.identifier for col in ds.columns] row_ids = [row.identifier for row in ds_rows] # Update cell [0, 0]. Ensure that one row was updated and a new # identifier is generated. 
Also ensure that the resulting datasets # has the new value in cell [0, 0] upd_rows, id1 = self.vizual.update_cell(ds.identifier, 0, 0, 'MyValue') self.assertEquals(upd_rows, 1) self.assertNotEquals(ds.identifier, id1) ds = self.datastore.get_dataset(id1) ds_rows = ds.fetch_rows() self.assertEquals(ds_rows[0].values[0], 'MyValue') upd_rows, id2 = self.vizual.update_cell( id1, ds.column_by_name('Name').identifier, 0, 'AValue') ds = self.datastore.get_dataset(id2) ds_rows = ds.fetch_rows() self.assertEquals(ds_rows[0].values[0], 'AValue') self.assertEquals(ds_rows[0].values[ds.column_index('Name')], 'AValue') # Ensure that row ids haven't changed for i in range(len(ds_rows)): self.assertEquals(ds_rows[i].identifier, row_ids[i]) # Make sure column identifier haven't changed for i in range(len(ds.columns)): self.assertEquals(ds.columns[i].identifier, col_ids[i]) # Set value to None upd_rows, id3 = self.vizual.update_cell( id2, ds.column_by_name('Name').identifier, 0, None) ds = self.datastore.get_dataset(id3) ds_rows = ds.fetch_rows() self.assertIsNone(ds_rows[0].values[0]) self.assertIsNone(ds_rows[0].values[ds.column_index('Name')]) # Ensure exception is thrown if column is unknown with self.assertRaises(ValueError): self.vizual.update_cell(ds.identifier, 100, 0, 'MyValue') # Ensure exception is thrown if row index is out ouf bounds with self.assertRaises(ValueError): self.vizual.update_cell(ds.identifier, 0, 100, 'MyValue') self.tear_down(engine)
class TestLoadMimirDataset(unittest.TestCase):

    def setUp(self):
        """Create an empty work trails repository."""
        # Cleanup first
        self.cleanUp()
        self.datastore = MimirDataStore(DATASTORE_DIR)
        self.fileserver = DefaultFileServer(FILESERVER_DIR)
        self.vizual = MimirVizualEngine(self.datastore, self.fileserver)

    def tearDown(self):
        """Clean-up by deleting directories."""
        self.cleanUp()

    def cleanUp(self):
        """Remove datastore and fileserver directory."""
        # Delete directories
        for d in [DATASTORE_DIR, FILESERVER_DIR]:
            if os.path.isdir(d):
                shutil.rmtree(d)

    def test_load(self):
        """Update cells of different data types and verify the resulting
        column types and values.
        """
        mimir.initialize()
        self.update_cell(CSV_FILE, 2, 0, 'int', 10)
        self.update_cell(CSV_FILE, 2, 0, 'int', 10.3, result_type='real')
        self.update_cell(CSV_FILE, 2, 0, 'int', None)
        self.update_cell(CSV_FILE, 3, 0, 'real', 10.3)
        self.update_cell(CSV_FILE, 3, 0, 'real', 10, result_value=10.0)
        self.update_cell(CSV_FILE, 3, 0, 'real', 'A', result_type='varchar')
        self.update_cell(CSV_FILE, 3, 0, 'real', None)
        self.update_cell(CSV_FILE, 4, 0, 'varchar', 'A')
        self.update_cell(CSV_FILE, 4, 0, 'varchar', 10, result_value='10')
        self.update_cell(CSV_FILE, 4, 0, 'varchar', 10.87, result_value='10.87')
        self.update_cell(CSV_FILE, 4, 0, 'varchar', None)
        self.update_cell(CSV_FILE, 8, 0, 'bool', 'False', result_value=False)
        self.update_cell(CSV_FILE, 8, 0, 'bool', '0', result_value=False)
        self.update_cell(CSV_FILE, 8, 0, 'bool', None)
        self.update_cell(CSV_FILE, 8, 1, 'bool', True, result_value=True)
        self.update_cell(CSV_FILE, 8, 1, 'bool', '1', result_value=True)
        self.update_cell(CSV_FILE, 8, 1, 'bool', 'A', result_value='A', result_type='varchar')
        self.update_cell(CSV_FILE, 8, 1, 'bool', 10.87, result_value='10.87', result_type='varchar')
        self.update_cell(CSV_FILE_DT, 1, 0, 'date', '2018-05-09')
        self.update_cell(CSV_FILE_DT, 1, 0, 'date', '20180509', result_value='20180509', result_type='varchar')
        self.update_cell(CSV_FILE_DT, 1, 0, 'date', None)
        self.update_cell(CSV_FILE_DT, 0, 0, 'datetime', '2018-05-09 12:03:22.0000')
        self.update_cell(CSV_FILE_DT, 0, 0, 'datetime', 'ABC', result_value='ABC', result_type='varchar')
        self.update_cell(CSV_FILE_DT, 0, 0, 'datetime', None)
        mimir.finalize()

    def update_cell(self, filename, col, row, data_type, value, result_value=None, result_type=None):
        """Update the value of the given cell. The column data type is
        expected to match the given data type. The optional result value is
        the expected value of the cell in the modified dataset.
        """
        f_handle = self.fileserver.upload_file(filename)
        ds = self.datastore.load_dataset(f_handle)
        #print [c.name_in_rdb + ' AS ' + c.name + '(' + c.data_type + ')' for c in ds.columns]
        self.assertEquals(ds.columns[col].data_type, data_type)
        rows = ds.fetch_rows()
        self.assertNotEquals(rows[row].values[col], value)
        _, ds_id = self.vizual.update_cell(ds.identifier, col, row, value)
        ds = self.datastore.get_dataset(ds_id)
        #print [c.name_in_rdb + ' AS ' + c.name + '(' + c.data_type + ')' for c in ds.columns]
        if result_type is None:
            self.assertEquals(ds.columns[col].data_type, data_type)
        else:
            self.assertEquals(ds.columns[col].data_type, result_type)
        rows = ds.fetch_rows()
        if result_value is None:
            self.assertEquals(rows[row].values[col], value)
        else:
            self.assertEquals(rows[row].values[col], result_value)
        self.fileserver.delete_file(f_handle.identifier)
class TestWebServiceAPI(unittest.TestCase): def setUp(self): """Create an new Web Service API.""" # Clear various directories for d in [WORKTRAILS_DIR, DATASTORE_DIR, FILESERVER_DIR]: if os.path.isdir(d): shutil.rmtree(d) os.mkdir(d) # Setup datastore and API self.config = AppConfig() self.ENV = ExecEnv( FileServerConfig().from_dict({'directory': FILESERVER_DIR}), packages=[PACKAGE_VIZUAL, PACKAGE_PYTHON ]).from_dict({'datastore': { 'directory': DATASTORE_DIR }}) self.ENGINE_ID = self.ENV.identifier self.config.envs[self.ENGINE_ID] = self.ENV self.config.fileserver = self.ENV.fileserver self.datastore = FileSystemDataStore(DATASTORE_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) self.api = VizierWebService( FileSystemViztrailRepository(WORKTRAILS_DIR, {self.ENV.identifier: self.ENV}), self.datastore, self.fileserver, self.config) def tearDown(self): """Clean-up by deleting created directories. """ for d in [WORKTRAILS_DIR, DATASTORE_DIR, FILESERVER_DIR]: if os.path.isdir(d): shutil.rmtree(d) def test_service_descriptors(self): """Ensure validity of the service descriptor and build information.""" desc = self.api.service_overview() # The descriptor is expected to contain three elements: name, title, and # links. Name and title should be the same as in the default config self.validate_keys(desc, ['name', 'envs', 'properties', 'links']) self.assertEquals(desc['name'], self.config.name) self.assertFalse(len(desc['envs']) == 0) for env in desc['envs']: self.validate_keys( env, ['id', 'name', 'description', 'default', 'packages']) # Expect five references in the link listing: self, build, upload, doc, # and projects self.validate_links(desc['links'], [ 'self', 'build', 'doc', 'upload', 'projects', 'notebooks', 'files' ]) # The build information should have two elements: components and links build = self.api.system_build() self.assertEquals(len(build), 2) for key in ['components', 'links']: self.assertTrue(key in build) # The components list should include three entries (projects, datasets, # and workflows, each with name and version information. 
components = {c['name']: c['build'] for c in build['components']} self.assertEquals(len(components), 3) for key in ['datastore', 'fileserver', 'viztrails']: self.assertTrue(key in components) for info in ['name', 'version']: self.assertTrue(info in components[key]) def test_files(self): """Test API calls to upload and retrieve datasets.""" # Upload a new dataset fh = self.api.upload_file(CSV_FILE) # The result should contain five elements: id, name, columns, rows, and # links self.validate_file_handle(fh) # Retrieve the full dataset from the API fh = self.api.get_file(fh['id']) self.validate_file_handle(fh) # Retrieving an unknown dataset should return None self.assertIsNone(self.api.get_file('invalid id')) self.assertIsNone(self.api.get_file('f0f0f0f0f0f0f0f0f0f0f0f0')) self.validate_file_listing(self.api.list_files(), 1) self.api.upload_file(TSV_FILE) self.validate_file_listing(self.api.list_files(), 2) self.validate_file_handle(self.api.rename_file(fh['id'], 'myfile')) self.validate_file_listing(self.api.list_files(), 2) self.assertIsNone(self.api.rename_file('invalid id', 'afile')) self.assertTrue(self.api.delete_file(fh['id'])) self.validate_file_listing(self.api.list_files(), 1) def test_datasets(self): """Test retireval of datasets.""" ds = self.datastore.load_dataset(self.fileserver.upload_file(CSV_FILE)) ds = self.datastore.create_dataset(columns=ds.columns, rows=ds.fetch_rows()) self.validate_dataset_handle(self.api.get_dataset(ds.identifier)) anno = self.api.update_dataset_annotation(ds.identifier, column_id=0, key='comment', value='Hello') anno_id = anno['annotations'][0]['id'] self.api.update_dataset_annotation(ds.identifier, row_id=1, key='comment', value='World') self.api.update_dataset_annotation(ds.identifier, column_id=1, row_id=0, key='comment', value='!') self.validate_dataset_annotations(ds.identifier, column_id=0, expected={'comment': 'Hello'}) self.validate_dataset_annotations(ds.identifier, row_id=1, expected={'comment': 'World'}) self.validate_dataset_annotations(ds.identifier, column_id=1, row_id=0, expected={'comment': '!'}) # Update annotations self.api.update_dataset_annotation(ds.identifier, anno_id=anno_id, column_id=0, key='comment', value='Some Name') self.validate_dataset_annotations(ds.identifier, column_id=0, expected={'comment': 'Some Name'}) # Make sure unknown datasets are handeled correctly self.assertIsNone(self.api.get_dataset('someunknonwidentifier')) self.assertIsNone( self.api.get_dataset_annotations('someunknonwidentifier')) def test_projects(self): """Test API calls to create and manipulate projects.""" # Create a new project ph = self.api.create_project(self.ENV.identifier, {'name': 'My Project'}) self.validate_project_descriptor(ph) self.validate_project_handle(self.api.get_project(ph['id'])) # Project listing self.validate_project_listing(self.api.list_projects(), 1) ph = self.api.create_project(self.ENV.identifier, {'name': 'A Project'}) self.validate_project_handle(self.api.get_project(ph['id'])) self.validate_project_listing(self.api.list_projects(), 2) # Update project properties props = {p['key']: p['value'] for p in ph['properties']} self.assertEquals(props['name'], 'A Project') ph = self.api.update_project_properties(ph['id'], {'name': 'New Name'}) self.validate_project_descriptor(ph) props = {p['key']: p['value'] for p in ph['properties']} self.assertEquals(props['name'], 'New Name') # Module specifications modules = self.api.list_module_specifications_for_project(ph['id']) self.assertEquals(len(modules), 3) 
self.validate_keys(modules, ['modules', 'project', 'links']) self.validate_project_descriptor(modules['project']) for m in modules['modules']: self.validate_keys(m, ['type', 'id', 'name', 'arguments'], optional_keys=['group']) arg_keys = ['id', 'label', 'name', 'datatype', 'index', 'required'] for arg in m['arguments']: self.assertTrue(len(arg) >= len(arg_keys)) for k in arg_keys: self.assertTrue(k in arg) # Delete project self.assertTrue(self.api.delete_project(ph['id'])) self.validate_project_listing(self.api.list_projects(), 1) # Retrieve non-existing project should return None self.assertIsNone(self.api.get_project('invalid-id')) # Delete a non existing project should return False self.assertFalse(self.api.delete_project(ph['id'])) # Updating a non exisiting project should return None self.assertIsNone( self.api.update_project_properties(ph['id'], {'name': 'New Name'})) def test_spreadsheet(self): """Ensure that the includeDataset option is working for spreadsheet updates.""" # Upload file fh = self.fileserver.upload_file(CSV_FILE) # Create project ph = self.api.create_project(self.ENV.identifier, {'name': 'My Project'}) pid = ph['id'] # Load dataset DS_NAME = 'myDS' cmd = load_dataset(fh.identifier, DS_NAME) result = self.api.append_module(pid, DEFAULT_BRANCH, -1, cmd) self.validate_keys(result, ['workflow', 'modules', 'datasets']) # Update cell and request to include dataset cmd = update_cell(DS_NAME, 0, 0, 'A') result = self.api.append_module(pid, DEFAULT_BRANCH, -1, cmd, includeDataset={ 'name': DS_NAME, 'offset': 0 }) self.validate_keys(result, ['workflow', 'dataset']) self.validate_dataset_handle(result['dataset']) # In case of an error the result contains the modules cmd = update_cell(DS_NAME, 100, 0, 'A') result = self.api.append_module(pid, DEFAULT_BRANCH, -1, cmd, includeDataset={ 'name': DS_NAME, 'offset': 0 }) self.validate_keys(result, ['workflow', 'modules', 'datasets']) def test_workflows(self): """Test API calls to retrieve and manipulate workflows.""" # Create a new project ph = self.api.create_project(self.ENV.identifier, {'name': 'My Project'}) self.validate_branch_listing(self.api.list_branches(ph['id']), 1) self.validate_branch_handle( self.api.get_branch(ph['id'], DEFAULT_BRANCH)) wf = self.api.get_workflow(ph['id'], DEFAULT_BRANCH) self.validate_workflow_handle(wf) # Raise exception when creating branch of empty workflow with self.assertRaises(ValueError): self.api.create_branch(ph['id'], DEFAULT_BRANCH, -1, 0, {'name': 'My Branch'}) # Result is None when creating of a non existing branch with self.assertRaises(ValueError): self.api.create_branch(ph['id'], 'unknown', -1, 0, {'name': 'My Branch'}) # Execute a new command last_modified = ph['lastModifiedAt'] result = self.api.append_module(ph['id'], DEFAULT_BRANCH, -1, python_cell('2+2')) self.validate_workflow_update_result(result) wf = self.api.get_workflow(ph['id'], DEFAULT_BRANCH) self.validate_workflow_handle(wf) modules = self.api.get_workflow_modules(ph['id'], DEFAULT_BRANCH, -1) self.validate_workflow_modules(modules, number_of_modules=1) self.assertNotEquals(last_modified, wf['project']['lastModifiedAt']) last_modified = wf['project']['lastModifiedAt'] # Create a new branch time.sleep(1) desc = self.api.create_branch(ph['id'], DEFAULT_BRANCH, -1, 0, {'name': 'My Branch'}) self.validate_branch_handle(desc) branch_wf = self.api.get_workflow(ph['id'], desc['id']) self.assertNotEquals(last_modified, branch_wf['project']['lastModifiedAt']) last_modified = branch_wf['project']['lastModifiedAt'] 
self.validate_workflow_handle(branch_wf) modules = self.api.get_workflow_modules(ph['id'], desc['id'], -1) self.validate_workflow_modules(modules, number_of_modules=1) # Replace module in new branch time.sleep(1) result = self.api.replace_module(ph['id'], desc['id'], -1, 0, python_cell('3+3')) self.validate_workflow_update_result(result) modules = self.api.get_workflow_modules(ph['id'], desc['id'], -1) self.validate_workflow_modules(modules, number_of_modules=1) branch_wf = self.api.get_workflow(ph['id'], desc['id']) # Ensure that the last modified date of the project has changed self.assertNotEquals(last_modified, branch_wf['project']['lastModifiedAt']) # Append module to new branch self.validate_workflow_update_result( self.api.append_module(ph['id'], desc['id'], -1, python_cell('4+4'))) modules = self.api.get_workflow_modules(ph['id'], desc['id'], -1) self.validate_workflow_modules(modules, number_of_modules=2) branch_wf = self.api.get_workflow_modules(ph['id'], desc['id']) self.assertEquals(len(branch_wf['modules']), 2) wf = self.api.get_workflow_modules(ph['id'], DEFAULT_BRANCH) self.assertEquals(len(wf['modules']), 1) self.validate_branch_listing(self.api.list_branches(ph['id']), 2) # Update new branch name branch_wf = self.api.update_branch(ph['id'], desc['id'], {'name': 'Some Branch'}) self.validate_branch_handle(branch_wf) n1 = self.api.get_branch(ph['id'], DEFAULT_BRANCH)['properties'][0]['value'] n2 = self.api.get_branch(ph['id'], desc['id'])['properties'][0]['value'] self.assertEquals(n2, 'Some Branch') self.assertNotEquals(n1, n2) # Retrieving the workflow for an unknown project should return None self.assertIsNone(self.api.get_workflow('invalid id', DEFAULT_BRANCH)) self.assertIsNone( self.api.get_workflow('f0f0f0f0f0f0f0f0f0f0f0f0', DEFAULT_BRANCH)) # Delete workflow self.assertTrue(self.api.delete_branch(ph['id'], desc['id'])) self.assertFalse(self.api.delete_branch(ph['id'], desc['id'])) with self.assertRaises(ValueError): self.api.delete_branch(ph['id'], DEFAULT_BRANCH) def test_workflow_commands(self): """Test API calls to execute workflow modules.""" # Create a new project pj = self.api.create_project(self.ENV.identifier, {'name': 'My Project'}) # Use Python load command to test module execution self.api.append_module(pj['id'], DEFAULT_BRANCH, -1, python_cell('2+2')) self.api.append_module(pj['id'], DEFAULT_BRANCH, -1, python_cell('3+3')) wf_master = self.api.get_workflow_modules(pj['id'], DEFAULT_BRANCH) content = list_modules_arguments_values(wf_master['modules']) self.assertEquals(len(content), 2) self.assertEquals(content[0], '2+2') self.assertEquals(content[1], '3+3') branch_id = self.api.create_branch(pj['id'], DEFAULT_BRANCH, -1, 0, {'name': 'My Name'})['id'] wf_branch = self.api.get_workflow_modules(pj['id'], branch_id) content = list_modules_arguments_values(wf_branch['modules']) self.assertEquals(len(content), 1) self.assertEquals(content[0], '2+2') # Replace first module in master and append to second branch_id self.api.replace_module(pj['id'], DEFAULT_BRANCH, -1, 0, python_cell('4+4')) self.api.append_module(pj['id'], branch_id, -1, python_cell('5+5')) wf_master = self.api.get_workflow_modules(pj['id'], DEFAULT_BRANCH) content = list_modules_arguments_values(wf_master['modules']) self.assertEquals(len(content), 2) self.assertEquals(content[0], '4+4') self.assertEquals(content[1], '3+3') wf_branch = self.api.get_workflow_modules(pj['id'], branch_id) content = list_modules_arguments_values(wf_branch['modules']) self.assertEquals(len(content), 2) 
self.assertEquals(content[0], '2+2') self.assertEquals(content[1], '5+5') # Delete module m_count = len(wf_branch['modules']) m_id = wf_branch['modules'][-1]['id'] self.api.delete_module(pj['id'], branch_id, -1, m_id) wf_branch = self.api.get_workflow_modules(pj['id'], branch_id) self.assertEquals(len(wf_branch['modules']), m_count - 1) for m in wf_branch['modules']: self.assertNotEquals(m['id'], m_id) self.assertIsNone(self.api.delete_module(pj['id'], branch_id, -1, 100)) def validate_branch_listing(self, listing, number_of_branches): self.validate_keys(listing, ['branches', 'links']) self.validate_links(listing['links'], ['self', 'create', 'project']) self.assertEquals(len(listing['branches']), number_of_branches) for br in listing['branches']: self.validate_branch_descriptor(br) def validate_branch_descriptor(self, branch): self.validate_keys(branch, ['id', 'properties', 'links']) self.validate_links(branch['links'], ['self', 'delete', 'head', 'project', 'update']) def validate_branch_handle(self, branch): self.validate_keys( branch, ['id', 'project', 'workflows', 'properties', 'links']) self.validate_links(branch['links'], ['self', 'delete', 'head', 'project', 'update']) self.validate_project_descriptor(branch['project']) for wf in branch['workflows']: self.validate_workflow_descriptor(wf) def validate_dataset_handle(self, ds): self.validate_keys(ds, [ 'id', 'columns', 'rows', 'links', 'offset', 'rowcount', 'annotatedCells' ]) for col in ds['columns']: self.validate_keys(col, ['id', 'name']) for row in ds['rows']: self.validate_keys(row, ['id', 'index', 'values']) self.validate_links( ds['links'], ['self', 'download', 'annotations', 'pagefirst', 'pagefirstanno']) def validate_dataset_annotations(self, ds_id, column_id=-1, row_id=-1, expected=dict()): annotations = self.api.get_dataset_annotations(ds_id, column_id=column_id, row_id=row_id) keys = ['links', 'annotations'] if column_id >= 0: keys.append('column') if row_id >= 0: keys.append('row') self.validate_keys(annotations, keys) annos = annotations['annotations'] self.assertEquals(len(annos), len(expected)) for anno in annos: self.validate_keys(anno, ['id', 'key', 'value']) key = anno['key'] self.assertTrue(key in expected) self.assertEquals(anno['value'], expected[key]) def validate_file_handle(self, fh): self.validate_keys(fh, [ 'id', 'name', 'columns', 'rows', 'filesize', 'createdAt', 'lastModifiedAt', 'links' ]) links = {l['rel']: l['href'] for l in fh['links']} self.validate_links(fh['links'], ['self', 'delete', 'rename', 'download']) def validate_file_listing(self, fl, number_of_files): self.validate_keys(fl, ['files', 'links']) links = {l['rel']: l['href'] for l in fl['links']} self.validate_links(fl['links'], ['self', 'upload']) self.assertEquals(len(fl['files']), number_of_files) for fh in fl['files']: self.validate_file_handle(fh) def validate_keys(self, obj, keys, optional_keys=list()): if len(obj) > len(keys): for key in obj: self.assertTrue(key in keys or key in optional_keys, msg='Invalid key ' + key) else: for key in keys: self.assertTrue(key in obj, msg='Missing key ' + key) def validate_links(self, links, keys): self.validate_keys({l['rel']: l['href'] for l in links}, keys) def validate_module_handle(self, module): self.validate_keys(module, [ 'id', 'command', 'text', 'stdout', 'stderr', 'datasets', 'links', 'views' ]) self.validate_keys(module['command'], ['type', 'id', 'arguments']) self.validate_links(module['links'], ['delete', 'insert', 'replace']) for ds in module['datasets']: self.validate_keys(ds, ['id', 
'name']) def validate_project_descriptor(self, pd): self.validate_keys(pd, [ 'id', 'environment', 'createdAt', 'lastModifiedAt', 'properties', 'links' ]) links = {l['rel']: l['href'] for l in pd['links']} self.validate_keys( links, ['self', 'delete', 'home', 'update', 'branches', 'environment']) def validate_project_handle(self, ph, br_count=1): self.validate_keys(ph, [ 'id', 'environment', 'createdAt', 'lastModifiedAt', 'properties', 'branches', 'links' ]) self.validate_links( ph['links'], ['self', 'delete', 'home', 'update', 'branches', 'environment']) self.validate_keys(ph['environment'], ['id', 'modules']) self.assertEquals(len(ph['branches']), br_count) for br in ph['branches']: self.validate_branch_descriptor(br) def validate_project_listing(self, pl, number_of_projects): self.validate_keys(pl, ['projects', 'links']) self.validate_links(pl['links'], ['self', 'create', 'home']) self.assertEquals(len(pl['projects']), number_of_projects) for pj in pl['projects']: self.validate_project_descriptor(pj) def validate_workflow_descriptor(self, wf): self.validate_keys(wf, [ 'version', 'links', 'createdAt', 'packageId', 'commandId', 'action', 'statement' ]) self.validate_links( wf['links'], ['self', 'branch', 'branches', 'append', 'modules']) def validate_workflow_handle(self, wf): self.validate_keys(wf, [ 'project', 'branch', 'version', 'createdAt', 'state', 'links', 'readOnly' ]) self.validate_links( wf['links'], ['self', 'branch', 'branches', 'append', 'modules']) self.validate_project_descriptor(wf['project']) state = wf['state'] self.validate_keys(state, ['datasets', 'charts', 'hasError', 'moduleCount']) def validate_workflow_modules(self, wf, number_of_modules=0): self.validate_keys(wf, [ 'project', 'branch', 'version', 'modules', 'createdAt', 'links', 'datasets', 'readOnly' ]) self.validate_links(wf['links'], ['self', 'workflow']) self.validate_project_descriptor(wf['project']) self.assertEquals(len(wf['modules']), number_of_modules) for m in wf['modules']: self.validate_module_handle(m) def validate_workflow_update_result(self, wf): self.validate_keys(wf, ['workflow', 'modules', 'datasets']) self.validate_workflow_handle(wf['workflow']) for m in wf['modules']: self.validate_module_handle(m) for ds in wf['datasets']: self.validate_dataset_handle(ds)
FILESERVER_DIR = './env/fs'
CSV_FILE = '../data/mimir/Employee.csv' #pick.csv

def cleanUp():
    """Remove datastore and fileserver directory."""
    # Delete directories
    for d in [DATASTORE_DIR, FILESERVER_DIR]:
        if os.path.isdir(d):
            shutil.rmtree(d)

cleanUp()
datastore = MimirDataStore(DATASTORE_DIR)
fileserver = DefaultFileServer(FILESERVER_DIR)
vizual = MimirVizualEngine(datastore, fileserver)
mimir.initialize()

filename = CSV_FILE
print 'LOAD ' + filename
f_handle = fileserver.upload_file(filename)
ds = datastore.load_dataset(f_handle)
ds_load = datastore.get_dataset(ds.identifier)
print [col.name_in_rdb + ' AS ' + col.name + '(' + col.data_type + ')' for col in ds_load.columns]
print str(ds.row_count) + ' row(s)'
rows = ds.fetch_rows()
for i in range(len(rows)):
    row = rows[i]
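# A minimal sketch, assuming rows as returned by fetch_rows() above: one way to
# inspect them using only attributes exercised elsewhere in these tests
# (row.identifier and row.values).
def dump_rows(rows):
    """Print each row's identifier together with its values."""
    for row in rows:
        print(str(row.identifier) + ': ' + str(row.values))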
class TestDatasetPaginationReader(unittest.TestCase): def set_up(self, engine): """Create an empty file server repository.""" # Drop project descriptor directory if os.path.isdir(FILESERVER_DIR): shutil.rmtree(FILESERVER_DIR) # Setup project repository self.fs = DefaultFileServer(FILESERVER_DIR) if engine == ENGINEENV_DEFAULT: self.datastore = FileSystemDataStore(DATASTORE_DIR) self.vizual = DefaultVizualEngine(self.datastore, self.fs) elif engine == ENGINEENV_MIMIR: self.datastore = MimirDataStore(DATASTORE_DIR) self.vizual = MimirVizualEngine(self.datastore, self.fs) def tear_down(self, engine): """Clean-up by dropping file server directory. """ # Drop data store directory if os.path.isdir(DATASTORE_DIR): shutil.rmtree(DATASTORE_DIR) # Drop project descriptor directory if os.path.isdir(FILESERVER_DIR): shutil.rmtree(FILESERVER_DIR) def test_default_engine(self): """Test functionality for the default setup.""" self.run_tests(ENGINEENV_DEFAULT) def test_mimir_engine(self): """Test functionality for the Mimir setup.""" import vistrails.packages.mimir.init as mimir mimir.initialize() self.run_tests(ENGINEENV_MIMIR) mimir.finalize() def run_tests(self, engine): """Run sequence of tests for given configuration.""" self.set_up(engine) ds = self.vizual.load_dataset(self.fs.upload_file(CSV_FILE).identifier) rows = ds.fetch_rows() self.assertEquals(len(rows), 7) rows = ds.fetch_rows(offset=1) self.assertEquals(len(rows), 6) self.assertEquals(rows[0].values[0], 'Bob') self.assertEquals(rows[5].values[0], 'Gertrud') rows = ds.fetch_rows(limit=2) self.assertEquals(len(rows), 2) self.assertEquals(rows[0].values[0], 'Alice') self.assertEquals(rows[1].values[0], 'Bob') rows = ds.fetch_rows(offset=4, limit=3) self.assertEquals(len(rows), 3) self.assertEquals(rows[0].values[0], 'Eileen') self.assertEquals(rows[2].values[0], 'Gertrud') rows = ds.fetch_rows(offset=5, limit=3) self.assertEquals(len(rows), 2) self.assertEquals(rows[0].values[0], 'Frank') self.assertEquals(rows[1].values[0], 'Gertrud') rows = ds.fetch_rows(offset=6, limit=3) self.assertEquals(len(rows), 1) self.assertEquals(rows[0].values[0], 'Gertrud') # Test larger dataset with deletes ds = self.vizual.load_dataset(self.fs.upload_file(TSV_FILE).identifier) rows = ds.fetch_rows(offset=0, limit=10) self.assertEquals(len(rows), 10) self.assertEquals([r.identifier for r in rows], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) _, id1 = self.vizual.delete_row(ds.identifier, 2) # ID=2 _, id2 = self.vizual.delete_row(id1, 4) # ID=5 ds = self.datastore.get_dataset(id2) rows = ds.fetch_rows(offset=0, limit=10) self.assertEquals([r.identifier for r in rows], [0, 1, 3, 4, 6, 7, 8, 9, 10, 11]) _, id1 = self.vizual.move_row(ds.identifier, 9, 1) # ID=11 _, id2 = self.vizual.move_row(id1, 9, 1) # ID=10 ds = self.datastore.get_dataset(id2) rows = ds.fetch_rows(offset=1, limit=10) self.assertEquals([r.identifier for r in rows], [10, 11, 1, 3, 4, 6, 7, 8, 9, 12]) rows = ds.fetch_rows(offset=2, limit=10) self.assertEquals([r.identifier for r in rows], [11, 1, 3, 4, 6, 7, 8, 9, 12, 13]) rows = ds.fetch_rows(offset=3, limit=10) self.assertEquals([r.identifier for r in rows], [1, 3, 4, 6, 7, 8, 9, 12, 13, 14]) self.tear_down(engine)
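# A minimal sketch, assuming a dataset handle like the ones loaded above: the
# offset/limit pagination that run_tests exercises, wrapped as a generator.
# The page_size parameter is a hypothetical convenience, not part of the tested API.
def iterate_pages(dataset, page_size=10):
    """Yield successive pages of rows until the dataset is exhausted."""
    offset = 0
    while True:
        rows = dataset.fetch_rows(offset=offset, limit=page_size)
        if not rows:
            break
        yield rows
        if len(rows) < page_size:
            break
        offset += len(rows)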
class TestUnicodeHandling(unittest.TestCase): def tearDown(self): """Clean-up by dropping the MongoDB colelction used by the engine. """ # Delete directories for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]: if os.path.isdir(d): shutil.rmtree(d) def set_up(self): """Create an empty work trails repository.""" # Create fresh set of directories for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]: if os.path.isdir(d): shutil.rmtree(d) os.mkdir(d) def set_up_default(self): """Setup configuration using default Vizual engine.""" env = ExecEnv( FileServerConfig().from_dict({'directory': FILESERVER_DIR}), packages=[PACKAGE_VIZUAL, PACKAGE_PYTHON] ).from_dict({'datastore': {'directory': DATASTORE_DIR}}) self.ENGINE_ID = env.identifier self.set_up() self.datastore = FileSystemDataStore(DATASTORE_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) self.db = FileSystemViztrailRepository( VIZTRAILS_DIR, {env.identifier: env} ) def set_up_mimir(self): """Setup configuration using Mimir engine.""" env = ExecEnv( FileServerConfig().from_dict({'directory': FILESERVER_DIR}), identifier=ENGINEENV_MIMIR, packages=[PACKAGE_VIZUAL, PACKAGE_PYTHON, PACKAGE_MIMIR] ).from_dict({'datastore': {'directory': DATASTORE_DIR}}) self.ENGINE_ID = env.identifier self.set_up() self.datastore = MimirDataStore(DATASTORE_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) self.db = FileSystemViztrailRepository( VIZTRAILS_DIR, {env.identifier: env} ) def test_vt_default(self): """Run workflow with default configuration.""" # Create new work trail and retrieve the HEAD workflow of the default # branch self.set_up_default() self.run_workflow() def test_vt_mimir(self): """Run workflows for Mimir configurations.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() self.set_up_mimir() self.run_workflow() mimir.finalize() def run_workflow(self): """Test functionality to execute a Python script that creates a dataset containing unicode characters.""" f_handle = self.fileserver.upload_file(CSV_FILE) vt = self.db.create_viztrail(self.ENGINE_ID, {'name' : 'My Project'}) # LOAD DATASET self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) # RUN Python Script self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.python_cell(PYTHON_SCRIPT) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) if wf.has_error: print wf.modules[-1].stderr self.assertFalse(wf.has_error) #print wf.modules[-1].stdout[0]['data'] ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) names = set(c.name.upper().replace('_', ' ') for c in ds.columns) self.assertTrue(len(names), 4) for name in ['DATE', 'IMO CODE', 'PORT OF DEPARTURE', 'PLACE OF RECEIPT']: self.assertTrue(name in names)
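# A minimal sketch, assuming a dataset handle as fetched in run_workflow above:
# the same column-name normalization the unicode test applies before comparing
# against the expected names.
def normalized_column_names(dataset):
    """Upper-case column names and replace underscores with spaces."""
    return set(c.name.upper().replace('_', ' ') for c in dataset.columns)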
class TestDataStore(unittest.TestCase): def setUp(self): """Create empty data store directory.""" # Setup file server and upload file if os.path.isdir(FILESERVER_DIR): shutil.rmtree(FILESERVER_DIR) os.mkdir(FILESERVER_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) # Remove directory if it exists if os.path.isdir(DATASTORE_DIRECTORY): shutil.rmtree(DATASTORE_DIRECTORY) os.mkdir(DATASTORE_DIRECTORY) self.db = MimirDataStore(DATASTORE_DIRECTORY) def tearDown(self): """Delete data store directory. """ for d in [DATASTORE_DIRECTORY, FILESERVER_DIR]: if os.path.isdir(d): shutil.rmtree(d) def test_datastore(self): """Test functionality of the file server data store.""" mimir.initialize() ds = self.db.load_dataset(self.fileserver.upload_file(CSV_FILE)) self.assertEquals(ds.column_counter, 3) self.assertEquals(ds.row_counter, 2) self.assertEquals(ds.row_count, 2) cols = [('NAME', COL_PREFIX + '0', 'varchar'), ('AGE', COL_PREFIX + '1', 'int'), ('SALARY', COL_PREFIX + '2', 'varchar')] control_rows = [(0, ['Alice', 23, '35K']), (1, ['Bob', 32, '30K'])] for column in ds.columns: self.validate_column(column, cols[column.identifier]) self.validate_rowid_column(ds.rowid_column) self.validate_rows(ds.fetch_rows(), control_rows) # Get dataset and repeat tests ds = self.db.get_dataset(ds.identifier) self.assertEquals(ds.column_counter, 3) self.assertEquals(ds.row_counter, 2) self.assertEquals(len(ds.row_ids), 2) for column in ds.columns: self.validate_column(column, cols[column.identifier]) self.validate_rowid_column(ds.rowid_column) self.validate_rows(ds.fetch_rows(), control_rows) # Create dataset names = ['NAME', 'AGE', 'SALARY'] rows = ds.fetch_rows() rows[0].values[0] = 'Jane' rows = [rows[1], rows[0]] ds = self.db.create_dataset(columns=ds.columns, rows=rows) ds = self.db.get_dataset(ds.identifier) for i in range(3): col = ds.columns[i] self.assertEquals(col.identifier, i) self.assertEquals(col.name, names[i]) rows = ds.fetch_rows() for i in range(len(rows)): row = rows[(len(rows) - 1) - i] self.assertEquals(row.identifier, i) self.assertEquals(rows[1].values[0], 'Jane') # DONE mimir.finalize() def validate_column(self, column, col_props): """Validate that column name and data type are as expected.""" name, name_in_rdb, data_type = col_props self.assertEquals(column.name, name) self.assertEquals(column.name_in_rdb, name_in_rdb) self.assertEquals(column.data_type, data_type) def validate_rowid_column(self, col): """Ensure the row id column has the correct name and a data type.""" self.assertEquals(col.name, col.name_in_rdb) self.assertEquals(col.name, ROW_ID) self.assertEquals(col.data_type, 'int') def validate_rows(self, dataset_rows, control_rows): """Make sure all data is read correctly.""" self.assertEquals(len(dataset_rows), len(control_rows)) for i in range(len(dataset_rows)): ds_row = dataset_rows[i] row_id, values = control_rows[i] self.assertEquals(ds_row.identifier, row_id) self.assertEquals(ds_row.values, values)
class TestFileServer(unittest.TestCase): def setUp(self): """Create an empty file server repository.""" # Drop project descriptor directory if os.path.isdir(SERVER_DIR): shutil.rmtree(SERVER_DIR) # Setup project repository self.db = DefaultFileServer(SERVER_DIR) def tearDown(self): """Clean-up by dropping file server directory. """ shutil.rmtree(SERVER_DIR) def test_delete_file(self): """Test delete file method.""" f = self.db.upload_file(CSV_FILE) f = self.db.get_file(f.identifier) self.assertIsNotNone(f) self.assertTrue(self.db.delete_file(f.identifier)) f = self.db.get_file(f.identifier) self.assertIsNone(f) def test_get_file(self): """Test file get method.""" f = self.db.upload_file(CSV_FILE) f = self.db.get_file(f.identifier) self.assertEquals(f.columns, 3) self.assertEquals(f.rows, 2) self.assertEquals(f.name, 'dataset.csv') # Ensure that the file parses as a zipped TSV file with f.open() as csvfile: rows = 0 for row in csv.reader(csvfile, delimiter=f.delimiter): self.assertEquals(len(row), f.columns) rows += 1 self.assertEquals(rows - 1, f.rows) def test_list_file(self): """Test upload of different file types and the list files method.""" fh = self.db.upload_file(CSV_FILE) self.assertFalse(fh.compressed) self.assertEquals(fh.delimiter, ',') fh = self.db.upload_file(GZIP_CSV_FILE) self.assertTrue(fh.compressed) self.assertEquals(fh.delimiter, ',') fh = self.db.upload_file(TSV_FILE) self.assertFalse(fh.compressed) self.assertEquals(fh.delimiter, '\t') fh = self.db.upload_file(GZIP_TSV_FILE) self.assertTrue(fh.compressed) self.assertEquals(fh.delimiter, '\t') files = self.db.list_files() self.assertEquals(len(files), 4) # Ensure that each of the files parses as a zipped TSV file for f in files: with f.open() as csvfile: rows = 0 for row in csv.reader(csvfile, delimiter=f.delimiter): self.assertEquals(len(row), f.columns) rows += 1 self.assertEquals(rows - 1, f.rows) def test_rename_file(self): """Test rename file method.""" f = self.db.upload_file(CSV_FILE) f = self.db.get_file(f.identifier) f = self.db.rename_file(f.identifier, 'somename') self.assertEquals(f.name, 'somename') f = self.db.get_file(f.identifier) f = self.db.rename_file(f.identifier, 'somename') self.assertEquals(f.name, 'somename') f = self.db.rename_file(f.identifier, 'somename') self.assertEquals(f.name, 'somename') def test_upload_file(self): """Test file upload.""" f = self.db.upload_file(CSV_FILE) self.assertEquals(f.columns, 3) self.assertEquals(f.rows, 2) self.assertEquals(f.name, 'dataset.csv')
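# A minimal sketch, assuming a file handle as returned by upload_file above:
# the row-counting check that test_get_file and test_list_file repeat, using
# the handle's open() and delimiter attributes.
import csv

def count_data_rows(file_handle):
    """Return the number of non-header rows in an uploaded file."""
    with file_handle.open() as csvfile:
        reader = csv.reader(csvfile, delimiter=file_handle.delimiter)
        return sum(1 for _ in reader) - 1  # subtract the header line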
class TestDataStore(unittest.TestCase): def setup_fileserver(self): """Create a fresh file server.""" if os.path.isdir(FILESERVER_DIR): shutil.rmtree(FILESERVER_DIR) os.mkdir(FILESERVER_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) def set_up(self, store_type): """Create empty data store directory.""" if store_type == MEM_DATASTORE: self.db = InMemDataStore() else: # Remove directory if it exists if os.path.isdir(DATASTORE_DIRECTORY): shutil.rmtree(DATASTORE_DIRECTORY) os.mkdir(DATASTORE_DIRECTORY) if store_type == FS_DATASTORE: self.db = FileSystemDataStore(DATASTORE_DIRECTORY) elif store_type == MIMIR_DATASTORE: self.db = MimirDataStore(DATASTORE_DIRECTORY) def tear_down(self, store_type): """Delete data store directory. """ for d in [DATASTORE_DIRECTORY, FILESERVER_DIR]: if os.path.isdir(d): shutil.rmtree(d) def test_federated_datastore(self): """Test functionality of the federated data store.""" self.setup_fileserver() store1 = InMemDataStore() store2 = InMemDataStore() fh = self.fileserver.upload_file(CSV_FILE) ds1 = store1.load_dataset(fh) ds2 = store2.load_dataset(fh) fed_store = FederatedDataStore([store1, store2]) self.assertIsNotNone(fed_store.get_dataset(ds1.identifier)) self.assertIsNotNone(fed_store.get_dataset(ds2.identifier)) self.assertIsNone(fed_store.get_dataset('UNDEFINED')) with self.assertRaises(NotImplementedError): fed_store.load_dataset(fh) self.assertIsNotNone(fed_store.update_annotation(ds1.identifier, column_id=0, key='name', value='My Name')) self.assertIsNotNone(fed_store.update_annotation(ds2.identifier, column_id=0, key='name', value='My Name')) self.assertIsNone(fed_store.update_annotation('UNDEFINED', column_id=0, key='name', value='My Name')) def test_fs_datastore(self): """Run test for file system datastore.""" self.run_tests(FS_DATASTORE) def test_mem_datastore(self): """Run test for in-memory datastore.""" self.run_tests(MEM_DATASTORE) def test_mimir_datastore(self): """Run test for Mimir datastore.""" mimir.initialize() self.run_tests(MIMIR_DATASTORE) self.set_up(MIMIR_DATASTORE) self.load_tsv() self.tear_down(MIMIR_DATASTORE) mimir.finalize() def test_volatile_datastore(self): """Test volatile data store on top of a file system data store.""" self.set_up(FS_DATASTORE) self.setup_fileserver() ds = self.db.load_dataset(self.fileserver.upload_file(CSV_FILE)) ds_rows = ds.fetch_rows() self.assertEquals(len(ds_rows), ds.row_count) v_store = VolatileDataStore(self.db) # Make sure the existing dataset is accessible via the volatile store v_ds = DatasetClient(dataset=v_store.get_dataset(ds.identifier)) self.assertIsNotNone(v_ds) self.assertEquals(v_ds.get_cell('Salary', 1), '30K') # Create an updated dataset. The original should be the same in both # stores v_ds.rows[1].set_value('Salary', '40K') v_ds = v_store.create_dataset(columns=v_ds.columns, rows=v_ds.rows) self.assertEquals(DatasetClient(dataset=self.db.get_dataset(ds.identifier)).get_cell('Salary', 1), '30K') self.assertEquals(DatasetClient(dataset=v_store.get_dataset(ds.identifier)).get_cell('Salary', 1), '30K') self.assertEquals(DatasetClient(dataset=v_store.get_dataset(v_ds.identifier)).get_cell('Salary', 1), '40K') self.assertIsNone(self.db.get_dataset(v_ds.identifier)) # Delete both datasets. The volatile store is empty. The original should # be unchanged. 
self.assertTrue(v_store.delete_dataset(ds.identifier)) self.assertTrue(v_store.delete_dataset(v_ds.identifier)) self.assertFalse(v_store.delete_dataset(ds.identifier)) self.assertFalse(v_store.delete_dataset(v_ds.identifier)) self.assertIsNone(v_store.get_dataset(ds.identifier)) self.assertIsNone(v_store.get_dataset(v_ds.identifier)) self.assertEquals(DatasetClient(dataset=self.db.get_dataset(ds.identifier)).get_cell('Salary', 1), '30K') self.tear_down(FS_DATASTORE) def run_tests(self, store_type): """Run sequence of test for given data store type.""" self.set_up(store_type) self.dataset_life_cycle() self.tear_down(store_type) self.set_up(store_type) self.datastore_init(store_type) self.tear_down(store_type) self.set_up(store_type) self.dataset_read() self.tear_down(store_type) self.set_up(store_type) self.dataset_column_index() self.tear_down(store_type) def datastore_init(self, store_type): """Test initalizing a datastore with existing datasets.""" self.setup_fileserver() ds = self.db.load_dataset(self.fileserver.upload_file(CSV_FILE)) if store_type == MEM_DATASTORE: self.db = InMemDataStore() elif store_type == FS_DATASTORE: self.db = FileSystemDataStore(DATASTORE_DIRECTORY) elif store_type == MIMIR_DATASTORE: self.db = MimirDataStore(DATASTORE_DIRECTORY) def dataset_column_index(self): """Test the column by id index of the dataset handle.""" self.setup_fileserver() ds = self.db.load_dataset(self.fileserver.upload_file(CSV_FILE)) # Ensure that the project data has three columns and two rows self.assertEquals(ds.column_by_id(0).name.upper(), 'NAME') self.assertEquals(ds.column_by_id(1).name.upper(), 'AGE') self.assertEquals(ds.column_by_id(2).name.upper(), 'SALARY') with self.assertRaises(ValueError): ds.column_by_id(5) ds.columns.append(DatasetColumn(identifier=5, name='NEWNAME')) self.assertEquals(ds.column_by_id(5).name.upper(), 'NEWNAME') with self.assertRaises(ValueError): ds.column_by_id(4) def dataset_life_cycle(self): """Test create and delete dataset.""" self.setup_fileserver() ds = self.db.load_dataset(self.fileserver.upload_file(CSV_FILE)) # Ensure that the project data has three columns and two rows self.assertEquals(len(ds.columns), 3) self.assertEquals(len(ds.fetch_rows()), 2) self.assertEquals(ds.row_count, 2) # Delete dataset and ensure that the dataset directory no longer exists self.assertTrue(self.db.delete_dataset(ds.identifier)) self.assertFalse(self.db.delete_dataset(ds.identifier)) def dataset_read(self): """Test reading a dataset.""" self.setup_fileserver() dh = self.db.load_dataset(self.fileserver.upload_file(CSV_FILE)) ds = self.db.get_dataset(dh.identifier) ds_rows = ds.fetch_rows() self.assertEquals(dh.identifier, ds.identifier) self.assertEquals(len(dh.columns), len(ds.columns)) self.assertEquals(len(dh.fetch_rows()), len(ds_rows)) self.assertEquals(len(dh.fetch_rows()), len(ds_rows)) self.assertEquals(dh.row_count, len(ds_rows)) # Name,Age,Salary # Alice,23,35K # Bob,32,30K self.assertEquals(ds.column_index('Name'), 0) self.assertEquals(ds.column_index('Age'), 1) self.assertEquals(ds.column_index('Salary'), 2) row = ds_rows[0] self.assertEquals(row.values[0], 'Alice') self.assertEquals(int(row.values[1]), 23) self.assertEquals(row.values[2], '35K') row = ds_rows[1] self.assertEquals(row.values[0], 'Bob') self.assertEquals(int(row.values[1]), 32) self.assertEquals(row.values[2], '30K') def load_tsv(self): """Test writing a dataset with duplicate name twice.""" self.setup_fileserver() fh = self.fileserver.upload_file(TSV_FILE) ds = self.db.load_dataset(fh) 
self.assertEquals(len(ds.columns), 3) self.assertEquals(ds.row_count, 2)
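# A minimal sketch, assuming a base data store like the ones above: the
# volatile-layer pattern from test_volatile_datastore, where edits create a new
# dataset in the volatile store while the underlying dataset stays unchanged.
def edit_without_persisting(base_store, dataset_id, column, row_index, value):
    """Apply one cell update in a volatile layer and return the new dataset id."""
    v_store = VolatileDataStore(base_store)
    ds = DatasetClient(dataset=v_store.get_dataset(dataset_id))
    ds.rows[row_index].set_value(column, value)
    return v_store.create_dataset(columns=ds.columns, rows=ds.rows).identifier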
def setup_fileserver(self): """Create a fresh file server.""" if os.path.isdir(FILESERVER_DIR): shutil.rmtree(FILESERVER_DIR) os.mkdir(FILESERVER_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR)
class TestMimirLenses(unittest.TestCase): def setUp(self): """Create an empty work trails repository.""" # Create fresh set of directories for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]: if os.path.isdir(d): shutil.rmtree(d) os.mkdir(d) self.datastore = MimirDataStore(DATASTORE_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) vizual = MimirVizualEngine(self.datastore, self.fileserver) self.db = FileSystemViztrailRepository( VIZTRAILS_DIR, {ENV.identifier: ENV} ) def tearDown(self): """Clean-up by dropping the MongoDB colelction used by the engine. """ # Delete directories for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]: if os.path.isdir(d): shutil.rmtree(d) def test_domain_lens(self): """Test DOMAIN lens.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE) vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) col_age = ds.column_by_name('Age') self.assertFalse(wf.has_error) # Missing Value Lens self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_domain(DS_NAME, col_age.identifier) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) if wf.has_error: print wf.modules[-1].stderr[0] self.assertEquals(wf.modules[-1].command_text.upper(), 'DOMAIN FOR AGE IN PEOPLE') self.assertFalse(wf.has_error) self.assertEquals(len(wf.modules), 2) # Get dataset ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) rows = ds.fetch_rows() self.assertNotEquals(rows[2].values[ds.column_index('Age')], '') # Introduce an error. 
Make sure command formating is correct self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_domain('MY DS', 'MY COL') ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertTrue(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'DOMAIN FOR \'MY COL\' IN \'MY DS\'') mimir.finalize() def test_geocode_lens(self): """Test GEOCODE lens.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() f_handle = self.fileserver.upload_file(GEO_FILE) vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.assertFalse(wf.has_error) # Geocode Lens self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_geocode( DS_NAME, 'GOOGLE', house_nr=ds.column_by_name('STRNUMBER').identifier, street=ds.column_by_name('STRNAME').identifier, city=ds.column_by_name('CITY').identifier, state=ds.column_by_name('STATE').identifier ) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) if wf.has_error: print wf.modules[-1].stderr[0] self.assertEquals(wf.modules[-1].command_text.upper(), 'GEOCODE HOUSE_NUMBER=STRNUMBER,STREET=STRNAME,CITY=CITY,STATE=STATE PEOPLE USING GOOGLE') self.assertFalse(wf.has_error) self.assertEquals(len(wf.modules), 2) # Get dataset ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.assertEquals(len(ds.columns), 6) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_geocode( DS_NAME, 'GOOGLE' ) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) if wf.has_error: print wf.modules[-1].stderr[0] self.assertEquals(wf.modules[-1].command_text.upper(), 'GEOCODE PEOPLE USING GOOGLE') self.assertFalse(wf.has_error) self.assertEquals(len(wf.modules), 3) # Get dataset ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.assertEquals(len(ds.columns), 8) mimir.finalize() def test_key_repair_lens(self): """Test KEY REPAIR lens.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() f_handle = self.fileserver.upload_file(KEY_REPAIR_FILE) vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) ds1 = self.datastore.get_dataset(wf.modules[0].datasets[DS_NAME]) # Missing Value Lens self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_key_repair(DS_NAME, ds1.column_by_name('Empid').identifier) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'KEY REPAIR FOR EMPID IN ' + DS_NAME.upper()) # Get dataset ds2 = self.datastore.get_dataset(wf.modules[0].datasets[DS_NAME]) self.assertEquals(ds1.row_count, ds2.row_count) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.assertEquals(len(ds.columns), 4) self.assertEquals(ds.row_count, 2) names = set() empids = set() rowids = set() for row in DatasetClient(dataset=ds).rows: rowids.add(row.identifier) empids.add(int(row.get_value('empid'))) names.add(row.get_value('name')) self.assertTrue(1 in empids) self.assertTrue(2 in rowids) self.assertTrue('Alice' in names) 
self.assertTrue('Carla' in names) # Test error case and command text self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_key_repair('MY DS', 'MY COL') ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertTrue(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'KEY REPAIR FOR \'MY COL\' IN \'MY DS\'') mimir.finalize() def test_missing_value_lens(self): """Test MISSING_VALUE lens.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE) vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.assertFalse(wf.has_error) # Missing Value Lens self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_missing_value(DS_NAME, ds.column_by_name('AGE').identifier) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'MISSING VALUES FOR AGE IN ' + DS_NAME.upper()) self.assertEquals(len(wf.modules), 2) # Get dataset ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) rows = ds.fetch_rows() self.assertNotEquals(rows[2].values[ds.column_index('Age')], '') # Annotations annotations = ds.get_annotations(column_id=1, row_id=4) self.assertEquals(len(annotations), 2) # MISSING VALUE Lens with value constraint vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'New Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_missing_value( DS_NAME, ds.column_by_name('AGE').identifier, constraint='> 30') ) wf = self.db.get_workflow(viztrail_id=vt.identifier) if wf.has_error: print wf.modules[-1].stderr[0] self.assertFalse(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'MISSING VALUES FOR AGE IN ' + DS_NAME.upper() + ' WITH CONSTRAINT > 30') #self.assertEquals(wf.modules[-1].command_text.upper(), 'MISSING VALUES FOR AGE IN ' + DS_NAME.upper()) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) rows = ds.fetch_rows() self.assertTrue(rows[2].values[ds.column_index('Age')] > 30) # Command text in case of error self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_missing_value('MY DS', '?', constraint='A B') ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertTrue(wf.has_error) cmd_text = wf.modules[-1].command_text.upper() expected_text = 'MISSING VALUES FOR ? 
IN \'MY DS\'' + ' WITH CONSTRAINT A B' self.assertEquals(cmd_text, expected_text) mimir.finalize() def test_missing_key_lens(self): """Test MISSING_KEY lens.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE) vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) # Missing Value Lens age_col = ds.columns[ds.column_index('Age')].identifier self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_missing_key(DS_NAME, age_col, missing_only=True) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertEquals(wf.modules[-1].command_text.upper(), 'MISSING KEYS FOR AGE IN ' + DS_NAME.upper()) self.assertFalse(wf.has_error) # Get dataset ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.assertEquals(len(ds.columns), 3) rows = ds.fetch_rows() self.assertEquals(len(rows), 24) #self.db.append_workflow_module( # viztrail_id=vt.identifier, # command=cmd.load_dataset(f_handle.identifier, DS_NAME + '2') #) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_missing_key( DS_NAME, ds.columns[ds.column_index('Salary')].identifier, missing_only=True ) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) # Get dataset ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.assertEquals(len(ds.columns), 3) rows = ds.fetch_rows() self.assertEquals(len(rows), 55) mimir.finalize() def test_picker_lens(self): """Test PICKER lens.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() f_handle = self.fileserver.upload_file(PICKER_FILE) vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) # Missing Value Lens ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_picker(DS_NAME, [ {'pickFrom': ds.column_by_name('Age').identifier}, {'pickFrom': ds.column_by_name('Salary').identifier} ]) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) if wf.modules[-1].has_error: print wf.modules[-1].stderr self.assertFalse(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'PICK FROM AGE,SALARY IN ' + DS_NAME.upper()) # Get dataset self.assertEquals(len(wf.modules[-1].datasets), 1) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) columns = [c.name for c in ds.columns] self.assertEquals(len(ds.columns), 5) self.assertTrue('PICK_ONE_AGE_SALARY' in columns) # Pick another column, this time with custom name self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_picker(DS_NAME, [ {'pickFrom': ds.column_by_name('Age').identifier}, {'pickFrom': ds.column_by_name('Salary').identifier} ], pick_as='My Column') ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) # Get dataset self.assertEquals(len(wf.modules[-1].datasets), 1) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) columns = [c.name 
for c in ds.columns] self.assertEquals(len(ds.columns), 6) self.assertTrue('PICK_ONE_AGE_SALARY' in columns) self.assertTrue('My Column' in columns) # Pick from a picked column self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_picker(DS_NAME, [ {'pickFrom': ds.column_by_name('Age').identifier}, {'pickFrom': ds.column_by_name('PICK_ONE_AGE_SALARY').identifier} ], pick_as='My Column') ) wf = self.db.get_workflow(viztrail_id=vt.identifier) if wf.modules[-1].has_error: print wf.modules[-1].stderr self.assertFalse(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'PICK FROM AGE,PICK_ONE_AGE_SALARY AS \'MY COLUMN\' IN ' + DS_NAME.upper()) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) mimir.finalize() def test_schema_matching_lens(self): """Test SCHEMA_MATCHING lens.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() f_handle = self.fileserver.upload_file(CSV_FILE) vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) # Missing Value Lens self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_schema_matching(DS_NAME, [ {'column': 'BDate', 'type': 'int'}, {'column': 'PName', 'type': 'varchar'} ], 'new_' + DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'SCHEMA MATCHING PEOPLE (BDATE INT, PNAME VARCHAR) AS NEW_' + DS_NAME.upper()) # Get dataset self.assertEquals(len(wf.modules[-1].datasets), 2) ds = self.datastore.get_dataset(wf.modules[-1].datasets['new_' + DS_NAME]) self.assertEquals(len(ds.columns), 2) self.assertEquals(ds.row_count, 2) # Error if adding an existing dataset self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_schema_matching( DS_NAME, [{'column': 'BDate', 'type': 'int'}], 'new_' + DS_NAME ) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertTrue(wf.has_error) self.db.replace_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_schema_matching( DS_NAME, [{'column': 'BDate', 'type': 'int'}], 'a_new_' + DS_NAME ), module_id=wf.modules[-1].identifier, ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'SCHEMA MATCHING PEOPLE (BDATE INT) AS A_NEW_' + DS_NAME.upper()) # Error when adding a dataset with an invalid name self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_schema_matching( DS_NAME, [{'column': 'BDate', 'type': 'int'}], 'SOME NAME' ) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertTrue(wf.has_error) self.assertEquals(wf.modules[-1].command_text.upper(), 'SCHEMA MATCHING PEOPLE (BDATE INT) AS \'SOME NAME\'') mimir.finalize() def test_type_inference_lens(self): """Test TYPE INFERENCE lens.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE) vt = self.db.create_viztrail(ENGINE_ID, {'name' : 'My Project'}) self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.load_dataset(f_handle.identifier, DS_NAME) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) ds1 = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) 
self.assertFalse(wf.has_error) # Infer type self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.mimir_type_inference(DS_NAME, 0.6) ) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) print wf.modules[-1].command_text.upper() self.assertEquals(wf.modules[-1].command_text.upper(), 'TYPE INFERENCE FOR COLUMNS IN ' + DS_NAME.upper() + ' WITH PERCENT_CONFORM = 0.6') # Get dataset ds2 = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) self.assertEquals(len(ds2.columns), 3) self.assertEquals(ds2.row_count, 7) ds1_rows = ds1.fetch_rows() ds2_rows = ds2.fetch_rows() for i in range(ds2.row_count): self.assertEquals(ds1_rows[i].values, ds2_rows[i].values) mimir.finalize()
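# A minimal sketch, assuming the viztrail repository and viztrail objects used
# throughout TestMimirLenses: the append-and-recheck step every lens test
# repeats, returning the refreshed workflow head.
def append_and_check(db, vt, command):
    """Append a module, re-read the workflow, and surface any module error."""
    db.append_workflow_module(viztrail_id=vt.identifier, command=command)
    wf = db.get_workflow(viztrail_id=vt.identifier)
    if wf.has_error:
        print(wf.modules[-1].stderr[0])
    return wf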
class TestWorkflows(unittest.TestCase): def tearDown(self): """Clean-up by dropping the MongoDB colelction used by the engine. """ # Delete directories for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]: if os.path.isdir(d): shutil.rmtree(d) def set_up(self): """Create an empty work trails repository.""" # Create fresh set of directories for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]: if os.path.isdir(d): shutil.rmtree(d) os.mkdir(d) def set_up_default(self): """Setup configuration using default Vizual engine.""" env = ExecEnv( FileServerConfig().from_dict({'directory': FILESERVER_DIR}), packages=[PACKAGE_VIZUAL, PACKAGE_PYTHON ]).from_dict({'datastore': { 'directory': DATASTORE_DIR }}) self.ENGINE_ID = env.identifier self.set_up() self.datastore = FileSystemDataStore(DATASTORE_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) self.db = FileSystemViztrailRepository(VIZTRAILS_DIR, {env.identifier: env}) def set_up_mimir(self): """Setup configuration using Mimir engine.""" env = ExecEnv( FileServerConfig().from_dict({'directory': FILESERVER_DIR}), identifier=ENGINEENV_MIMIR, packages=[PACKAGE_VIZUAL, PACKAGE_PYTHON, PACKAGE_MIMIR ]).from_dict({'datastore': { 'directory': DATASTORE_DIR }}) self.ENGINE_ID = env.identifier self.set_up() self.datastore = MimirDataStore(DATASTORE_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) self.db = FileSystemViztrailRepository(VIZTRAILS_DIR, {env.identifier: env}) def test_vt_default(self): """Run workflow with default configuration.""" # Create new work trail and retrieve the HEAD workflow of the default # branch self.set_up_default() self.run_python_workflow() self.set_up_default() self.run_mixed_workflow() self.set_up_default() self.run_delete_modules() self.set_up_default() self.run_erroneous_workflow() self.set_up_default() self.run_update_datasets() def test_vt_mimir(self): """Run workflows for Mimir configurations.""" # Create new work trail and retrieve the HEAD workflow of the default # branch mimir.initialize() self.set_up_mimir() self.run_python_workflow() self.set_up_mimir() self.run_mixed_workflow() self.set_up_mimir() self.run_delete_modules() self.set_up_mimir() self.run_erroneous_workflow() mimir.finalize() def run_delete_modules(self): """Test deletion of modules.""" f_handle = self.fileserver.upload_file(CSV_FILE) vt = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'}) #print '(1) CREATE DATASET' self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.load_dataset( f_handle.identifier, DS_NAME)) wf = self.db.get_workflow(viztrail_id=vt.identifier) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) col_age = ds.column_by_name('Age') self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.update_cell( DS_NAME, col_age.identifier, 0, '28')) self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.update_cell( DS_NAME, col_age.identifier, 1, '42')) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) ds = DatasetClient( self.datastore.get_dataset(wf.modules[-1].datasets['people'])) self.assertEquals(int(ds.rows[0].get_value('Age')), 28) self.assertEquals(int(ds.rows[1].get_value('Age')), 42) # DELETE UPDATE CELL self.db.delete_workflow_module(viztrail_id=vt.identifier, module_id=wf.modules[1].identifier) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) ds = DatasetClient( self.datastore.get_dataset(wf.modules[-1].datasets['people'])) 
self.assertEquals(int(ds.rows[0].get_value('Age')), 23) # DELETE LOAD (will introduce error) self.db.delete_workflow_module(viztrail_id=vt.identifier, module_id=wf.modules[0].identifier) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertTrue(wf.has_error) # DELETE last remaining module self.db.delete_workflow_module(viztrail_id=vt.identifier, module_id=wf.modules[0].identifier) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) def run_erroneous_workflow(self): """Test workflow that has errors.""" f_handle = self.fileserver.upload_file(CSV_FILE) vt = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'}) #print '(1) CREATE DATASET' self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.load_dataset( f_handle.identifier, DS_NAME)) wf = self.db.get_workflow(viztrail_id=vt.identifier) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) col_age = ds.column_by_name('Age') self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.update_cell( DS_NAME, col_age.identifier, 0, '28')) # This should create an error because of the invalid column name self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.rename_column( DS_NAME, col_age.identifier, '')) # This should not have any effect self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.update_cell( DS_NAME, col_age.identifier, 0, '29')) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertTrue(wf.has_error) # Make sure that all workflow modules have a non-negative identifier # and that they are all unique identifier = set() for m in wf.modules: self.assertTrue(m.identifier >= 0) self.assertTrue(not m.identifier in identifier) identifier.add(m.identifier) def run_mixed_workflow(self): """Test functionality to execute a workflow module.""" f_handle = self.fileserver.upload_file(CSV_FILE) vt = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'}) #print '(1) CREATE DATASET' self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.load_dataset( f_handle.identifier, DS_NAME)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) cmd_text = wf.modules[-1].command_text self.assertEquals(cmd_text, 'LOAD DATASET people FROM FILE dataset.csv') #print '(2) INSERT ROW' self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.insert_row(DS_NAME, 1)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) cmd_text = wf.modules[-1].command_text self.assertEquals(cmd_text, 'INSERT ROW INTO people AT POSITION 1') #print '(3) Set name to Bobby and set variables' self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.python_cell(SET_VARIABLES_PY)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) cmd_text = wf.modules[-1].command_text self.assertEquals(cmd_text, SET_VARIABLES_PY) ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) #print '(4) Set age to 28' self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.update_cell( DS_NAME, ds.column_by_name('Age').identifier, 1, '28')) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) cmd_text = wf.modules[-1].command_text self.assertEquals(cmd_text.upper(), 'UPDATE PEOPLE SET [AGE,1] = 28') ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME]) #print '(5) Change Alice to Bob' self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.update_cell(DS_NAME, 
ds.column_by_name('Name').identifier, 0, 'Bob')) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) cmd_text = wf.modules[-1].command_text self.assertEquals(cmd_text.upper(), 'UPDATE PEOPLE SET [NAME,0] = \'BOB\'') #print '(6) UPDATE DATASET WITH FILTER' self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.python_cell(UPDATE_DATASET_WITH_FILTER_PY)) wf = self.db.get_workflow(viztrail_id=vt.identifier) cmd_text = wf.modules[-1].command_text self.assertEquals(cmd_text, UPDATE_DATASET_WITH_FILTER_PY) self.assertFalse(wf.has_error) # Ensure that all names are Bobby ds = DatasetClient( self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])) age = [23, 28, 32] for i in range(len(ds.rows)): row = ds.rows[i] self.assertEquals(row.get_value('Name'), 'Bobby') self.assertEquals(int(row.get_value('Age')), age[i]) def run_python_workflow(self): """Test functionality to execute a workflow module.""" vt = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'}) #print '(1) CREATE DATASET' self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.python_cell(CREATE_DATASET_PY)) # from vizier.database.client import VizierDBClient\nv = VizierDBClient(__vizierdb__) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) modules = set() for m in wf.modules: self.assertNotEquals(m.identifier, -1) self.assertFalse(m.identifier in modules) modules.add(m.identifier) self.assertEquals(wf.version, 0) self.assertEquals(len(wf.modules), 1) self.assertTrue(len(wf.modules[0].stdout) == 0) self.assertTrue(len(wf.modules[0].stderr) == 0) self.assertEquals(len(wf.modules[0].datasets), 1) self.assertTrue(DS_NAME in wf.modules[0].datasets) #print '(2) PRINT DATASET' self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.python_cell(PRINT_DATASET_PY)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) prev_modules = modules modules = set() for m in wf.modules: self.assertNotEquals(m.identifier, -1) self.assertFalse(m.identifier in modules) modules.add(m.identifier) # Ensure that the identifier of previous modules did not change for id in prev_modules: self.assertTrue(id in modules) self.assertEquals(wf.version, 1) self.assertEquals(len(wf.modules), 2) self.assertTrue(len(wf.modules[0].stdout) == 0) self.assertTrue(len(wf.modules[0].stderr) == 0) self.assertEquals(len(wf.modules[0].datasets), 1) self.assertTrue(DS_NAME in wf.modules[0].datasets) self.assertTrue(len(wf.modules[1].stdout) == 1) self.assertTrue(len(wf.modules[1].stderr) == 0) self.assertEquals(wf.modules[1].stdout[0]['data'], 'Alice\nBob') self.assertEquals(len(wf.modules[1].datasets), 1) self.assertTrue(DS_NAME in wf.modules[1].datasets) ds_id = wf.modules[1].datasets[DS_NAME] #print '(3) UPDATE DATASET' self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.python_cell(UPDATE_DATASET_PY)) wf = self.db.get_workflow(viztrail_id=vt.identifier) prev_modules = modules modules = set() for m in wf.modules: self.assertNotEquals(m.identifier, -1) self.assertFalse(m.identifier in modules) modules.add(m.identifier) # Ensure that the identifier of previous modules did not change for id in prev_modules: self.assertTrue(id in modules) self.assertFalse(wf.has_error) self.assertEquals(wf.version, 2) self.assertEquals(len(wf.modules), 3) self.assertTrue(len(wf.modules[0].stdout) == 0) self.assertTrue(len(wf.modules[0].stderr) == 0) self.assertEquals(len(wf.modules[0].datasets), 1) self.assertTrue(DS_NAME in 
wf.modules[0].datasets) self.assertEquals(wf.modules[0].datasets[DS_NAME], ds_id) self.assertTrue(len(wf.modules[1].stdout) == 1) self.assertTrue(len(wf.modules[1].stderr) == 0) self.assertEquals(wf.modules[1].stdout[0]['data'], 'Alice\nBob') self.assertEquals(len(wf.modules[1].datasets), 1) self.assertTrue(DS_NAME in wf.modules[1].datasets) self.assertEquals(wf.modules[1].datasets[DS_NAME], ds_id) self.assertTrue(len(wf.modules[2].stdout) == 0) self.assertTrue(len(wf.modules[2].stderr) == 0) self.assertEquals(len(wf.modules[2].datasets), 1) self.assertTrue(DS_NAME in wf.modules[2].datasets) self.assertNotEquals(wf.modules[2].datasets[DS_NAME], ds_id) #print '(4) PRINT DATASET' self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.python_cell(PRINT_DATASET_PY)) wf = self.db.get_workflow(viztrail_id=vt.identifier) prev_modules = modules modules = set() for m in wf.modules: self.assertNotEquals(m.identifier, -1) self.assertFalse(m.identifier in modules) modules.add(m.identifier) # Ensure that the identifier of previous modules did not change for id in prev_modules: self.assertTrue(id in modules) self.assertEquals(wf.version, 3) self.assertEquals(len(wf.modules), 4) self.assertEquals(wf.modules[1].stdout[0]['data'], 'Alice\nBob') self.assertTrue(len(wf.modules[3].stdout) == 1) self.assertTrue(len(wf.modules[3].stderr) == 0) self.assertEquals(wf.modules[3].stdout[0]['data'], 'NoName\nNoName') #print '(5) UPDATE DATASET WITH FILTER' self.db.replace_workflow_module( viztrail_id=vt.identifier, module_id=wf.modules[2].identifier, command=cmd.python_cell(UPDATE_DATASET_WITH_FILTER_PY)) wf = self.db.get_workflow(viztrail_id=vt.identifier) prev_modules = modules modules = set() for m in wf.modules: self.assertNotEquals(m.identifier, -1) self.assertFalse(m.identifier in modules) modules.add(m.identifier) # Ensure that the identifier of previous modules did not change for id in prev_modules: self.assertTrue(id in modules) self.assertTrue(wf.has_error) self.assertEquals(wf.version, 4) self.assertEquals(len(wf.modules), 4) # print '(6) INSERT SET VARIABLES BEFORE UPDATE' self.db.append_workflow_module( viztrail_id=vt.identifier, command=cmd.python_cell(SET_VARIABLES_ONLY_PY), before_id=wf.modules[2].identifier) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertEquals(wf.modules[4].stdout[0]['data'], 'Alice\nBobby') #print '(7) INTRODUCE ERROR' self.db.replace_workflow_module( viztrail_id=vt.identifier, module_id=wf.modules[1].identifier, command=cmd.python_cell(PRINT_UNKNOWN_DATASET_PY)) wf = self.db.get_workflow(viztrail_id=vt.identifier) prev_modules = modules modules = set() for m in wf.modules: self.assertNotEquals(m.identifier, -1) self.assertFalse(m.identifier in modules) modules.add(m.identifier) # Ensure that the identifier of previous modules did not change for id in prev_modules: self.assertTrue(id in modules) self.assertTrue(wf.has_error) # Ensure that the second module has output to stderr self.assertNotEquals(len(wf.modules[1].stderr), 0) # Ensure that the last two modules hav no output (either to STDOUT or # STDERR) for m in wf.modules[2:]: self.assertEquals(len(m.stdout), 0) self.assertEquals(len(m.stderr), 0) #print '(8) FIX ERROR' self.db.replace_workflow_module( viztrail_id=vt.identifier, module_id=wf.modules[1].identifier, command=cmd.python_cell(PRINT_DATASET_PY)) wf = self.db.get_workflow(viztrail_id=vt.identifier) prev_modules = modules modules = set() for m in wf.modules: self.assertNotEquals(m.identifier, -1) 
self.assertFalse(m.identifier in modules) modules.add(m.identifier) # Ensure that the identifier of previous modules did not change for id in prev_modules: self.assertTrue(id in modules) #print (9) DELETE MODULE UPDATE_DATASET_WITH_FILTER_PY self.db.delete_workflow_module(viztrail_id=vt.identifier, module_id=wf.modules[3].identifier) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertEquals(wf.modules[3].stdout[0]['data'], 'Alice\nBob') def run_update_datasets(self): """Test dropping and renaming of datasets.""" f_handle = self.fileserver.upload_file(CSV_FILE) vt = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'}) self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.load_dataset( f_handle.identifier, DS_NAME)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertTrue(DS_NAME in wf.modules[-1].datasets) new_name = DS_NAME + '_renamed' self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.rename_dataset( DS_NAME, new_name)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertTrue(DS_NAME in wf.modules[0].datasets) self.assertFalse(new_name in wf.modules[0].datasets) self.assertFalse(DS_NAME in wf.modules[-1].datasets) self.assertTrue(new_name in wf.modules[-1].datasets) self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.drop_dataset(new_name)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertFalse(new_name in wf.modules[-1].datasets) self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.drop_dataset(new_name)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertTrue(wf.has_error) # Delete the Drop Dataset that failed and replace the first drop with # a Python module that prints names self.db.delete_workflow_module(viztrail_id=vt.identifier, module_id=wf.modules[-1].identifier) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.db.replace_workflow_module(viztrail_id=vt.identifier, module_id=wf.modules[-1].identifier, command=cmd.python_cell(""" for row in vizierdb.get_dataset('""" + new_name + """').rows: print row.get_value('Name') """)) wf = self.db.get_workflow(viztrail_id=vt.identifier) self.assertFalse(wf.has_error) self.assertEquals(wf.modules[-1].stdout[0]['data'], 'Alice\nBob') self.assertFalse(DS_NAME in wf.modules[-1].datasets) self.assertTrue(new_name in wf.modules[-1].datasets)
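# A minimal sketch, assuming a workflow handle as returned by get_workflow: the
# module-identifier invariant that run_python_workflow re-checks after every
# append, replace and delete.
def assert_unique_module_ids(workflow):
    """Every module identifier must be non-negative and unique."""
    seen = set()
    for m in workflow.modules:
        assert m.identifier >= 0
        assert m.identifier not in seen
        seen.add(m.identifier)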
class TestWorkflowUpdates(unittest.TestCase): def setUp(self): """Create an empty work trails repository.""" # Create fresh set of directories self.config = AppConfig() env = ExecEnv( FileServerConfig().from_dict({'directory': FILESERVER_DIR}), packages=[PACKAGE_VIZUAL, PACKAGE_PLOT ]).from_dict({'datastore': { 'directory': DATASTORE_DIR }}) self.ENGINE_ID = env.identifier self.config.envs[self.ENGINE_ID] = env self.config.fileserver = env.fileserver for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]: if os.path.isdir(d): shutil.rmtree(d) os.mkdir(d) self.datastore = FileSystemDataStore(DATASTORE_DIR) self.fileserver = DefaultFileServer(FILESERVER_DIR) self.db = FileSystemViztrailRepository(VIZTRAILS_DIR, {env.identifier: env}) self.api = VizierWebService(self.db, self.datastore, self.fileserver, self.config) def tearDown(self): """Clean-up by dropping the MongoDB collection used by the engine. """ # Delete directories for d in [DATASTORE_DIR, FILESERVER_DIR, VIZTRAILS_DIR]: if os.path.isdir(d): shutil.rmtree(d) def test_view_urls(self): """Ensure that the urls for workflow views get updated correctly when the workflow is modified.""" f_handle = self.fileserver.upload_file(CSV_FILE) vt = self.db.create_viztrail(self.ENGINE_ID, {'name': 'My Project'}) #print '(1) CREATE DATASET' self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.load_dataset( f_handle.identifier, DS_NAME)) wf = self.db.get_workflow(viztrail_id=vt.identifier) #print '(2) PLOT' self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.create_plot( DS_NAME, CHART_NAME, series=[{ 'series_column': 2 }])) url = self.api.get_workflow( vt.identifier, DEFAULT_BRANCH)['state']['charts'][0]['links'][0]['href'] self.assertTrue('master/workflows/1/modules/1/views' in url) # print '(3) UPDATE CELL' self.db.append_workflow_module(viztrail_id=vt.identifier, command=cmd.update_cell( DS_NAME, 0, 0, '28')) url = self.api.get_workflow( vt.identifier, DEFAULT_BRANCH)['state']['charts'][0]['links'][0]['href'] self.assertTrue('master/workflows/2/modules/2/views' in url)
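# A minimal sketch, assuming the payload shape used in test_view_urls above:
# collect every chart view URL from a workflow response returned by the web
# service API.
def chart_view_urls(workflow_response):
    """Return the href of each link attached to each chart in the state."""
    urls = []
    for chart in workflow_response['state']['charts']:
        for link in chart['links']:
            urls.append(link['href'])
    return urls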
class TestVizierClient(unittest.TestCase): def setUp(self): """Delete metadata file if it exists.""" # Drop directorie self.tearDown() def tearDown(self): """Clean-up by dropping file server directory. """ if os.path.isdir(DATASTORE_DIR): shutil.rmtree(DATASTORE_DIR) if os.path.isdir(SERVER_DIR): shutil.rmtree(SERVER_DIR) def test_fs_client(self): """Run tests for default engine and file server data store.""" self.fs = DefaultFileServer(SERVER_DIR) self.ds = FileSystemDataStore(DATASTORE_DIR) self.run_client_tests( VizierDBClient(self.ds, dict(), DefaultVizualEngine(self.ds, self.fs))) def test_mem_client(self): """Run tests for default engine and in-memory data store.""" self.fs = DefaultFileServer(SERVER_DIR) self.ds = InMemDataStore() self.run_client_tests( VizierDBClient(self.ds, dict(), DefaultVizualEngine(self.ds, self.fs))) def test_mimir_client(self): """Run tests for default engine and Mimir data store.""" mimir.initialize() self.fs = DefaultFileServer(SERVER_DIR) self.ds = MimirDataStore(DATASTORE_DIR) self.run_client_tests( VizierDBClient(self.ds, dict(), DefaultVizualEngine(self.ds, self.fs))) mimir.finalize() def run_client_tests(self, client): """Test creating and updating a dataset via the client.""" ds = DatasetClient() ds.insert_column('Name') ds.insert_column('Age') ds.insert_row(['Alice', '23']) ds.insert_row(['Bob', '25']) client.create_dataset('MyDataset', ds) # Ensure the returned dataset contains the input data ds = client.get_dataset('MyDataset') self.assertEquals([c.name for c in ds.columns], ['Name', 'Age']) self.assertEquals([str(v) for v in ds.rows[0].values], ['Alice', '23']) self.assertEquals([str(v) for v in ds.rows[1].values], ['Bob', '25']) # Update dataset ds.rows[1].set_value('Age', '26') client.update_dataset('MyDataset', ds) ds = client.get_dataset('MyDataset') self.assertEquals([str(v) for v in ds.rows[1].values], ['Bob', '26']) # Value error when creating dataset with existing name with self.assertRaises(ValueError): client.create_dataset('MyDataset', ds) # Value error when retrieving unknown dataset with self.assertRaises(ValueError): client.get_dataset('SomeDataset') # Ensure the returned dataset contains the modified data client.rename_dataset('MyDataset', 'SomeDataset') ds = client.get_dataset('SomeDataset') client.update_dataset('SomeDataset', ds) # Move columns around ds = self.ds.load_dataset(self.fs.upload_file(CSV_FILE)) ds = client.create_dataset('people', DatasetClient(ds)) col_1 = [row.get_value(1) for row in ds.rows] ds.insert_column('empty', 2) ds = client.update_dataset('people', ds) col_2 = [row.get_value(2) for row in ds.rows] ds.move_column('empty', 1) ds = client.update_dataset('people', ds) for i in range(len(ds.rows)): row = ds.rows[i] self.assertEquals(row.values[1], col_2[i]) self.assertEquals(row.values[2], col_1[i]) # Rename ds.columns[1].name = 'allnone' ds = client.update_dataset('people', ds) for i in range(len(ds.rows)): row = ds.rows[i] self.assertEquals(row.get_value('allnone'), col_2[i]) self.assertEquals(row.values[2], col_1[i]) # Insert row row = ds.insert_row() row.set_value('Name', 'Zoe') ds = client.create_dataset('upd', ds) self.assertEquals(len(ds.rows), 3) r2 = ds.rows[2] self.assertEquals(r2.identifier, 2) self.assertEquals(r2.values, ['Zoe', None, None, None]) # Annotations ds = client.get_dataset('people') annotations = ds.rows[0].annotations('Age') annotations.add('user:comment', 'My Comment') ds = client.update_dataset('people', ds) annotations = ds.rows[0].annotations('Age').find_all('user:comment') 
self.assertEquals(len(annotations), 1) anno = annotations[0] self.assertEquals(anno.key, 'user:comment') self.assertEquals(anno.value, 'My Comment') ds.rows[0].annotations('Age').add('user:comment', 'Another Comment') ds = client.update_dataset('people', ds) annotations = ds.rows[0].annotations('Age').find_all('user:comment') self.assertEquals(len(annotations), 2) self.assertEquals(ds.rows[0].annotations('Age').keys(), ['user:comment']) values = [a.value for a in annotations] for val in ['My Comment', 'Another Comment']: self.assertTrue(val in values) ds.rows[0].annotations('Age').update(identifier=anno.identifier, key='user:issue', value='Some Issue') ds = client.update_dataset('people', ds) annotations = ds.rows[0].annotations('Age').find_all('user:comment') self.assertEquals(len(annotations), 1) keys = ds.rows[0].annotations('Age').keys() for key in ['user:comment', 'user:issue']: self.assertTrue(key in keys) values = [ a.value for a in ds.rows[0].annotations('Age').find_all('user:comment') ] for val in ['Another Comment']: self.assertTrue(val in values) values = [ a.value for a in ds.rows[0].annotations('Age').find_all('user:issue') ] for val in ['Some Issue']: self.assertTrue(val in values) ds.rows[0].annotations('Age').update(identifier=anno.identifier) ds = client.update_dataset('people', ds) annotations = ds.rows[0].annotations('Age').find_all('user:issue') self.assertEquals(len(annotations), 0) annotations = ds.rows[0].annotations('Age').find_all('user:comment') self.assertEquals(len(annotations), 1) # Delete column ds = client.get_dataset('people') ds.delete_column('Age') client.update_dataset('people', ds) ds = client.get_dataset('people') names = [col.name.upper() for col in ds.columns] self.assertTrue('NAME' in names) self.assertFalse('AGE' in names)
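# A minimal sketch, assuming a client and dataset like the ones in
# run_client_tests: the annotation round trip the test ends with, adding an
# annotation to row 0, persisting it via update_dataset, and reading it back.
def annotate_first_row(client, name, ds, column, key, value):
    """Attach an annotation to row 0 of the given column and return the stored copies."""
    ds.rows[0].annotations(column).add(key, value)
    ds = client.update_dataset(name, ds)
    return ds.rows[0].annotations(column).find_all(key)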