def test_type_inference_lens(self):
    """Test TYPE INFERENCE lens."""
    # Create new work trail and retrieve the HEAD workflow of the default
    # branch
    mimir.initialize()
    f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE)
    vt = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.load_dataset(f_handle.identifier, DS_NAME)
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    ds1 = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    self.assertFalse(wf.has_error)
    # Infer type
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_type_inference(DS_NAME, 0.6)
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertFalse(wf.has_error)
    print wf.modules[-1].command_text.upper()
    self.assertEquals(
        wf.modules[-1].command_text.upper(),
        'TYPE INFERENCE FOR COLUMNS IN ' + DS_NAME.upper() + ' WITH PERCENT_CONFORM = 0.6'
    )
    # Get dataset
    ds2 = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    self.assertEquals(len(ds2.columns), 3)
    self.assertEquals(ds2.row_count, 7)
    ds1_rows = ds1.fetch_rows()
    ds2_rows = ds2.fetch_rows()
    for i in range(ds2.row_count):
        self.assertEquals(ds1_rows[i].values, ds2_rows[i].values)
    mimir.finalize()

def test_load(self):
    """Run workflow with default configuration."""
    mimir.initialize()
    # Updates in an 'int' column: ints are kept, a real widens the type
    self.update_cell(CSV_FILE, 2, 0, 'int', 10)
    self.update_cell(CSV_FILE, 2, 0, 'int', 10.3, result_type='real')
    self.update_cell(CSV_FILE, 2, 0, 'int', None)
    # Updates in a 'real' column: ints are stored as reals, strings widen to varchar
    self.update_cell(CSV_FILE, 3, 0, 'real', 10.3)
    self.update_cell(CSV_FILE, 3, 0, 'real', 10, result_value=10.0)
    self.update_cell(CSV_FILE, 3, 0, 'real', 'A', result_type='varchar')
    self.update_cell(CSV_FILE, 3, 0, 'real', None)
    # Updates in a 'varchar' column: numbers are stored as strings
    self.update_cell(CSV_FILE, 4, 0, 'varchar', 'A')
    self.update_cell(CSV_FILE, 4, 0, 'varchar', 10, result_value='10')
    self.update_cell(CSV_FILE, 4, 0, 'varchar', 10.87, result_value='10.87')
    self.update_cell(CSV_FILE, 4, 0, 'varchar', None)
    # Updates in 'bool' columns: recognized literals are coerced, others widen to varchar
    self.update_cell(CSV_FILE, 8, 0, 'bool', 'False', result_value=False)
    self.update_cell(CSV_FILE, 8, 0, 'bool', '0', result_value=False)
    self.update_cell(CSV_FILE, 8, 0, 'bool', None)
    self.update_cell(CSV_FILE, 8, 1, 'bool', True, result_value=True)
    self.update_cell(CSV_FILE, 8, 1, 'bool', '1', result_value=True)
    self.update_cell(CSV_FILE, 8, 1, 'bool', 'A', result_value='A', result_type='varchar')
    self.update_cell(CSV_FILE, 8, 1, 'bool', 10.87, result_value='10.87', result_type='varchar')
    # Updates in 'date' and 'datetime' columns: unparseable values widen to varchar
    self.update_cell(CSV_FILE_DT, 1, 0, 'date', '2018-05-09')
    self.update_cell(CSV_FILE_DT, 1, 0, 'date', '20180509', result_value='20180509', result_type='varchar')
    self.update_cell(CSV_FILE_DT, 1, 0, 'date', None)
    self.update_cell(CSV_FILE_DT, 0, 0, 'datetime', '2018-05-09 12:03:22.0000')
    self.update_cell(CSV_FILE_DT, 0, 0, 'datetime', 'ABC', result_value='ABC', result_type='varchar')
    self.update_cell(CSV_FILE_DT, 0, 0, 'datetime', None)
    mimir.finalize()

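# NOTE: the `update_cell` helper used above is defined elsewhere in this test
# class. The sketch below is an illustrative assumption reconstructed from the
# call sites, not the actual implementation; in particular, the
# `self.datastore.update_cell` call and the argument names are hypothetical.
def update_cell(self, filename, column, row, data_type, value,
                result_value=None, result_type=None):
    """Load `filename`, verify that `column` has type `data_type`, update the
    cell at (column, row) to `value`, and check the resulting cell value and
    column type.

    By default the update is expected to keep the column type and store
    `value` unchanged; `result_value` and `result_type` override the expected
    cell value and expected column type, respectively.
    """
    f_handle = self.fileserver.upload_file(filename)
    ds = self.datastore.load_dataset(f_handle)
    self.assertEquals(ds.columns[column].data_type, data_type)
    # Hypothetical call that writes the cell and returns the updated dataset
    ds = self.datastore.update_cell(ds.identifier, column, row, value)
    if result_type is None:
        result_type = data_type
    if result_value is None:
        result_value = value
    self.assertEquals(ds.columns[column].data_type, result_type)
    self.assertEquals(ds.fetch_rows()[row].values[column], result_value)
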
def test_load(self):
    """Run workflow with default configuration."""
    # Ignore files that raised errors (or take too long to load)
    ignore_files = ['JSONOUTPUTWIDE.csv']
    data_types = set()
    mimir.initialize()
    for filename in os.listdir(LOAD_DIR):
        if filename in ignore_files:
            continue
        print 'LOAD ' + filename
        filename = os.path.join(LOAD_DIR, filename)
        f_handle = self.fileserver.upload_file(filename)
        ds = self.datastore.load_dataset(f_handle)
        ds_load = self.datastore.get_dataset(ds.identifier)
        for col in ds_load.columns:
            data_types.add(col.data_type)
            print '\t' + col.name_in_rdb + ' AS ' + col.name + ' (' + col.data_type + ')'
        print '\t' + str(ds.row_count) + ' row(s)'
        self.assertEquals(len(ds.columns), len(ds_load.columns))
        self.assertEquals(ds.column_counter, ds_load.column_counter)
        self.assertEquals(ds.row_counter, ds_load.row_counter)
        rows = ds.fetch_rows()
        self.assertEquals(ds.row_counter, len(rows))
        self.assertEquals(ds.row_count, len(rows))
        for i in range(len(rows)):
            row = rows[i]
            self.assertEquals(row.identifier, i)
            self.assertEquals(len(row.values), len(ds.columns))
    mimir.finalize()
    print data_types

def test_vt_mimir(self):
    """Run workflows for Mimir configurations."""
    # Create new work trail and retrieve the HEAD workflow of the default
    # branch
    mimir.initialize()
    self.set_up_mimir()
    self.run_workflow()
    mimir.finalize()

def test_mimir_datastore(self):
    """Run tests for the Mimir datastore."""
    mimir.initialize()
    self.run_tests(MIMIR_DATASTORE)
    self.set_up(MIMIR_DATASTORE)
    self.load_tsv()
    self.tear_down(MIMIR_DATASTORE)
    mimir.finalize()

def test_mimir_client(self):
    """Run tests for default engine and Mimir data store."""
    mimir.initialize()
    self.fs = DefaultFileServer(SERVER_DIR)
    self.ds = MimirDataStore(DATASTORE_DIR)
    self.run_client_tests(
        VizierDBClient(self.ds, dict(), DefaultVizualEngine(self.ds, self.fs))
    )
    mimir.finalize()

def test_geocode_lens(self):
    """Test GEOCODE lens."""
    # Create new work trail and retrieve the HEAD workflow of the default
    # branch
    mimir.initialize()
    f_handle = self.fileserver.upload_file(GEO_FILE)
    vt = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.load_dataset(f_handle.identifier, DS_NAME)
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    self.assertFalse(wf.has_error)
    # Geocode lens with explicit address columns
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_geocode(
            DS_NAME,
            'GOOGLE',
            house_nr=ds.column_by_name('STRNUMBER').identifier,
            street=ds.column_by_name('STRNAME').identifier,
            city=ds.column_by_name('CITY').identifier,
            state=ds.column_by_name('STATE').identifier
        )
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    if wf.has_error:
        print wf.modules[-1].stderr[0]
    self.assertEquals(
        wf.modules[-1].command_text.upper(),
        'GEOCODE HOUSE_NUMBER=STRNUMBER,STREET=STRNAME,CITY=CITY,STATE=STATE PEOPLE USING GOOGLE'
    )
    self.assertFalse(wf.has_error)
    self.assertEquals(len(wf.modules), 2)
    # Get dataset
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    self.assertEquals(len(ds.columns), 6)
    # Geocode lens without explicit address columns
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_geocode(DS_NAME, 'GOOGLE')
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    if wf.has_error:
        print wf.modules[-1].stderr[0]
    self.assertEquals(wf.modules[-1].command_text.upper(), 'GEOCODE PEOPLE USING GOOGLE')
    self.assertFalse(wf.has_error)
    self.assertEquals(len(wf.modules), 3)
    # Get dataset
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    self.assertEquals(len(ds.columns), 8)
    mimir.finalize()

def test_vt_mimir(self):
    """Run workflows for Mimir configurations."""
    mimir.initialize()
    self.set_up_mimir()
    self.run_python_workflow()
    self.set_up_mimir()
    self.run_mixed_workflow()
    self.set_up_mimir()
    self.run_delete_modules()
    self.set_up_mimir()
    self.run_erroneous_workflow()
    mimir.finalize()

def test_missing_key_lens(self):
    """Test MISSING_KEY lens."""
    # Create new work trail and retrieve the HEAD workflow of the default
    # branch
    mimir.initialize()
    f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE)
    vt = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.load_dataset(f_handle.identifier, DS_NAME)
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertFalse(wf.has_error)
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    # Missing key lens on the 'Age' column
    age_col = ds.columns[ds.column_index('Age')].identifier
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_missing_key(DS_NAME, age_col, missing_only=True)
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertEquals(
        wf.modules[-1].command_text.upper(),
        'MISSING KEYS FOR AGE IN ' + DS_NAME.upper()
    )
    self.assertFalse(wf.has_error)
    # Get dataset
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    self.assertEquals(len(ds.columns), 3)
    rows = ds.fetch_rows()
    self.assertEquals(len(rows), 24)
    #self.db.append_workflow_module(
    #    viztrail_id=vt.identifier,
    #    command=cmd.load_dataset(f_handle.identifier, DS_NAME + '2')
    #)
    # Missing key lens on the 'Salary' column
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_missing_key(
            DS_NAME,
            ds.columns[ds.column_index('Salary')].identifier,
            missing_only=True
        )
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertFalse(wf.has_error)
    # Get dataset
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    self.assertEquals(len(ds.columns), 3)
    rows = ds.fetch_rows()
    self.assertEquals(len(rows), 55)
    mimir.finalize()

def test_key_repair_lens(self):
    """Test KEY REPAIR lens."""
    # Create new work trail and retrieve the HEAD workflow of the default
    # branch
    mimir.initialize()
    f_handle = self.fileserver.upload_file(KEY_REPAIR_FILE)
    vt = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.load_dataset(f_handle.identifier, DS_NAME)
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertFalse(wf.has_error)
    ds1 = self.datastore.get_dataset(wf.modules[0].datasets[DS_NAME])
    # Key repair lens on the 'Empid' column
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_key_repair(DS_NAME, ds1.column_by_name('Empid').identifier)
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertFalse(wf.has_error)
    self.assertEquals(
        wf.modules[-1].command_text.upper(),
        'KEY REPAIR FOR EMPID IN ' + DS_NAME.upper()
    )
    # The dataset of the first module is unchanged
    ds2 = self.datastore.get_dataset(wf.modules[0].datasets[DS_NAME])
    self.assertEquals(ds1.row_count, ds2.row_count)
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    self.assertEquals(len(ds.columns), 4)
    self.assertEquals(ds.row_count, 2)
    names = set()
    empids = set()
    rowids = set()
    for row in DatasetClient(dataset=ds).rows:
        rowids.add(row.identifier)
        empids.add(int(row.get_value('empid')))
        names.add(row.get_value('name'))
    self.assertTrue(1 in empids)
    self.assertTrue(2 in rowids)
    self.assertTrue('Alice' in names)
    self.assertTrue('Carla' in names)
    # Test error case and command text
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_key_repair('MY DS', 'MY COL')
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertTrue(wf.has_error)
    self.assertEquals(
        wf.modules[-1].command_text.upper(),
        'KEY REPAIR FOR \'MY COL\' IN \'MY DS\''
    )
    mimir.finalize()

def test_datastore(self):
    """Test functionality of the file server data store."""
    mimir.initialize()
    ds = self.db.load_dataset(self.fileserver.upload_file(CSV_FILE))
    self.assertEquals(ds.column_counter, 3)
    self.assertEquals(ds.row_counter, 2)
    self.assertEquals(ds.row_count, 2)
    cols = [
        ('NAME', COL_PREFIX + '0', 'varchar'),
        ('AGE', COL_PREFIX + '1', 'int'),
        ('SALARY', COL_PREFIX + '2', 'varchar')
    ]
    control_rows = [(0, ['Alice', 23, '35K']), (1, ['Bob', 32, '30K'])]
    for column in ds.columns:
        self.validate_column(column, cols[column.identifier])
    self.validate_rowid_column(ds.rowid_column)
    self.validate_rows(ds.fetch_rows(), control_rows)
    # Get dataset and repeat tests
    ds = self.db.get_dataset(ds.identifier)
    self.assertEquals(ds.column_counter, 3)
    self.assertEquals(ds.row_counter, 2)
    self.assertEquals(len(ds.row_ids), 2)
    for column in ds.columns:
        self.validate_column(column, cols[column.identifier])
    self.validate_rowid_column(ds.rowid_column)
    self.validate_rows(ds.fetch_rows(), control_rows)
    # Create dataset with the two rows swapped and the first name changed
    names = ['NAME', 'AGE', 'SALARY']
    rows = ds.fetch_rows()
    rows[0].values[0] = 'Jane'
    rows = [rows[1], rows[0]]
    ds = self.db.create_dataset(columns=ds.columns, rows=rows)
    ds = self.db.get_dataset(ds.identifier)
    for i in range(3):
        col = ds.columns[i]
        self.assertEquals(col.identifier, i)
        self.assertEquals(col.name, names[i])
    rows = ds.fetch_rows()
    for i in range(len(rows)):
        row = rows[(len(rows) - 1) - i]
        self.assertEquals(row.identifier, i)
    self.assertEquals(rows[1].values[0], 'Jane')
    # DONE
    mimir.finalize()

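# NOTE: `validate_column`, `validate_rowid_column`, and `validate_rows` are
# helpers defined elsewhere in this test class. The sketches below are
# illustrative assumptions reconstructed from the call sites above (the
# attribute names come from the dataset objects used in these tests), not the
# actual implementations:
def validate_column(self, column, control):
    """Check a column against a (name, name_in_rdb, data_type) tuple."""
    name, name_in_rdb, data_type = control
    self.assertEquals(column.name, name)
    self.assertEquals(column.name_in_rdb, name_in_rdb)
    self.assertEquals(column.data_type, data_type)

def validate_rows(self, rows, control_rows):
    """Check fetched rows against a list of (identifier, values) tuples."""
    self.assertEquals(len(rows), len(control_rows))
    for row, (identifier, values) in zip(rows, control_rows):
        self.assertEquals(row.identifier, identifier)
        self.assertEquals(row.values, values)
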
def test_domain_lens(self):
    """Test DOMAIN lens."""
    # Create new work trail and retrieve the HEAD workflow of the default
    # branch
    mimir.initialize()
    f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE)
    vt = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.load_dataset(f_handle.identifier, DS_NAME)
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    col_age = ds.column_by_name('Age')
    self.assertFalse(wf.has_error)
    # Domain lens on the 'Age' column
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_domain(DS_NAME, col_age.identifier)
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    if wf.has_error:
        print wf.modules[-1].stderr[0]
    self.assertEquals(wf.modules[-1].command_text.upper(), 'DOMAIN FOR AGE IN PEOPLE')
    self.assertFalse(wf.has_error)
    self.assertEquals(len(wf.modules), 2)
    # Get dataset
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    rows = ds.fetch_rows()
    self.assertNotEquals(rows[2].values[ds.column_index('Age')], '')
    # Introduce an error. Make sure command formatting is correct
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_domain('MY DS', 'MY COL')
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertTrue(wf.has_error)
    self.assertEquals(
        wf.modules[-1].command_text.upper(),
        'DOMAIN FOR \'MY COL\' IN \'MY DS\''
    )
    mimir.finalize()

def test_annotations(self):
    """Test cell annotations after applying a MISSING_VALUE lens."""
    # Create new work trail and create dataset from CSV file
    mimir.initialize()
    f_handle = self.fileserver.upload_file(CSV_FILE)
    vt = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.load_dataset(f_handle.identifier, DS_NAME)
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    # Missing value lens
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_missing_value(DS_NAME, ds.column_by_name('AGE').identifier)
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    annos = ds.get_annotations(column_id=1, row_id=2)
    self.assertEquals(len(annos), 2)
    for anno in annos:
        self.assertEquals(anno.key, ANNO_UNCERTAIN)
    mimir.finalize()

def test_schema_matching_lens(self):
    """Test SCHEMA_MATCHING lens."""
    # Create new work trail and retrieve the HEAD workflow of the default
    # branch
    mimir.initialize()
    f_handle = self.fileserver.upload_file(CSV_FILE)
    vt = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.load_dataset(f_handle.identifier, DS_NAME)
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertFalse(wf.has_error)
    # Schema matching lens
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_schema_matching(
            DS_NAME,
            [
                {'column': 'BDate', 'type': 'int'},
                {'column': 'PName', 'type': 'varchar'}
            ],
            'new_' + DS_NAME
        )
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertFalse(wf.has_error)
    self.assertEquals(
        wf.modules[-1].command_text.upper(),
        'SCHEMA MATCHING PEOPLE (BDATE INT, PNAME VARCHAR) AS NEW_' + DS_NAME.upper()
    )
    # Get dataset
    self.assertEquals(len(wf.modules[-1].datasets), 2)
    ds = self.datastore.get_dataset(wf.modules[-1].datasets['new_' + DS_NAME])
    self.assertEquals(len(ds.columns), 2)
    self.assertEquals(ds.row_count, 2)
    # Error if adding an existing dataset
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_schema_matching(
            DS_NAME,
            [{'column': 'BDate', 'type': 'int'}],
            'new_' + DS_NAME
        )
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertTrue(wf.has_error)
    self.db.replace_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_schema_matching(
            DS_NAME,
            [{'column': 'BDate', 'type': 'int'}],
            'a_new_' + DS_NAME
        ),
        module_id=wf.modules[-1].identifier
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertFalse(wf.has_error)
    self.assertEquals(
        wf.modules[-1].command_text.upper(),
        'SCHEMA MATCHING PEOPLE (BDATE INT) AS A_NEW_' + DS_NAME.upper()
    )
    # Error when adding a dataset with an invalid name
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_schema_matching(
            DS_NAME,
            [{'column': 'BDate', 'type': 'int'}],
            'SOME NAME'
        )
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertTrue(wf.has_error)
    self.assertEquals(
        wf.modules[-1].command_text.upper(),
        'SCHEMA MATCHING PEOPLE (BDATE INT) AS \'SOME NAME\''
    )
    mimir.finalize()

def test_missing_value_lens(self):
    """Test MISSING_VALUE lens."""
    # Create new work trail and retrieve the HEAD workflow of the default
    # branch
    mimir.initialize()
    f_handle = self.fileserver.upload_file(INCOMPLETE_CSV_FILE)
    vt = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.load_dataset(f_handle.identifier, DS_NAME)
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    self.assertFalse(wf.has_error)
    # Missing value lens
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_missing_value(DS_NAME, ds.column_by_name('AGE').identifier)
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertFalse(wf.has_error)
    self.assertEquals(
        wf.modules[-1].command_text.upper(),
        'MISSING VALUES FOR AGE IN ' + DS_NAME.upper()
    )
    self.assertEquals(len(wf.modules), 2)
    # Get dataset
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    rows = ds.fetch_rows()
    self.assertNotEquals(rows[2].values[ds.column_index('Age')], '')
    # Annotations
    annotations = ds.get_annotations(column_id=1, row_id=4)
    self.assertEquals(len(annotations), 2)
    # Missing value lens with value constraint
    vt = self.db.create_viztrail(ENGINE_ID, {'name': 'New Project'})
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.load_dataset(f_handle.identifier, DS_NAME)
    )
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_missing_value(
            DS_NAME,
            ds.column_by_name('AGE').identifier,
            constraint='> 30'
        )
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    if wf.has_error:
        print wf.modules[-1].stderr[0]
    self.assertFalse(wf.has_error)
    self.assertEquals(
        wf.modules[-1].command_text.upper(),
        'MISSING VALUES FOR AGE IN ' + DS_NAME.upper() + ' WITH CONSTRAINT > 30'
    )
    #self.assertEquals(wf.modules[-1].command_text.upper(), 'MISSING VALUES FOR AGE IN ' + DS_NAME.upper())
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    rows = ds.fetch_rows()
    self.assertTrue(rows[2].values[ds.column_index('Age')] > 30)
    # Command text in case of error
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_missing_value('MY DS', '?', constraint='A B')
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertTrue(wf.has_error)
    cmd_text = wf.modules[-1].command_text.upper()
    expected_text = 'MISSING VALUES FOR ? IN \'MY DS\' WITH CONSTRAINT A B'
    self.assertEquals(cmd_text, expected_text)
    mimir.finalize()

def test_mimir_engine(self):
    """Test functionality of the Mimir VizUAL engine."""
    import vistrails.packages.mimir.init as mimir
    mimir.initialize()
    self.run_engine_tests(ENGINEENV_MIMIR)
    mimir.finalize()

            row_ids[row_index]
        ).set_annotation('mimir:uncertain', 'true')
    return annotations


def get_tempfile():
    """Return the path to a temporary CSV file. Try to get a unique name to
    avoid problems with existing datasets.

    Returns
    -------
    string
    """
    tmp_prefix = 'DS_' + get_unique_identifier()
    return tempfile.mkstemp(suffix='.csv', prefix=tmp_prefix)[1]


CSV_FILE = './dataset_load_test.csv'
#CSV_FILE = '../data/dataset.csv'
#CSV_FILE = './reload_dataset.csv'

mimir.initialize()
ds = load_dataset(os.path.abspath(CSV_FILE))
print [col.name for col in ds.columns]
for row_id in ds.row_ids:
    for col in ds.columns:
        anno = ds.annotations.for_cell(col.identifier, row_id)
mimir.finalize()

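# Usage sketch for `get_tempfile`: `tempfile.mkstemp` creates the file on
# disk, so the returned path can be written to directly; the caller is
# responsible for deleting the file when done. The CSV content below is
# illustrative only.
tmp_file = get_tempfile()
with open(tmp_file, 'w') as f:
    f.write('NAME,AGE\n')
    f.write('Alice,23\n')
# ... load or upload tmp_file here ...
os.remove(tmp_file)
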
def test_picker_lens(self):
    """Test PICKER lens."""
    # Create new work trail and retrieve the HEAD workflow of the default
    # branch
    mimir.initialize()
    f_handle = self.fileserver.upload_file(PICKER_FILE)
    vt = self.db.create_viztrail(ENGINE_ID, {'name': 'My Project'})
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.load_dataset(f_handle.identifier, DS_NAME)
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertFalse(wf.has_error)
    # Picker lens over the 'Age' and 'Salary' columns
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_picker(DS_NAME, [
            {'pickFrom': ds.column_by_name('Age').identifier},
            {'pickFrom': ds.column_by_name('Salary').identifier}
        ])
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    if wf.modules[-1].has_error:
        print wf.modules[-1].stderr
    self.assertFalse(wf.has_error)
    self.assertEquals(
        wf.modules[-1].command_text.upper(),
        'PICK FROM AGE,SALARY IN ' + DS_NAME.upper()
    )
    # Get dataset
    self.assertEquals(len(wf.modules[-1].datasets), 1)
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    columns = [c.name for c in ds.columns]
    self.assertEquals(len(ds.columns), 5)
    self.assertTrue('PICK_ONE_AGE_SALARY' in columns)
    # Pick another column, this time with a custom name
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_picker(
            DS_NAME,
            [
                {'pickFrom': ds.column_by_name('Age').identifier},
                {'pickFrom': ds.column_by_name('Salary').identifier}
            ],
            pick_as='My Column'
        )
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    self.assertFalse(wf.has_error)
    # Get dataset
    self.assertEquals(len(wf.modules[-1].datasets), 1)
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    columns = [c.name for c in ds.columns]
    self.assertEquals(len(ds.columns), 6)
    self.assertTrue('PICK_ONE_AGE_SALARY' in columns)
    self.assertTrue('My Column' in columns)
    # Pick from a picked column
    self.db.append_workflow_module(
        viztrail_id=vt.identifier,
        command=cmd.mimir_picker(
            DS_NAME,
            [
                {'pickFrom': ds.column_by_name('Age').identifier},
                {'pickFrom': ds.column_by_name('PICK_ONE_AGE_SALARY').identifier}
            ],
            pick_as='My Column'
        )
    )
    wf = self.db.get_workflow(viztrail_id=vt.identifier)
    if wf.modules[-1].has_error:
        print wf.modules[-1].stderr
    self.assertFalse(wf.has_error)
    self.assertEquals(
        wf.modules[-1].command_text.upper(),
        'PICK FROM AGE,PICK_ONE_AGE_SALARY AS \'MY COLUMN\' IN ' + DS_NAME.upper()
    )
    ds = self.datastore.get_dataset(wf.modules[-1].datasets[DS_NAME])
    mimir.finalize()