def test_load_dataset(self):
    """Test validation of the load dataset command."""
    # Minimal form: dataset name and file identifier only.
    vizual.load_dataset(
        dataset_name='ABC',
        file={pckg.FILE_ID: '493ewkfj485ufjw490feofj'},
        validate=True
    )
    # Extended form: explicit load format plus header/type options.
    vizual.load_dataset(
        dataset_name='ABC',
        file={pckg.FILE_ID: '493ewkfj485ufjw490feofj'},
        load_format='json',
        detect_headers=True,
        infer_types=True,
        validate=True
    )
def test_skip_modules(self):
    """Test replacing a module in a workflow where some cells do not
    require to be re-executed because they access a different dataset.
    """
    project = self.engine.projects.create_project()
    branch_id = project.get_default_branch().identifier
    fh1 = project.filestore.upload_file(CSV_FILE)
    fh2 = project.filestore.upload_file(CSV_FILE)
    # Load two independent datasets from the same source file.
    self.engine.append_workflow_module(
        project_id=project.identifier,
        branch_id=branch_id,
        command=load_dataset(
            dataset_name=DATASET_NAME,
            file={pckg.FILE_ID: fh1.identifier}))
    self.engine.append_workflow_module(
        project_id=project.identifier,
        branch_id=branch_id,
        command=load_dataset(
            dataset_name=SECOND_DATASET_NAME,
            file={pckg.FILE_ID: fh2.identifier}))
    # Append ten cells that alternate between updating the first dataset
    # (even positions) and the second dataset (odd positions).
    for i in range(10):
        if i % 2 == 0:
            cmd = python_cell(PY_ADD_ONE)
        else:
            cmd = python_cell(PY_ADD_SECOND)
        self.engine.append_workflow_module(
            project_id=project.identifier,
            branch_id=branch_id,
            command=cmd)
    # Wait for the workflow to finish executing.
    while project.viztrail.default_branch.head.is_active:
        time.sleep(0.1)
    wf = project.viztrail.default_branch.head
    self.assertTrue(wf.get_state().is_success)
    # Snapshot the dataset descriptors of the python cells before the
    # replacement so we can compare identifiers afterwards.
    datasets = [module.datasets for module in wf.modules[4:]]
    self.assert_module_count_is(project, 12)
    # Replace a module that updates the first dataset. All modules that
    # access the second dataset should remain unchanged.
    self.engine.replace_workflow_module(
        project_id=project.identifier,
        branch_id=branch_id,
        module_id=wf.modules[4].identifier,
        command=python_cell(PY_ADD_TEN))
    while project.viztrail.default_branch.head.is_active:
        time.sleep(0.1)
    wf = project.viztrail.default_branch.head
    self.assertTrue(wf.get_state().is_success)
    for i, module in enumerate(wf.modules[4:]):
        # The first dataset was re-computed and has a new identifier ...
        self.assertNotEqual(
            datasets[i][DATASET_NAME].identifier,
            module.datasets[DATASET_NAME].identifier)
        # ... while the second dataset was carried over unchanged.
        self.assertEqual(
            datasets[i][SECOND_DATASET_NAME].identifier,
            module.datasets[SECOND_DATASET_NAME].identifier)
def test_create_api_from_dictionary(self):
    """Test creating the processor instance with properties parameter
    instead of api.
    """
    # Instantiate the processor from a class-loader dictionary rather
    # than passing an api object directly.
    processor = VizualTaskProcessor(
        properties={
            PROPERTY_API: ClassLoader.to_dict(
                module_name='vizier.engine.packages.vizual.api.fs',
                class_name='DefaultVizualApi')
        })
    fh = self.filestore.upload_file(CSV_FILE)
    cmd = vizual.load_dataset(
        dataset_name=DATASET_NAME,
        file={pckg.FILE_ID: fh.identifier},
        validate=True)
    result = processor.compute(
        command_id=cmd.command_id,
        arguments=cmd.arguments,
        context=TaskContext(
            project_id=5,
            datastore=self.datastore,
            filestore=self.filestore,
            artifacts={}))
    # The load command writes exactly the loaded dataset and reads
    # nothing.
    self.assertIsNotNone(result.provenance.write)
    self.assertTrue(DATASET_NAME in result.provenance.write)
    dataset_id = result.provenance.write[DATASET_NAME].identifier
    self.assertTrue(result.provenance.read is None
                    or len(result.provenance.read) == 0)
    # The provenance resources record the identifier of the created
    # dataset.
    self.assertIsNotNone(result.provenance.resources)
    self.assertEqual(result.provenance.resources[RESOURCE_DATASET], dataset_id)
def test_load_dataset(self): """Test functionality to load a dataset.""" # Create a new dataset fh = self.filestore.upload_file(CSV_FILE) cmd = vizual.load_dataset(dataset_name='ABC', file={pckg.FILE_ID: fh.identifier}, validate=True) result = self.processor.compute(command_id=cmd.command_id, arguments=cmd.arguments, context=TaskContext( project_id=5, datastore=self.datastore, filestore=self.filestore, artifacts={})) self.assertIsNotNone(result.provenance.write) self.assertTrue('abc' in result.provenance.write) dataset_id = result.provenance.write['abc'].identifier self.assertTrue(result.provenance.read is None or len(result.provenance.read) == 0) self.assertIsNotNone(result.provenance.resources) self.assertEqual(result.provenance.resources[RESOURCE_DATASET], dataset_id) # Running load again will not change the dataset identifier result = self.processor.compute( command_id=cmd.command_id, arguments=cmd.arguments, context=TaskContext(project_id=5, datastore=self.datastore, filestore=self.filestore, artifacts={}, resources=result.provenance.resources)) self.assertEqual(result.provenance.write['abc'].identifier, dataset_id) self.assertEqual(result.provenance.resources[RESOURCE_DATASET], dataset_id)
def test_can_execute(self):
    """Test the can execute method with different commands."""
    # Commands the backend is expected to accept for execution.
    supported = [
        vizual.load_dataset(
            dataset_name=DATASET_NAME,
            file={pckg.FILE_ID: '000'},
            validate=True),
        vizual.update_cell(
            dataset_name=DATASET_NAME,
            column=1,
            row=0,
            value=9,
            validate=True),
        pycell.python_cell(source=CREATE_DATASET_PY, validate=True)
    ]
    for command in supported:
        self.assertTrue(self.backend.can_execute(command))
    # Commands the backend is expected to reject.
    unsupported = [
        vizual.insert_row(
            dataset_name=DATASET_NAME,
            position=1,
            validate=True),
        vizual.drop_dataset(dataset_name=DATASET_NAME, validate=True)
    ]
    for command in unsupported:
        self.assertFalse(self.backend.can_execute(command))
def test_execute_with_error(self):
    """Test running a sequence of statements where we (potentially) append
    to a workflow that is in error state.
    """
    project = self.engine.projects.create_project()
    branch_id = project.get_default_branch().identifier
    fh = project.filestore.upload_file(CSV_FILE)
    cmd = load_dataset(
        dataset_name=DATASET_NAME,
        file={pckg.FILE_ID: fh.identifier})
    self.engine.append_workflow_module(
        project_id=project.identifier,
        branch_id=branch_id,
        command=cmd)
    # Append twenty cells that all raise an error when executed.
    for _ in range(20):
        self.engine.append_workflow_module(
            project_id=project.identifier,
            branch_id=branch_id,
            command=python_cell(PY_ADD_ONE_ERROR))
    # Wait for the workflow to finish.
    while project.viztrail.default_branch.head.is_active:
        time.sleep(0.1)
    wf = project.viztrail.default_branch.head
    self.assertIsNotNone(wf)
    # The second module will raise an error. All following modules should
    # be canceled. Iterate over all 21 modules (the original loop stopped
    # at 20 and never checked the last appended cell).
    for i, module in enumerate(wf.modules):
        if i == 0:
            self.assertTrue(module.is_success)
        elif i == 1:
            self.assertTrue(module.is_error)
        else:
            self.assertTrue(module.is_canceled)
def create_workflow(self, project):
    """Create a completed workflow by loading the data file and updating
    the age value of the first row ten times.

    Returns the identifier of the default branch that the modules were
    appended to. Asserts that every module succeeded and wrote the
    dataset.
    """
    branch_id = project.viztrail.default_branch.identifier
    fh = project.filestore.upload_file(CSV_FILE)
    cmd = load_dataset(
        dataset_name=DATASET_NAME,
        file={pckg.FILE_ID: fh.identifier}
    )
    self.engine.append_workflow_module(
        project_id=project.identifier,
        branch_id=branch_id,
        command=cmd
    )
    # Append ten identical python cells that each update the dataset.
    for i in range(10):
        cmd = python_cell(PY_ADD_ONE)
        self.engine.append_workflow_module(
            project_id=project.identifier,
            branch_id=branch_id,
            command=cmd
        )
    # Block until the workflow head has finished executing.
    while project.viztrail.default_branch.head.is_active:
        time.sleep(0.1)
    for module in project.viztrail.default_branch.head.modules:
        if not module.is_success:
            # Print outputs to aid debugging before the assertion fails.
            print(module.outputs)
        self.assertTrue(module.is_success)
        self.assertTrue(DATASET_NAME in module.provenance.write)
    return branch_id
def test_execute(self):
    """Test executing a sequence of supported commands."""
    context = dict()
    fh = self.backend.projects.get_project(
        self.PROJECT_ID).filestore.upload_file(CSV_FILE)
    # Load the dataset into an empty database state.
    cmd = vizual.load_dataset(
        dataset_name=DATASET_NAME,
        file={pckg.FILE_ID: fh.identifier},
        validate=True)
    result = self.backend.execute(
        task=TaskHandle(task_id='000', project_id=self.PROJECT_ID),
        command=cmd,
        context=context)
    self.assertTrue(result.is_success)
    state = result.provenance.get_database_state(prev_state=dict())
    context = task_context(state)
    # Update a cell in the loaded dataset. This yields a new version of
    # the dataset (its identifier changes).
    cmd = vizual.update_cell(
        dataset_name=DATASET_NAME,
        column=1,
        row=0,
        value=9,
        validate=True)
    result = self.backend.execute(
        task=TaskHandle(task_id='000', project_id=self.PROJECT_ID),
        command=cmd,
        context=context)
    self.assertTrue(result.is_success)
    state = result.provenance.get_database_state(prev_state=state)
    self.assertNotEqual(context[DATASET_NAME], task_context(state)[DATASET_NAME])
    context = task_context(state)
    # Run a python cell that creates a second dataset; the first dataset
    # remains unchanged.
    cmd = pycell.python_cell(source=CREATE_DATASET_PY, validate=True)
    result = self.backend.execute(
        task=TaskHandle(task_id='000', project_id=self.PROJECT_ID),
        command=cmd,
        context=context)
    self.assertTrue(result.is_success)
    state = result.provenance.get_database_state(prev_state=state)
    self.assertTrue(SECOND_DATASET_NAME in state)
    self.assertEqual(context[DATASET_NAME], task_context(state)[DATASET_NAME])
    context = task_context(state)
    # Update a cell in the second dataset; only the second dataset's
    # identifier changes.
    cmd = vizual.update_cell(
        dataset_name=SECOND_DATASET_NAME,
        column=1,
        row=0,
        value=9,
        validate=True)
    result = self.backend.execute(
        task=TaskHandle(task_id='000', project_id=self.PROJECT_ID),
        command=cmd,
        context=context)
    self.assertTrue(result.is_success)
    state = result.provenance.get_database_state(prev_state=state)
    self.assertEqual(context[DATASET_NAME], task_context(state)[DATASET_NAME])
    self.assertNotEqual(
        context[SECOND_DATASET_NAME],
        task_context(state)[SECOND_DATASET_NAME])
def test_workflow(self): """Run workflows for Mimir configurations.""" # Create new work trail and retrieve the HEAD workflow of the default # branch project = self.engine.projects.create_project() branch_id = project.viztrail.default_branch.identifier fh = project.filestore.upload_file(CSV_FILE) cmd = load_dataset(dataset_name=DATASET_PEOPLE, file={ pckg.FILE_ID: fh.identifier, pckg.FILE_NAME: os.path.basename(CSV_FILE) }, infer_types=True) self.engine.append_workflow_module(project_id=project.identifier, branch_id=branch_id, command=cmd) cmd = python_cell(PY_ADD_ONE) self.engine.append_workflow_module(project_id=project.identifier, branch_id=branch_id, command=cmd) wf = project.viztrail.default_branch.head while project.viztrail.default_branch.head.is_active: time.sleep(0.1) for m in wf.modules: print(m) self.assertTrue(m.is_success) cmd = python_cell(CREATE_DATASET_PY) self.engine.insert_workflow_module( project_id=project.identifier, branch_id=branch_id, before_module_id=wf.modules[0].identifier, command=cmd) wf = project.viztrail.default_branch.head while project.viztrail.default_branch.head.is_active: time.sleep(0.1) for m in wf.modules: print(m) self.assertTrue(m.is_success) datasets = wf.modules[0].provenance.write self.assertTrue(DATASET_FRIENDS in datasets) self.assertFalse(DATASET_PEOPLE in datasets) for m in wf.modules[1:]: datasets = m.provenance.get_database_state(datasets) self.assertTrue(DATASET_FRIENDS in datasets) self.assertTrue(DATASET_PEOPLE in datasets) ds = project.datastore.get_dataset(datasets[DATASET_PEOPLE].identifier) rows = ds.fetch_rows() self.assertEqual(rows[0].values, ['Alice', 24]) self.assertEqual(rows[1].values, ['Bob', 32]) ds = project.datastore.get_dataset( datasets[DATASET_FRIENDS].identifier) rows = ds.fetch_rows() self.assertEqual(rows[0].values, ['Yonder', 23]) self.assertEqual(rows[1].values, ['Zoe', 34])
def load_dataset(self):
    """Load a single dataset and return the resulting database state."""
    fh = self.filestore.upload_file(CSV_FILE)
    cmd = vizual.load_dataset(
        dataset_name=DATASET_NAME,
        file={pckg.FILE_ID: fh.identifier},
        validate=True)
    # NOTE(review): unlike other compute() call sites in this file, this
    # TaskContext is created without project_id/artifacts arguments —
    # confirm that this matches the TaskContext signature in use here.
    result = self.processor.compute(
        command_id=cmd.command_id,
        arguments=cmd.arguments,
        context=TaskContext(
            datastore=self.datastore,
            filestore=self.filestore))
    # Mapping of dataset name(s) to the descriptors the command wrote.
    return result.provenance.write
def test_load_dataset(self):
    """Test validation of load dataset command."""
    db = FileSystemFilestore(SERVER_DIR)
    fh = db.upload_file(CSV_FILE)
    # File identifier plus file name: the external form shows the name.
    cmd = load_dataset(
        dataset_name='ds',
        file={
            pckg.FILE_ID: fh.identifier,
            pckg.FILE_NAME: fh.file_name
        },
        validate=True).to_external_form(
            command=PACKAGE.get(vizual.VIZUAL_LOAD),
            datasets=DATASETS)
    self.assertEqual(cmd, 'LOAD DATASET ds FROM ' + fh.file_name)
    # URL only: the external form shows the URL.
    cmd = load_dataset(
        dataset_name='ds',
        file={
            pckg.FILE_URL: 'http://some.file.url'
        },
        validate=True).to_external_form(
            command=PACKAGE.get(vizual.VIZUAL_LOAD),
            datasets=DATASETS)
    self.assertEqual(cmd, 'LOAD DATASET ds FROM http://some.file.url')
    # Both identifier and URL given: the URL takes precedence.
    cmd = load_dataset(
        dataset_name='ds',
        file={
            pckg.FILE_ID: fh.identifier,
            pckg.FILE_URL: 'http://some.file.url'
        },
        validate=True).to_external_form(
            command=PACKAGE.get(vizual.VIZUAL_LOAD),
            datasets=DATASETS)
    self.assertEqual(cmd, 'LOAD DATASET ds FROM http://some.file.url')
    # Identifier only, no name: the identifier appears quoted.
    cmd = load_dataset(
        dataset_name='ds',
        file={
            pckg.FILE_ID: 'Some File'
        },
        validate=True).to_external_form(
            command=PACKAGE.get(vizual.VIZUAL_LOAD),
            datasets=DATASETS)
    self.assertEqual(cmd, 'LOAD DATASET ds FROM \'Some File\'')
def create_workflow(self, project):
    """Create a completed workflow by loading the data file and updating
    the age value of the first row ten times.
    """
    branch_id = project.viztrail.default_branch.identifier
    fh = project.filestore.upload_file(CSV_FILE)
    # Start the workflow by loading the dataset from the uploaded file.
    self.engine.append_workflow_module(
        project_id=project.identifier,
        branch_id=branch_id,
        command=load_dataset(
            dataset_name=DATASET_NAME,
            file={pckg.FILE_ID: fh.identifier}))
    # Append ten identical python cells that update the dataset.
    for _ in range(10):
        self.engine.append_workflow_module(
            project_id=project.identifier,
            branch_id=branch_id,
            command=python_cell(PY_ADD_ONE))
    # Block until the workflow head is no longer active.
    while project.viztrail.default_branch.head.is_active:
        time.sleep(0.1)
    return branch_id
def test_create_synchronous_workflow(self):
    """Create workflow by appending a sequence of modules that are
    executed synchronously.
    """
    project = self.engine.projects.create_project()
    # MODULE 1
    # --------
    # LOAD people
    fh = project.filestore.upload_file(PEOPLE_FILE)
    module = self.engine.append_workflow_module(
        project_id=project.identifier,
        branch_id=project.get_default_branch().identifier,
        command=load_dataset(
            dataset_name='people',
            file={pckg.FILE_ID: fh.identifier},
            validate=True))
    self.assertTrue(module.is_success)
    self.assertTrue('people' in module.provenance.write)
    self.assertEqual(len(module.provenance.write['people'].columns), 2)
    # MODULE 2
    # --------
    # UPDATE CELL
    module = self.engine.append_workflow_module(
        project_id=project.identifier,
        branch_id=project.get_default_branch().identifier,
        command=update_cell(
            dataset_name='people',
            column=1,
            row=0,
            value='42',
            validate=True))
    print("STATUS: {}".format(module))
    self.assertTrue(module.is_success)
    self.assertTrue('people' in module.provenance.write)
    # MODULE 3
    # --------
    # LOAD employee
    fh = project.filestore.upload_file(EMPLOYEE_FILE)
    module = self.engine.append_workflow_module(
        project_id=project.identifier,
        branch_id=project.get_default_branch().identifier,
        command=load_dataset(
            dataset_name='employee',
            file={pckg.FILE_ID: fh.identifier},
            validate=True))
    self.assertTrue(module.is_success)
    self.assertFalse('people' in module.provenance.write)
    self.assertTrue('employee' in module.provenance.write)
    #
    # Reload engine and check the module states
    #
    self.engine = get_engine(AppConfig())
    project = self.engine.projects.get_project(project.identifier)
    modules = project.get_default_branch().get_head().modules
    self.assertEqual(len(modules), 3)
    for m in modules:
        self.assertTrue(m.is_success)
        self.assertIsNotNone(m.timestamp.created_at)
        self.assertIsNotNone(m.timestamp.started_at)
        self.assertIsNotNone(m.timestamp.finished_at)
        self.assertIsNotNone(m.provenance.write)
    self.assertTrue('people' in modules[0].provenance.write)
    self.assertTrue('employee' in modules[-1].provenance.write)
    # The update in module 2 produced a new version of the people dataset.
    self.assertNotEqual(
        modules[0].provenance.write['people'].identifier,
        modules[1].provenance.write['people'].identifier)
def test_create_synchronous_workflow_with_errors(self):
    """Create workflow by appending a sequence of modules that are
    executed synchronously.
    """
    project = self.engine.projects.create_project()
    # MODULE 1
    # --------
    # LOAD people
    fh = project.filestore.upload_file(PEOPLE_FILE)
    module = self.engine.append_workflow_module(
        project_id=project.identifier,
        branch_id=project.get_default_branch().identifier,
        command=load_dataset(
            dataset_name='people',
            file={
                pckg.FILE_ID: fh.identifier,
                pckg.FILE_NAME: os.path.basename(PEOPLE_FILE)
            },
            validate=True))
    project = self.engine.projects.get_project(project.identifier)
    modules = project.get_default_branch().get_head().modules
    for m in modules:
        print(m)
        self.assertTrue(m.is_success)
    # MODULE 2
    # --------
    # UPDATE CELL
    # The employee dataset does not exist yet, so this module fails.
    module = self.engine.append_workflow_module(
        project_id=project.identifier,
        branch_id=project.get_default_branch().identifier,
        command=update_cell(
            dataset_name='employee',
            column=1,
            row=0,
            value='42',
            validate=True))
    self.assertTrue(module.is_error)
    # MODULE 2
    # --------
    # INSERT employee
    # Inserting the employee load before the failed update repairs the
    # workflow (both affected modules are re-executed).
    fh = project.filestore.upload_file(EMPLOYEE_FILE)
    result = self.engine.insert_workflow_module(
        project_id=project.identifier,
        branch_id=project.get_default_branch().identifier,
        before_module_id=module.identifier,
        command=load_dataset(
            dataset_name='employee',
            file={
                pckg.FILE_ID: fh.identifier,
                pckg.FILE_NAME: os.path.basename(EMPLOYEE_FILE)
            },
            validate=True))
    self.assertEqual(len(result), 2)
    # Wait for the operations to finish
    while project.viztrail.default_branch.head.is_active:
        time.sleep(0.1)
    #
    # Reload engine and check the module states
    #
    self.engine = get_engine(AppConfig())
    project = self.engine.projects.get_project(project.identifier)
    modules = project.get_default_branch().get_head().modules
    for m in modules:
        print(m)
        self.assertTrue(m.is_success)
    # MODULE 1
    # --------
    # UPDATE CELL
    # Insert an update for the unknown friends dataset at the head: the
    # new first module fails and all downstream modules are canceled.
    module = self.engine.insert_workflow_module(
        project_id=project.identifier,
        branch_id=project.get_default_branch().identifier,
        before_module_id=modules[0].identifier,
        command=update_cell(
            dataset_name='friends',
            column=1,
            row=0,
            value='43',
            validate=True))
    # Wait for the operations to finish
    while project.viztrail.default_branch.head.is_active:
        time.sleep(0.1)
    modules = project.get_default_branch().get_head().modules
    self.assertEqual(len(modules), 4)
    self.assertTrue(modules[0].is_error)
    for m in modules[1:]:
        self.assertTrue(m.is_canceled)
    # MODULE 1
    # --------
    # INSERT friends
    # Inserting a load for the friends dataset at the head repairs the
    # workflow again.
    fh = project.filestore.upload_file(EMPLOYEE_FILE)
    result = self.engine.insert_workflow_module(
        project_id=project.identifier,
        branch_id=project.get_default_branch().identifier,
        before_module_id=modules[0].identifier,
        command=load_dataset(
            dataset_name='friends',
            file={
                pckg.FILE_ID: fh.identifier,
                pckg.FILE_NAME: os.path.basename(EMPLOYEE_FILE)
            },
            validate=True))
    self.assertEqual(len(result), 5)
    # Wait for the operations to finish
    while project.viztrail.default_branch.head.is_active:
        time.sleep(0.1)
    modules = project.get_default_branch().get_head().modules
    self.assertEqual(len(modules), 5)
    for m in modules:
        self.assertTrue(m.is_success)
    # The employee file yields three columns.
    self.assertEqual(len(modules[0].provenance.write['friends'].columns), 3)
    # REPLACE MODULE 1
    # ----------------
    # Load people dataset instead employee
    fh = project.filestore.upload_file(PEOPLE_FILE)
    result = self.engine.replace_workflow_module(
        project_id=project.identifier,
        branch_id=project.get_default_branch().identifier,
        module_id=modules[0].identifier,
        command=load_dataset(
            dataset_name='friends',
            file={
                pckg.FILE_ID: fh.identifier,
                pckg.FILE_NAME: os.path.basename(PEOPLE_FILE)
            },
            validate=True))
    self.assertEqual(len(result), 5)
    # Wait for the operations to finish
    while project.viztrail.default_branch.head.is_active:
        time.sleep(0.1)
    modules = project.get_default_branch().get_head().modules
    self.assertEqual(len(modules), 5)
    for m in modules:
        self.assertTrue(m.is_success)
    # The people file yields two columns.
    self.assertEqual(len(modules[0].provenance.write['friends'].columns), 2)
    ds = project.datastore.get_dataset(
        modules[0].provenance.write['friends'].identifier)
    self.assertEqual(ds.fetch_rows()[0].values[1], 23)
    #
    # Reload engine and check the module states
    #
    self.engine = get_engine(AppConfig())
    project = self.engine.projects.get_project(project.identifier)
    modules = project.get_default_branch().get_head().modules
    self.assertEqual(len(modules), 5)
    for m in modules:
        self.assertTrue(m.is_success)
    self.assertEqual(len(modules[0].provenance.write['friends'].columns), 2)
    ds = project.datastore.get_dataset(
        modules[0].provenance.write['friends'].identifier)
    self.assertEqual(ds.fetch_rows()[0].values[1], 23)
def parse_command(tokens, notebook, datasets=dict()):
    """Parse command line tokens that represent a notebook cell command.
    The command is parsed against the given notebook state. Returns the
    module command or None if the token list does not specify a valid
    command.

    The function has side effects in case a dataset is loaded from local
    file. In this case the file is uploaded before the command object is
    returned.

    Parameters
    ----------
    tokens: list(string)
        Command line tokens specifying the command
    notebook: vizier.api.client.resources.notebook.Notebook
        Current notebook state
    datasets: dict, optional
        Mapping of available dataset names to dataset identifier

    Returns
    -------
    vizier.engine.module.command.ModuleCommand
    """
    # Dispatch on the token count first, then on keyword positions.
    if len(tokens) == 2:
        if tokens[0] == 'python':
            # python <script-file>
            return pycell.python_cell(source=get_script(tokens[1]))
    elif len(tokens) == 3:
        if tokens[0:2] == ['drop', 'dataset']:
            # drop dataset <dataset>
            dataset_name = tokens[2].lower()
            if not dataset_name in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            return vizual.drop_dataset(dataset_name=dataset_name)
    elif len(tokens) >= 4 and tokens[0] == 'filter' and tokens[-2] == 'from':
        # filter <column-1>{::<new-name>} ... from <dataset>
        dataset_name = tokens[-1].lower()
        if not dataset_name in datasets:
            raise ValueError('unknown dataset \'' + dataset_name + '\'')
        ds = notebook.get_dataset(datasets[dataset_name])
        columns = list()
        for col_spec in tokens[1:-2]:
            # Each column spec may carry an optional '::<new-name>' suffix
            # that renames the column in the projection.
            if '::' in col_spec:
                col = ds.get_column(col_spec[:col_spec.find('::')])
                new_name = col_spec[col_spec.find('::') + 2:]
            else:
                col = ds.get_column(col_spec)
                new_name = None
            entry = {'column': col.identifier}
            if not new_name is None:
                entry['name'] = new_name
            columns.append(entry)
        return vizual.projection(dataset_name=dataset_name, columns=columns)
    elif len(tokens) >= 4 and tokens[0] == 'sort' and tokens[2] == 'by':
        # sort <dataset> by <column-1>{::[DESC|ASC]} ...
        dataset_name = tokens[1].lower()
        if not dataset_name in datasets:
            raise ValueError('unknown dataset \'' + dataset_name + '\'')
        ds = notebook.get_dataset(datasets[dataset_name])
        columns = list()
        for sort_spec in tokens[3:]:
            # Optional '::asc' / '::desc' suffix selects the sort order;
            # the default is ascending.
            if '::' in sort_spec:
                col = ds.get_column(sort_spec[:sort_spec.find('::')])
                sort_order = sort_spec[sort_spec.find('::') + 2:].lower()
                sort_order = sort.SORT_ASC if sort_order == 'asc' else sort.SORT_DESC
            else:
                col = ds.get_column(sort_spec)
                sort_order = sort.SORT_ASC
            columns.append({'column': col.identifier, 'order': sort_order})
        return vizual.sort_dataset(dataset_name=dataset_name, columns=columns)
    elif len(tokens) == 5:
        if tokens[0:2] == ['delete', 'column'] and tokens[3] == 'from':
            # delete column <name> from <dataset>
            dataset_name = tokens[4].lower()
            if not dataset_name in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            # Get the referenced dataset and column from the current
            # notebook state
            ds = notebook.get_dataset(datasets[dataset_name])
            col = ds.get_column(tokens[2])
            return vizual.delete_column(
                dataset_name=dataset_name,
                column=col.identifier)
        elif tokens[0:2] == ['delete', 'row'] and tokens[3] == 'from':
            # delete row <row-index> from <dataset>
            dataset_name = tokens[4].lower()
            if not dataset_name in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            return vizual.delete_row(
                dataset_name=dataset_name,
                row=int(tokens[2]))
        elif tokens[0] == 'load' and tokens[2] == 'from' and tokens[
                3] == 'file':
            # load <name> from file <file>
            # Side effect: uploads the local file to the notebook server.
            filename = tokens[4]
            file_id = notebook.upload_file(filename=filename)
            return vizual.load_dataset(
                dataset_name=tokens[1],
                file={
                    FILE_ID: file_id,
                    FILE_NAME: os.path.basename(filename)
                })
        elif tokens[0] == 'load' and tokens[2] == 'from' and tokens[3] == 'url':
            # load <name> from url <url>
            # NOTE(review): FILE_URI is used here while other code uses a
            # FILE_URL constant — confirm FILE_URI is defined/imported in
            # this module, otherwise this branch raises NameError.
            return vizual.load_dataset(
                dataset_name=tokens[1],
                file={FILE_URI: tokens[4]})
        elif tokens[0:2] == ['rename', 'dataset'] and tokens[3] == 'to':
            # rename dataset <dataset> to <new-name>
            dataset_name = tokens[2].lower()
            # Get the referenced dataset and column from the current
            # notebook state
            if not dataset_name in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            return vizual.rename_dataset(
                dataset_name=dataset_name,
                new_name=tokens[4])
        elif tokens[0] == 'update':
            # update <dataset-name> <column-name> <row-index> <value>
            dataset_name = tokens[1].lower()
            # Get the referenced dataset and column from the current
            # notebook state
            if not dataset_name in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            ds = notebook.get_dataset(datasets[dataset_name])
            col = ds.get_column(tokens[2])
            return vizual.update_cell(
                dataset_name=dataset_name,
                column=col.identifier,
                row=int(tokens[3]),
                value=cast(tokens[4]))
    elif len(tokens) >= 6 and tokens[0] == 'chart':
        if tokens[0] == 'chart' and tokens[2] == 'on' and tokens[4] == 'with':
            # chart <chart-name> on <dataset-name> with <column-name:label:start-end> ...
            dataset_name = tokens[3].lower()
            if not dataset_name in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            ds = notebook.get_dataset(datasets[dataset_name])
            series = list()
            for spec in tokens[5:]:
                # Each data series spec is <column>:<label>:<start-end>;
                # the label may be empty.
                s_tokens = spec.split(':')
                if len(s_tokens) != 3:
                    print('invalid data series ' + str(s_tokens))
                    return None
                s = {
                    'column': ds.get_column(s_tokens[0]).identifier,
                    'range': s_tokens[2].replace('-', ':')
                }
                if s_tokens[1] != '':
                    s['label'] = s_tokens[1]
                series.append(s)
            return plot.create_plot(
                chart_name=tokens[1],
                dataset_name=dataset_name,
                series=series)
    elif len(tokens) == 7:
        if tokens[0:3] == ['insert', 'row', 'into'
                           ] and tokens[4:6] == ['at', 'position']:
            # insert row into <dataset> at position <row-index>
            dataset_name = tokens[3].lower()
            if not dataset_name in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            return vizual.insert_row(
                dataset_name=dataset_name,
                position=int(tokens[6]))
        elif tokens[0:2] == ['rename', 'column'
                             ] and tokens[3] == 'in' and tokens[5] == 'to':
            # rename column <name> in <dataset> to <new-name>
            dataset_name = tokens[4].lower()
            if not dataset_name in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            ds = notebook.get_dataset(datasets[dataset_name])
            col = ds.get_column(tokens[2])
            return vizual.rename_column(
                dataset_name=dataset_name,
                column=col.identifier,
                name=tokens[6])
    elif len(tokens) == 8:
        if tokens[0:2] == [
                'insert', 'column'
        ] and tokens[3] == 'into' and tokens[5:7] == ['at', 'position']:
            # insert column <name> into <dataset> at position <column-index>
            dataset_name = tokens[4].lower()
            if not dataset_name in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            return vizual.insert_column(
                dataset_name=dataset_name,
                position=int(tokens[7]),
                name=tokens[2])
        elif tokens[0:2] == [
                'move', 'column'
        ] and tokens[3] == 'in' and tokens[5:7] == ['to', 'position']:
            # move column <name> in <dataset> to position <column-index>
            dataset_name = tokens[4].lower()
            if not dataset_name in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            ds = notebook.get_dataset(datasets[dataset_name])
            col = ds.get_column(tokens[2])
            return vizual.move_column(
                dataset_name=dataset_name,
                column=col.identifier,
                position=int(tokens[7]))
        elif tokens[0:2] == [
                'move', 'row'
        ] and tokens[3] == 'in' and tokens[5:7] == ['to', 'position']:
            # move row <row-index> in <dataset> to position <target-index>
            dataset_name = tokens[4].lower()
            if not dataset_name in datasets:
                raise ValueError('unknown dataset \'' + dataset_name + '\'')
            return vizual.move_row(
                dataset_name=dataset_name,
                row=int(tokens[2]),
                position=int(tokens[7]))
    # No recognized command pattern matched.
    return None