def parse_descriptor_string(d_string, cur_package=None):
    """Expand a module descriptor string into (package, name, namespace).

    Descriptors look like "pkg:NamespaceA|NamespaceB|Module".  A package
    part containing '.' is taken verbatim; otherwise it is treated as a
    shortcut for a bundled VisTrails package and prefixed with the default
    package prefix (e.g. "basic" -> "org.vistrails.vistrails.basic").
    When no package part is present, `cur_package` is used; if that is
    None, the registry's current package (or, failing that, the basic
    package) is used instead.

    Note the namespace comes first here, unlike port specifications where
    it follows the module name.  Returns namespace None when no '|' is
    present.

    Examples:
        "persistence:PersistentInputFile", None ->
            ("org.vistrails.vistrails.persistence",
             "PersistentInputFile", None)
        "NamespaceA|NamespaceB|Module", "org.example.my" ->
            ("org.example.my", "Module", "NamespaceA|NamespaceB")
    """
    namespace = None
    pieces = d_string.strip().split(':', 1)
    if len(pieces) == 2:
        pkg_part, qualified = pieces
        if '.' in pkg_part:
            package = pkg_part
        else:
            # shortcut form: expand with the default bundled-package prefix
            package = '%s.%s' % (get_vistrails_default_pkg_prefix(), pkg_part)
    else:
        # no explicit package part; keep the original (unstripped) string,
        # matching the historical split() behavior
        qualified = d_string
        if cur_package is not None:
            package = cur_package
        else:
            from vistrails.core.modules.module_registry import \
                get_module_registry
            reg = get_module_registry()
            if reg._current_package is not None:
                package = reg._current_package.identifier
            else:
                import vistrails.core.modules.basic_modules
                package = vistrails.core.modules.basic_modules.identifier
    pieces = qualified.rsplit('|', 1)
    if len(pieces) == 2:
        namespace, name = pieces
    else:
        name = qualified
    return (package, name, namespace)
def parse_descriptor_string(d_string, cur_package=None):
    """Expand a module descriptor string into (package, name, namespace).

    Accepts "pkg:Namespace|Module" style descriptors.  The package part may
    be a shortcut for a bundled VisTrails package ("basic" expands to the
    default prefix plus ".basic"); a part containing '.' is used verbatim.
    Without a package part, `cur_package` is used, falling back to the
    registry's current package and then to the basic package identifier.

    Note that the namespace precedes the module name here (unlike port
    specifications, where it follows it).  The namespace element of the
    returned tuple is None when the descriptor has no '|'.

    Examples:
        "persistence:PersistentInputFile", None ->
            ("org.vistrails.vistrails.persistence",
             "PersistentInputFile", None)
        "NamespaceA|NamespaceB|Module", "org.example.my" ->
            ("org.example.my", "Module", "NamespaceA|NamespaceB")
    """
    head, colon, tail = d_string.strip().partition(':')
    if colon:
        qualified = tail
        if '.' in head:
            package = head
        else:
            # shortcut form: expand bundled package names
            package = '%s.%s' % (get_vistrails_default_pkg_prefix(), head)
    else:
        # no package part; the original (unstripped) string is kept,
        # matching the historical split() behavior
        qualified = d_string
        if cur_package is not None:
            package = cur_package
        else:
            reg = get_module_registry()
            if reg._current_package is not None:
                package = reg._current_package.identifier
            else:
                package = get_vistrails_basic_pkg_id()
    ns_part, bar, mod_part = qualified.rpartition('|')
    if bar:
        namespace, name = ns_part, mod_part
    else:
        namespace, name = None, qualified
    return (package, name, namespace)
def find_spreadsheet_cells(pipeline, root_id=None):
    # Recursively collect into self.spreadsheet_cells the id paths of all
    # modules in `pipeline` (and its nested subworkflows) whose descriptor
    # class is a subclass of SpreadsheetCell.  Each recorded entry is
    # root_id + [module_id], i.e. the path of module ids from the root
    # pipeline down to the cell.
    # NOTE(review): `self` and `registry` are not parameters -- this
    # function appears to rely on an enclosing scope (likely nested inside
    # a method); confirm before relocating it.
    if root_id is None:
        root_id = []
    # Sometimes we run without the spreadsheet!
    spreadsheet_pkg = \
        '%s.spreadsheet' % get_vistrails_default_pkg_prefix()
    if registry.has_module(spreadsheet_pkg, 'SpreadsheetCell'):
        # First pass to check cells types
        cellType = \
            registry.get_descriptor_by_name(spreadsheet_pkg,
                                            'SpreadsheetCell').module
        for mId, module in pipeline.modules.iteritems():
            desc = registry.get_descriptor_by_name(
                module.package, module.name, module.namespace)
            if issubclass(desc.module, cellType):
                self.spreadsheet_cells.append(root_id + [mId])
    # Second pass: descend into subworkflows, extending the id path.
    for subworkflow_id in self.find_subworkflows(pipeline):
        subworkflow = pipeline.modules[subworkflow_id]
        if subworkflow.pipeline is not None:
            find_spreadsheet_cells(subworkflow.pipeline,
                                   root_id + [subworkflow_id])
def find_spreadsheet_cells(pipeline, root_id=None):
    """Record the id paths of every spreadsheet-cell module.

    Walks `pipeline` and all nested subworkflows, appending
    root_id + [module_id] to self.spreadsheet_cells for each module whose
    descriptor class derives from SpreadsheetCell.
    """
    if root_id is None:
        root_id = []
    # Sometimes we run without the spreadsheet!
    spreadsheet_pkg = '%s.spreadsheet' % get_vistrails_default_pkg_prefix()
    if registry.has_module(spreadsheet_pkg, 'SpreadsheetCell'):
        # Resolve the cell base class once, then test each module's
        # descriptor class against it.
        cell_base = registry.get_descriptor_by_name(
            spreadsheet_pkg, 'SpreadsheetCell').module
        for module_id, mod in pipeline.modules.iteritems():
            descriptor = registry.get_descriptor_by_name(mod.package,
                                                         mod.name,
                                                         mod.namespace)
            if issubclass(descriptor.module, cell_base):
                self.spreadsheet_cells.append(root_id + [module_id])
    # Descend into nested workflows, extending the id path as we go.
    for sub_id in self.find_subworkflows(pipeline):
        sub_module = pipeline.modules[sub_id]
        if sub_module.pipeline is not None:
            find_spreadsheet_cells(sub_module.pipeline, root_id + [sub_id])
def performParameterExploration(self):
    """ performParameterExploration() -> None
    Perform the exploration by collecting a list of actions
    corresponding to each dimension.

    Side effects: persists the current exploration as a <paramexps>
    annotation on the current version, marks the vistrail changed,
    positions the resulting pipelines in the spreadsheet virtual cell
    (when the spreadsheet package is available), and executes each
    pipeline while showing a modal progress dialog.
    """
    registry = get_module_registry()
    actions = self.peWidget.table.collectParameterActions()
    spreadsheet_pkg = '%s.spreadsheet' % get_vistrails_default_pkg_prefix()

    # Set the annotation to persist the parameter exploration
    # TODO: For now, we just replace the existing exploration - Later we
    # should append them.
    xmlString = "<paramexps>\n" + self.getParameterExploration() + \
        "\n</paramexps>"
    self.controller.vistrail.set_paramexp(self.currentVersion, xmlString)
    self.controller.set_changed(True)

    if self.controller.current_pipeline and actions:
        explorer = ActionBasedParameterExploration()
        (pipelines, performedActions) = explorer.explore(
            self.controller.current_pipeline, actions)

        # one entry per exploration dimension; empty dimensions count as 1
        dim = [max(1, len(a)) for a in actions]
        if (registry.has_module(spreadsheet_pkg, 'CellLocation') and
                registry.has_module(spreadsheet_pkg, 'SheetReference')):
            # dimensions are passed in reverse order (dim[2], dim[1],
            # dim[0]) -- presumably sheets/rows/columns; TODO confirm
            # against positionPipelines
            modifiedPipelines = self.virtualCell.positionPipelines(
                'PE#%d %s' % (QParameterExplorationTab.explorationId,
                              self.controller.name),
                dim[2], dim[1], dim[0], pipelines, self.controller)
        else:
            modifiedPipelines = pipelines

        # mCount[i] is the cumulative module count of pipelines before i,
        # used as the progress-bar base value for pipeline i
        mCount = []
        for p in modifiedPipelines:
            if len(mCount) == 0:
                mCount.append(0)
            else:
                mCount.append(len(p.modules) + mCount[len(mCount) - 1])

        # Now execute the pipelines
        totalProgress = sum([len(p.modules) for p in modifiedPipelines])
        progress = QtGui.QProgressDialog('Performing Parameter '
                                         'Exploration...',
                                         '&Cancel',
                                         0, totalProgress)
        progress.setWindowTitle('Parameter Exploration')
        progress.setWindowModality(QtCore.Qt.WindowModal)
        progress.show()

        QParameterExplorationTab.explorationId += 1
        interpreter = get_default_interpreter()
        for pi in xrange(len(modifiedPipelines)):
            progress.setValue(mCount[pi])
            QtCore.QCoreApplication.processEvents()
            if progress.wasCanceled():
                break

            # per-pipeline hook; closes over `progress` to advance the bar
            # as each module finishes
            def moduleExecuted(objId):
                if not progress.wasCanceled():
                    #progress.setValue(progress.value()+1)
                    #the call above was crashing when used by multithreaded
                    #code, replacing with the call below (thanks to Terence
                    #for submitting this fix).
                    QtCore.QMetaObject.invokeMethod(
                        progress, "setValue",
                        QtCore.Q_ARG(int, progress.value() + 1))
                    QtCore.QCoreApplication.processEvents()

            kwargs = {'locator': self.controller.locator,
                      'current_version': self.controller.current_version,
                      'view': self.controller.current_pipeline_scene,
                      'module_executed_hook': [moduleExecuted],
                      'reason': 'Parameter Exploration',
                      'actions': performedActions[pi],
                      }
            interpreter.execute(modifiedPipelines[pi], **kwargs)
        progress.setValue(totalProgress)
def performParameterExploration(self):
    """ performParameterExploration() -> None
    Perform the exploration by collecting a list of actions
    corresponding to each dimension.

    Persists the exploration XML on the current version, flags the
    vistrail as changed, lays the generated pipelines out via the
    spreadsheet virtual cell when available, then runs each pipeline
    under a cancellable modal progress dialog.
    """
    registry = get_module_registry()
    actions = self.peWidget.table.collectParameterActions()
    spreadsheet_pkg = '%s.spreadsheet' % get_vistrails_default_pkg_prefix()

    # Set the annotation to persist the parameter exploration
    # TODO: For now, we just replace the existing exploration - Later we
    # should append them.
    xmlString = "<paramexps>\n" + self.getParameterExploration(
    ) + "\n</paramexps>"
    self.controller.vistrail.set_paramexp(self.currentVersion, xmlString)
    self.controller.set_changed(True)

    if self.controller.current_pipeline and actions:
        explorer = ActionBasedParameterExploration()
        (pipelines, performedActions) = explorer.explore(
            self.controller.current_pipeline, actions)

        # per-dimension sizes; an empty dimension still counts as 1
        dim = [max(1, len(a)) for a in actions]
        if (registry.has_module(spreadsheet_pkg, 'CellLocation') and
                registry.has_module(spreadsheet_pkg, 'SheetReference')):
            # NOTE(review): dimensions go in reversed order (dim[2],
            # dim[1], dim[0]) -- verify against positionPipelines
            modifiedPipelines = self.virtualCell.positionPipelines(
                'PE#%d %s' % (QParameterExplorationTab.explorationId,
                              self.controller.name),
                dim[2], dim[1], dim[0], pipelines, self.controller)
        else:
            modifiedPipelines = pipelines

        # cumulative module counts: mCount[i] is the progress value at
        # which pipeline i starts executing
        mCount = []
        for p in modifiedPipelines:
            if len(mCount) == 0:
                mCount.append(0)
            else:
                mCount.append(len(p.modules) + mCount[len(mCount) - 1])

        # Now execute the pipelines
        totalProgress = sum([len(p.modules) for p in modifiedPipelines])
        progress = QtGui.QProgressDialog(
            'Performing Parameter '
            'Exploration...', '&Cancel', 0, totalProgress)
        progress.setWindowTitle('Parameter Exploration')
        progress.setWindowModality(QtCore.Qt.WindowModal)
        progress.show()

        QParameterExplorationTab.explorationId += 1
        interpreter = get_default_interpreter()
        for pi in xrange(len(modifiedPipelines)):
            progress.setValue(mCount[pi])
            QtCore.QCoreApplication.processEvents()
            if progress.wasCanceled():
                break

            # closure advancing the progress bar as modules complete
            def moduleExecuted(objId):
                if not progress.wasCanceled():
                    #progress.setValue(progress.value()+1)
                    #the call above was crashing when used by multithreaded
                    #code, replacing with the call below (thanks to Terence
                    #for submitting this fix).
                    QtCore.QMetaObject.invokeMethod(
                        progress, "setValue",
                        QtCore.Q_ARG(int, progress.value() + 1))
                    QtCore.QCoreApplication.processEvents()

            kwargs = {
                'locator': self.controller.locator,
                'current_version': self.controller.current_version,
                'view': self.controller.current_pipeline_scene,
                'module_executed_hook': [moduleExecuted],
                'reason': 'Parameter Exploration',
                'actions': performedActions[pi],
            }
            interpreter.execute(modifiedPipelines[pi], **kwargs)
        progress.setValue(totalProgress)
def process_exec(item_exec, workflow, account, upstream_lookup,
                 downstream_lookup, depth, conn_artifacts=None,
                 function_artifacts=None, module_processes=None,
                 in_upstream_artifacts={}, in_downstream_artifacts={},
                 add_extras=False):
    """Translate one executed module into provenance records.

    Creates artifacts for the module's functions, connections and
    annotations, and `used` / `wasGeneratedBy` / `wasTriggeredBy`
    dependencies between them and this module's process, appending to the
    module-level accumulators (artifacts, dependencies, processes,
    accounts, depth_accounts, db_artifacts, file_artifacts -- defined
    elsewhere in this module, along with reg and id_scope).
    Control-flow modules (Map, Group, InputPort/OutputPort, If) get
    special handling via the all_special_ports dispatch table below.

    NOTE(review): in_upstream_artifacts/in_downstream_artifacts use
    mutable default arguments ({}); callers that rely on a fresh dict per
    call should pass one explicitly -- confirm before changing.
    """
    print 'in_upstream:', [(n, x.db_id)
                           for n, x_list in in_upstream_artifacts.iteritems()
                           for x in x_list]
    print 'in_downstream:', [(n, x.db_id)
                             for n, x_list in in_downstream_artifacts.iteritems()
                             for x in x_list]
    # FIXME merge conn_artifacts and function_artifacts
    # problem is that a conn_artifact is OUTPUT while function_artifact
    # is INPUT
    if conn_artifacts is None:
        conn_artifacts = {}
    if function_artifacts is None:
        function_artifacts = {}
    if module_processes is None:
        module_processes = {}
    # while item_exec.vtType == DBLoopExec.vtType:
    #     item_exec = item_exec.db_item_execs[0]

    # module_processes maps module id -> (module, process) pair
    (module, process) = module_processes[item_exec.db_module_id]

    def process_connection(conn):
        # Return (artifact, in_cache) for the connection's source port,
        # creating and caching a port-spec artifact on first sight.
        source = conn.db_ports_type_index['source']
        source_t = (source.db_moduleId, source.db_name)
        in_cache = False
        print '!!! processing', source_t
        if source_t in conn_artifacts:
            artifact = conn_artifacts[source_t]
            in_cache = True
        else:
            # key off source module and port name
            # get descriptor from registry and then port_spec
            # store port_spec as artifact
            if source.db_moduleId < 0:
                dest = conn.db_ports_type_index['destination']
                module = source.db_module
            else:
                module = workflow.db_modules_id_index[source.db_moduleId]
            print module.db_name, module.db_id
            pkg = get_package(reg, module.db_package, module.db_version)
            if not module.db_namespace:
                module_namespace = ''
            else:
                module_namespace = module.db_namespace
            module_desc = \
                pkg.db_module_descriptors_name_index[(module.db_name,
                                                      module_namespace,
                                                      '')]
            # FIXME make work for module port_specs, too
            # for example, a PythonSource with a given port in
            # module.db_portSpecs
            port_spec = None
            spec_t = (source.db_name, 'output')
            if spec_t in module.db_portSpecs_name_index:
                port_spec = module.db_portSpecs_name_index[spec_t]
            # walk up the descriptor hierarchy until the port spec is found
            # or the root descriptor is reached
            while port_spec is None and \
                    module_desc.db_id != reg.db_root_descriptor_id:
                if spec_t in module_desc.db_portSpecs_name_index:
                    port_spec = module_desc.db_portSpecs_name_index[spec_t]
                base_id = module_desc.db_base_descriptor_id
                # inefficient spin through db_packages but we do
                # not have the descriptors_by_id index that exists
                # on core.module_registry.ModuleRegistry here
                module_desc = None
                for pkg in reg.db_packages:
                    if base_id in pkg.db_module_descriptors_id_index:
                        module_desc = \
                            pkg.db_module_descriptors_id_index[base_id]
                        break
                if module_desc is None:
                    raise KeyError("Cannot find base descriptor id %d" %
                                   base_id)
                # pkg = get_package(reg, module_desc.db_package,
                #                   module_desc.db_package_version)
                # module_desc = pkg.db_module_descriptors_id_index[base_id]
            if port_spec is None:
                port_spec = module_desc.db_portSpecs_name_index[spec_t]
            print module_desc.db_name
            artifact = \
                create_artifact_from_port_spec(port_spec, account, id_scope)
            artifacts.append(artifact)
            print 'adding conn_artifact', artifact.db_id, source_t, \
                source.db_moduleName
            conn_artifacts[source_t] = artifact
        return (artifact, in_cache)

    def process_map(module, found_input_ports, found_output_ports):
        # Special handling for a control_flow Map module: model it as a
        # Split process feeding per-iteration artifacts and (optionally)
        # a Join process collecting results, then recurse into each
        # iteration's execs at depth+1.
        print "*** Processing Map"
        if depth+1 in depth_accounts:
            account = depth_accounts[depth+1]
        else:
            account = create_account(depth+1, id_scope)
            accounts.append(account)
            depth_accounts[depth+1] = account

        # need to have process that extracts artifacts for each iteration
        input_list_artifact = found_input_ports['InputList']
        result_artifact = found_output_ports.get('Result', None)
        # NOTE(review): eval of stored parameter values -- assumes the
        # vistrail file is trusted input
        input_port_list = \
            eval(found_input_ports['InputPort'].db_parameters[0].db_val)
        output_port = \
            found_input_ports['OutputPort'].db_parameters[0].db_val
        s_process = create_process_manual('Split', account, id_scope)
        processes.append(s_process)
        dependencies.append(create_used(s_process,
                                        input_list_artifact,
                                        account, id_scope))
        # need to have process that condenses artifacts from each iteration
        if result_artifact is not None:
            j_process = create_process_manual('Join', account, id_scope)
            processes.append(j_process)
        for loop_exec in item_exec.db_loop_execs:
            loop_up_artifacts = {}
            loop_down_artifacts = {}
            for input_name in input_port_list:
                port_spec = DBPortSpec(id=-1, name=input_name,
                                       type='output')
                s_artifact = \
                    create_artifact_from_port_spec(port_spec, account,
                                                   id_scope)
                artifacts.append(s_artifact)
                dependencies.append(create_was_generated_by(s_artifact,
                                                            s_process,
                                                            account,
                                                            id_scope))
                if input_name not in loop_up_artifacts:
                    loop_up_artifacts[input_name] = []
                loop_up_artifacts[input_name].append(s_artifact)

            # process output_port
            if loop_exec.db_completed == 1:
                port_spec = DBPortSpec(id=-1, name=output_port,
                                       type='output')
                o_artifact = \
                    create_artifact_from_port_spec(port_spec, account,
                                                   id_scope)
                artifacts.append(o_artifact)
                if output_port not in loop_down_artifacts:
                    loop_down_artifacts[output_port] = []
                loop_down_artifacts[output_port].append(o_artifact)
                if result_artifact is not None:
                    dependencies.append(create_used(j_process, o_artifact,
                                                    account, id_scope))

            # now process a loop_exec
            for child_exec in loop_exec.db_item_execs:
                do_create_process(workflow, child_exec, account,
                                  module_processes)
            for child_exec in loop_exec.db_item_execs:
                process_exec(child_exec, workflow, account, upstream_lookup,
                             downstream_lookup, depth+1, conn_artifacts,
                             function_artifacts, module_processes,
                             loop_up_artifacts, loop_down_artifacts, True)

        # need to set Return artifact and connect j_process to it
        if result_artifact is not None:
            dependencies.append(create_was_generated_by(result_artifact,
                                                        j_process,
                                                        account, id_scope))

    def process_group(module, found_input_ports, found_output_ports):
        # identify depth and create new account if necessary
        # recurse with new account
        # need to link to upstream and downstream correctly
        workflow = module.db_workflow
        # run the whole upstream construction, etc, using this exec
        # and the group's workflow
        if depth+1 in depth_accounts:
            account = depth_accounts[depth+1]
        else:
            account = create_account(depth+1, id_scope)
            accounts.append(account)
            depth_accounts[depth+1] = account
        process_workflow(workflow, item_exec, account,
                         out_upstream_artifacts,
                         out_downstream_artifacts, depth+1)

    def process_port_module(module, found_input_ports, found_output_ports):
        # Link an InputPort/OutputPort module to the artifacts passed in
        # by the enclosing workflow (in_upstream/in_downstream_artifacts).
        port_name = found_input_ports['name'].db_parameters[0].db_val
        if module.db_name == 'InputPort':
            if port_name in in_upstream_artifacts:
                for artifact in in_upstream_artifacts[port_name]:
                    dependencies.append(create_used(process, artifact,
                                                    account, id_scope))
        elif module.db_name == 'OutputPort':
            if port_name in in_downstream_artifacts:
                for artifact in in_downstream_artifacts[port_name]:
                    dependencies.append(create_was_generated_by(artifact,
                                                                process,
                                                                account,
                                                                id_scope))

    def process_if_module(module, found_input_ports, found_output_ports):
        print 'processing IFFFF'
        # need to decide which path was taken?
        # check which module was executed, then know which branch was
        # taken?
        true_conn = found_input_ports['TruePort']
        false_conn = found_input_ports['FalsePort']
        true_id = true_conn.db_ports_type_index['source'].db_moduleId
        false_id = false_conn.db_ports_type_index['source'].db_moduleId
        print '$$ TRUE ID:', true_id
        print '$$ FALSE ID:', false_id
        for x,y in module_processes.iteritems():
            print x, ':', y
        # whichever branch module actually executed appears in
        # module_processes; use its process as the trigger source
        if true_id in module_processes:
            cond_process = module_processes[true_id][1]
        elif false_id in module_processes:
            cond_process = module_processes[false_id][1]
        else:
            raise RuntimeError("cannot process if")
        # FIXME: assume true for now
        # eventually need to check which module_id was execed for this
        # current item exec
        dependencies.append(create_was_triggered_by(cond_process, process,
                                                    account, id_scope))

    if add_extras:
        # propagate the caller-supplied artifacts and link them to this
        # process (used for Map iterations and Group interiors)
        print '***adding extras'
        out_upstream_artifacts = copy.copy(in_upstream_artifacts)
        out_downstream_artifacts = copy.copy(in_downstream_artifacts)
        for port_name, artifact_list in in_upstream_artifacts.iteritems():
            for artifact in artifact_list:
                dependencies.append(create_used(process, artifact,
                                                account, id_scope))
        for port_name, artifact_list in in_downstream_artifacts.iteritems():
            for artifact in artifact_list:
                # conn_artifacts[(port_name, 'output')] = artifact
                dependencies.append(create_was_generated_by(artifact,
                                                            process,
                                                            account,
                                                            id_scope))
    else:
        out_upstream_artifacts = {}
        out_downstream_artifacts = {}

    ctrl_flow_pkg = '%s.control_flow' % get_vistrails_default_pkg_prefix()
    basic_pkg = get_vistrails_basic_pkg_id()
    # dispatch table: "package:ModuleName" ->
    # [input-port flags, output-port flags, handler].  A False flag means
    # the raw function/connection is recorded instead of an artifact.
    all_special_ports = {'%s:Map' % ctrl_flow_pkg:
                         [{'InputPort': False, 'OutputPort': False,
                           'InputList': True, 'FunctionPort': False},
                          {'Result': True},
                          process_map],
                         '%s:Group' % basic_pkg:
                         [{}, {}, process_group],
                         '%s:InputPort' % basic_pkg:
                         [{'name': False, 'spec': False,
                           'old_name': False},
                          {}, process_port_module],
                         '%s:OutputPort' % basic_pkg:
                         [{'name': False, 'spec': False,
                           'old_name': False},
                          {}, process_port_module],
                         '%s:If' % ctrl_flow_pkg:
                         [{'TruePort': False, 'FalsePort': False},
                          {}, process_if_module],
                         }

    module_desc_str = module.db_package + ':' + module.db_name
    special_ports = all_special_ports.get(module_desc_str, [{}, {}, None])
    found_input_ports = {}
    found_output_ports = {}

    # process used_files annotations
    # process generated_tables annotations:
    for annotation in item_exec.db_annotations:
        def process_db_tuple(db_tuple):
            # Intern a database-table tuple as an artifact, keeping the
            # shallowest (smallest-depth) account seen so far.
            db_tuple = (str(db_tuple[0]),) + db_tuple[1:]
            if db_tuple not in db_artifacts:
                artifact = create_artifact_from_db_tuple(db_tuple,
                                                         account, id_scope)
                artifacts.append(artifact)
                db_artifacts[db_tuple] = artifact
            else:
                artifact = db_artifacts[db_tuple]
                # account ids look like "xxxxN"; compare the numeric tail
                # -- TODO confirm the 4-char prefix assumption
                if int(artifact.db_accounts[0].db_id[4:]) > \
                        int(account.db_id[4:]):
                    artifact.db_accounts[0] = account
            return artifact

        # NOTE(review): annotation values are eval'd -- assumes the
        # execution log is trusted input
        if annotation.db_key == 'used_files':
            used_files = eval(annotation.db_value)
            for fname in used_files:
                if fname not in file_artifacts:
                    artifact = create_artifact_from_filename(fname,
                                                             account,
                                                             id_scope)
                    artifacts.append(artifact)
                    file_artifacts[fname] = artifact
                else:
                    artifact = file_artifacts[fname]
                    if int(artifact.db_accounts[0].db_id[4:]) > \
                            int(account.db_id[4:]):
                        artifact.db_accounts[0] = account
                dependencies.append(create_used(process, artifact,
                                                account, id_scope))
        elif annotation.db_key == 'generated_tables':
            generated_tables = eval(annotation.db_value)
            for db_tuple in generated_tables:
                artifact = process_db_tuple(db_tuple)
                dependencies.append(create_was_generated_by(artifact,
                                                            process,
                                                            account,
                                                            id_scope))
        elif annotation.db_key == 'used_tables':
            used_tables = eval(annotation.db_value)
            for db_tuple in used_tables:
                artifact = process_db_tuple(db_tuple)
                dependencies.append(create_used(process, artifact,
                                                account, id_scope))

    # process functions
    for function in module.db_functions:
        # FIXME let found_input_ports, found_output_ports store lists?
        if function.db_name in special_ports[0]:
            if not special_ports[0][function.db_name]:
                # flag False: keep the raw function; no artifact is made
                found_input_ports[function.db_name] = function
                continue
        function_t = (module.db_id, function.db_name)
        if function_t in function_artifacts:
            artifact = function_artifacts[function_t]
            if int(artifact.db_accounts[0].db_id[4:]) > \
                    int(account.db_id[4:]):
                artifact.db_accounts[0] = account
        else:
            artifact = create_artifact_from_function(function,
                                                     account, id_scope)
            print 'adding artifact', artifact.db_id
            artifacts.append(artifact)
            function_artifacts[function_t] = artifact
        if function.db_name in special_ports[0]:
            found_input_ports[function.db_name] = artifact
        if function.db_name not in out_upstream_artifacts:
            out_upstream_artifacts[function.db_name] = []
        out_upstream_artifacts[function.db_name].append(artifact)
        dependencies.append(create_used(process, artifact,
                                        account, id_scope))

    # process connections
    if module.db_id in upstream_lookup:
        for conns in upstream_lookup[module.db_id].itervalues():
            for conn in conns:
                dest = conn.db_ports_type_index['destination']
                if dest.db_name in special_ports[0]:
                    if not special_ports[0][dest.db_name]:
                        found_input_ports[dest.db_name] = conn
                        continue
                (artifact, in_cache) = process_connection(conn)
                if dest.db_name in special_ports[0]:
                    found_input_ports[dest.db_name] = artifact
                if dest.db_name not in out_upstream_artifacts:
                    out_upstream_artifacts[dest.db_name] = []
                out_upstream_artifacts[dest.db_name].append(artifact)
                print 'adding dependency (pa)', process.db_id, \
                    artifact.db_id
                dependencies.append(create_used(process, artifact,
                                                account, id_scope))

    # only record outputs if this module finished successfully
    if item_exec.db_completed == 1:
        if module.db_id in downstream_lookup:
            # check if everything completed successfully for this?
            for conns in downstream_lookup[module.db_id].itervalues():
                for conn in conns:
                    source = conn.db_ports_type_index['source']
                    if source.db_name in special_ports[1]:
                        if not special_ports[1][source.db_name]:
                            found_output_ports[source.db_name] = conn
                            continue
                    dest = conn.db_ports_type_index['destination']
                    dest_module = \
                        workflow.db_modules_id_index[dest.db_moduleId]
                    dest_desc_str = dest_module.db_package + ':' + \
                        dest_module.db_name
                    dest_special_ports = \
                        all_special_ports.get(dest_desc_str,
                                              [{}, {}, None])
                    # skip connections into special input ports that are
                    # handled as raw connections on the destination side
                    if dest.db_name in dest_special_ports[0] and \
                            not dest_special_ports[0][dest.db_name]:
                        print 'skipping', dest.db_name
                        continue
                    (artifact, in_cache) = process_connection(conn)
                    if not in_cache:
                        if source.db_name in special_ports[1]:
                            found_output_ports[source.db_name] = artifact
                        if source.db_name not in out_downstream_artifacts:
                            out_downstream_artifacts[source.db_name] = []
                        out_downstream_artifacts[source.db_name].append(
                            artifact)
                        print 'adding dependency (ap)', artifact.db_id, \
                            process.db_id
                        dependencies.append(
                            create_was_generated_by(artifact, process,
                                                    account, id_scope))

    # finally, run the special handler (Map/Group/port/If) if any
    if special_ports[2] is not None:
        special_ports[2](module, found_input_ports, found_output_ports)