def compute(self):
    path = self.get_input('directory').name
    if not os.path.exists(path):
        self.setup_experiment()
    if (not os.path.exists(os.path.join(path, 'config.yml')) or
            not os.path.exists(os.path.join(path, '.reprounzip'))):
        raise ModuleError(
            self,
            "Directory doesn't contain the necessary files; "
            "is an experiment set up there?")
    with open(os.path.join(path, '.reprounzip'), 'rb') as fp:
        unpacked_info = pickle.load(fp)
    unpacker = unpacked_info['unpacker']
    self.set_output('experiment', Experiment(path, unpacker))

def job_start(self, params):
    work_dir = params['working_directory']
    self.machine = self.get_machine()
    use_machine(self.machine)
    self.job = Subshell("remote", params['command'], work_dir)
    self.job.run()
    ret = self.job._ret
    if ret:
        try:
            # the first line of the output should be the integer job id;
            # anything else means the submission failed
            job_id = int(ret.split('\n')[0])
        except ValueError:
            end_machine()
            raise ModuleError(self, "Error submitting job: %s" % ret)
    self.set_job_machine(params, self.machine)
    return params

def compute(self):
    dict_from_port = {}
    params_dict = {}
    # NOTE: dict_port, optional, mandatory, vt_reserved, library_func and
    # output_ports are presumably bound in the enclosing scope that
    # generates this compute method
    if dict_port is not None:
        dict_from_port = self.get_input(dict_port.name)

    for opt in optional:
        if opt in dict_from_port:
            # obtain the parameter from the passed-in dict
            params_dict[opt] = dict_from_port[opt]
        if self.has_input(opt):
            if opt in vt_reserved:
                p_name = '_' + opt
            else:
                p_name = opt
            params_dict[opt] = self.get_input(p_name)

    for mand in mandatory:
        if mand in vt_reserved:
            p_name = '_' + mand
        else:
            p_name = mand
        if mand in dict_from_port:
            params_dict[mand] = dict_from_port[mand]
        try:
            params_dict[mand] = self.get_input(p_name)
        except ModuleError as me:
            if mand in params_dict:
                # swallow this exception, as the dictionary on dict_port
                # has already taken care of this key
                pass
            else:
                logger.debug('The mandatory port {0} does not have input '
                             'and the input dictionary is either not '
                             'present or does not contain this key'
                             .format(mand))
                raise ModuleError(self, str(me))

    # check for the presence of a 'value' attribute on the incoming
    # port values; this indicates that this is a NSLS2 port type
    for name, val in six.iteritems(params_dict):
        if hasattr(val, 'value'):
            params_dict[name] = val.value

    ret = library_func(**params_dict)
    if len(output_ports) == 1:
        self.set_output(output_ports[0].name, ret)
    else:
        for out_port, ret_val in zip(output_ports, ret):
            self.set_output(out_port.name, ret_val)

def compute(self):
    table = self.get_input('table')
    try:
        column_idx = choose_column(
            table.columns,
            column_names=table.names,
            name=self.force_get_input('column_name', None),
            index=self.force_get_input('column_index', None))
        self.set_output(
            'value',
            table.get_column(column_idx,
                             self.get_input('numeric', allow_default=True)))
    except ValueError as e:
        raise ModuleError(self, e.message)

def compute(self):
    if self.has_input('title'):
        title = self.get_input('title')
    else:
        title = 'VisTrails Dialog'
    label = self.get_input('label')
    default = self.get_input('default')
    self.cacheable_dialog = self.get_input('cacheable')
    (result, ok) = QtGui.QInputDialog.getText(None, title, label,
                                              self.mode, default)
    if not ok:
        raise ModuleError(self, "Canceled")
    self.set_output('result', str(result))

def update_upstream(self):
    """Decides whether or not to run the upstream.

    If a job has already been submitted and the local JobMonitor knows of
    it, we don't need to run upstream modules to check the status. If the
    status check indicates that the job no longer exists, then we should
    run upstream then submit again.
    """
    if not hasattr(self, 'signature'):
        raise ModuleError(self, "Module has no signature")
    jm = self.job_monitor()
    if not (jm.getCache(self.signature) or jm.getJob(self.signature)):
        # We need to submit a new job
        # Update upstream; compute() will need it
        super(JobMixin, self).update_upstream()

def path_port(module, portName):
    value = module.forceGetInputListFromPort(portName)
    if len(value) > 1:
        raise ModuleError(
            module,
            'Multiple items found on port ' + portName +
            '. Only a single entry is handled. Please remove the '
            'extraneous items.')
    value = value[0]
    path = value.name
    path = path.replace("/", os.path.sep)
    if os.path.exists(path):
        return path
    elif os.path.exists(getFileRelativeToCurrentVT(path, module)):
        return getFileRelativeToCurrentVT(path, module)
    else:
        raise RuntimeError('The indicated file or directory, ' + path +
                           ', does not exist on the file system. '
                           'Cannot continue!')

def compute_output(self, output_module, configuration):
    format_map = {'png': vtk.vtkPNGWriter,
                  'jpg': vtk.vtkJPEGWriter,
                  'tif': vtk.vtkTIFFWriter,
                  'pnm': vtk.vtkPNMWriter}
    r = output_module.get_input("value")[0].vtkInstance
    w = configuration["width"]
    h = configuration["height"]
    img_format = self.get_format(configuration)
    if img_format not in format_map:
        raise ModuleError(output_module,
                          'Cannot output in format "%s"' % img_format)
    fname = self.get_filename(configuration, suffix='.%s' % img_format)

    render_to_image(fname, format_map[img_format], r, w, h)

def update_upstream(self):
    """A modified version of the update_upstream method.

    Only updates upstream if the file is not found in the store.
    """
    if not hasattr(self, 'signature'):
        raise ModuleError(self, "Module has no signature")
    file_store = get_default_store()
    entries = file_store.query({KEY_SIGNATURE: self.signature})
    best = None
    for entry in entries:
        if best is None or entry[KEY_TIME] > best[KEY_TIME]:
            best = entry
    if best is not None:
        self._cached = best.filename
    else:
        super(PersistedPath, self).update_upstream()

class FileToFileMode(FileMode):
    def compute_output(self, output_module, configuration=None):
        old_fname = output_module.get_input('value').name
        full_path = self.get_filename(configuration)
        # we know we are in overwrite mode because it would have been
        # flagged otherwise
        if os.path.exists(full_path):
            try:
                os.remove(full_path)
            except OSError as e:
                raise ModuleError(output_module,
                                  'Could not delete existing path "%s"' %
                                  full_path)
        try:
            vistrails.core.system.link_or_copy(old_fname, full_path)
        except OSError as e:
            msg = "Could not create file '%s': %s" % (full_path, e)
            raise ModuleError(output_module, msg)

def compute(self):
    port_object = None
    if self.has_input("SetInputConnection0"):
        ic = self.get_input("SetInputConnection0")
        if hasattr(ic, "vtkInstance"):
            ic = ic.vtkInstance
        producer = ic.GetProducer()
        try:
            port_object = producer.GetOutput()
        except AttributeError:
            raise ModuleError(self,
                              "expected a module that supports GetOutput")
    elif self.has_input("SetInput"):
        port_object = self.get_input("SetInput")
        if hasattr(port_object, "vtkInstance"):
            port_object = port_object.vtkInstance
    if port_object:
        self.auto_set_results(port_object)

class ProjectTable(Table):
    """Build a table from the columns of another table.

    This allows you to restrict, reorder or rename the columns of a
    table. You can also duplicate columns by mentioning them several
    times.
    """
    _input_ports = [("table", "Table"),
                    ("column_names", "basic:List"),
                    ("column_indexes", "basic:List"),
                    ("new_column_names", "basic:List",
                     {"optional": True})]
    _output_ports = [("value", Table)]

    def compute(self):
        table = self.get_input("table")
        try:
            indexes = choose_columns(
                table.columns,
                column_names=table.names,
                names=self.force_get_input('column_names', None),
                indexes=self.force_get_input('column_indexes', None))
        except ValueError as e:
            raise ModuleError(self, e.message)
        if self.has_input('new_column_names'):
            column_names = self.get_input('new_column_names')
            if len(column_names) != len(indexes):
                raise ModuleError(self,
                                  "new_column_names was specified but "
                                  "doesn't have the right number of names")
        else:
            column_names = []
            names = {}
            for i in indexes:
                name = table.names[i]
                if name in names:
                    nb = names[name]
                    names[name] += 1
                    name = '%s_%d' % (name, nb)
                else:
                    names[name] = 1
                column_names.append(name)

        projected_table = ProjectedTable(table, indexes, column_names)
        self.set_output("value", projected_table)

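# A minimal, self-contained sketch (hypothetical helper, not part of the
# package) of the default-naming rule ProjectTable.compute applies above:
# when no new_column_names are given, repeated source names get a numeric
# suffix, so a second "x" becomes "x_1" and a third "x_2".
def dedup_names(names):
    seen = {}
    result = []
    for name in names:
        if name in seen:
            nb = seen[name]
            seen[name] += 1
            name = '%s_%d' % (name, nb)
        else:
            seen[name] = 1
        result.append(name)
    return result

assert dedup_names(['x', 'y', 'x', 'x']) == ['x', 'y', 'x_1', 'x_2']
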
def call_hadoop(self, arguments, workdir, identifier, machine):
    self.is_cacheable = lambda *args, **kwargs: False

    config = self.get_hadoop_config(machine)
    argList = [config['hadoop']]
    if isinstance(arguments, basestring):
        argList += arguments.split(' ')
    elif isinstance(arguments, list):
        argList += arguments
    else:
        raise ModuleError(self, 'Invalid argument types to hadoop')

    # 1. this version returns when finished
    #return subprocess.call(argList)

    # 2. this version reads the results incrementally
    #expect = machine.remote._expect_token
    #machine.remote.push_expect(None)  # Do not wait for call to finish
    #result = machine.remote.send_command(" ".join(argList)).strip()
    #machine.remote.pop_expect()  # restore expect
    ## We could show the output in a gui
    #print "**** hadoop streaming running ****"
    #print result,
    #while not expect in result:
    #    output = machine.remote.consume_output()
    #    if output:
    #        print output,
    #    result += output

    # 3. the final version should detach the process on the server
    use_machine(machine)
    cdir = CreateDirectory("remote", workdir)
    job = Subshell("remote", command=" ".join(argList),
                   working_directory=workdir, identifier=identifier,
                   dependencies=[cdir])
    job.run()
    finished = job.finished()
    if not finished:
        status = job.status()
        # The Subshell class provides the JobHandle interface, i.e.
        # finished()
        raise ModuleSuspended(self, '%s' % status, handle=job)
    self.is_cacheable = lambda *args, **kwargs: True
    return job.standard_error()

def _open_ssh(self, username, password, hostname, portnum, path):
    paramiko = py_import('paramiko', {
            'pip': 'paramiko',
            'linux-debian': 'python-paramiko',
            'linux-ubuntu': 'python-paramiko',
            'linux-fedora': 'python-paramiko'})
    scp = py_import('scp', {'pip': 'scp'})

    local_filename = os.path.join(package_directory,
                                  cache_filename(self.url))

    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    try:
        ssh.connect(hostname, port=portnum,
                    username=username, password=password)
    except paramiko.SSHException as e:
        raise ModuleError(self.module, debug.format_exception(e))

def run(self, *args):
    """run(*args) runs ImageMagick's 'convert' in a shell, passing all
    arguments to the program.
    """
    path = None
    if configuration.check('path'):
        path = configuration.path
    if path:
        cmd = os.path.join(path, 'convert')
    else:
        cmd = 'convert'
    cmd = [cmd] + list(args)
    cmdline = list2cmdline(cmd)
    if not configuration.quiet:
        debug.log(cmdline)
    r = os.system(cmdline)
    if r != 0:
        raise ModuleError(self, "system call failed: %r" % cmdline)

def compute_output(self, output_module, configuration):
    format_map = {'png': vtk.vtkPNGWriter,
                  'jpg': vtk.vtkJPEGWriter,
                  'tif': vtk.vtkTIFFWriter,
                  'pnm': vtk.vtkPNMWriter}
    r = output_module.get_input("value").vtkInstance
    w = configuration["width"]
    h = configuration["height"]
    img_format = self.get_format(configuration)
    if img_format not in format_map:
        raise ModuleError(output_module,
                          'Cannot output in format "%s"' % img_format)
    fname = self.get_filename(configuration, suffix='.%s' % img_format)

    window = vtk.vtkRenderWindow()
    window.OffScreenRenderingOn()
    window.SetSize(w, h)

    # FIXME: think this may be fixed in VTK 6 so we don't have this
    # dependency...
    widget = None
    if vistrails.core.system.systemType == 'Darwin':
        from PyQt4 import QtCore, QtGui
        widget = QtGui.QWidget(None, QtCore.Qt.FramelessWindowHint)
        widget.resize(w, h)
        widget.show()
        window.SetWindowInfo(str(int(widget.winId())))

    window.AddRenderer(r)
    window.Render()

    win2image = vtk.vtkWindowToImageFilter()
    win2image.SetInput(window)
    win2image.Update()

    writer = format_map[img_format]()
    writer.SetInput(win2image.GetOutput())
    writer.SetFileName(fname)
    writer.Write()

    window.Finalize()
    if widget is not None:
        widget.close()

def __init__(self):
    Module.__init__(self)
    config = get_vistrails_persistent_configuration()
    if config.check('webRepositoryURL'):
        self.base_url = config.webRepositoryURL
    else:
        raise ModuleError(self,
                          "No webRepositoryURL value defined in the "
                          "Expert Configuration")
    # check if we are running in server mode;
    # this affects how the compute method functions
    if config.check('isInServerMode'):
        self.is_server = bool(config.isInServerMode)
    else:
        self.is_server = False
    # TODO: this '/' check should probably be done in core/configuration.py
    if self.base_url[-1] == '/':
        self.base_url = self.base_url[:-1]

def read_log(self, fname):
    # open the .vt bundle specified by the filename "fname"
    bundle = vistrails.db.services.io.open_vistrail_bundle_from_zip_xml(
        fname)[0]
    # get the log filename
    log_fname = bundle.vistrail.db_log_filename
    if log_fname is None:
        raise ModuleError(self, "No log file accessible")
    # open the log
    log = vistrails.db.services.io.open_log_from_xml(log_fname, True)
    # convert the log from a db object
    vistrails.core.log.log.Log.convert(log)
    return log

def compute(self):
    machine = self.get_machine()
    jm = self.job_monitor()
    id = self.signature
    job = jm.getCache(id)
    if not job:
        uri = self.force_get_input('HDFS File/URI')
        symlink = self.force_get_input('Symlink')
        if uri is None or symlink is None:
            raise ModuleError(self,
                              "Missing 'HDFS File/URI' or 'Symlink' values")
        if '://' not in uri:
            uri = self.add_prefix(uri, machine)
        uri += '#' + symlink
        d = {'uri': uri}
        self.set_job_machine(d, machine)
        jm.setCache(id, d, self.job_name())
        job = jm.getCache(id)
    self.set_output('URI', job.parameters['uri'])
    self.set_output('Machine', machine)

def get_hadoop_config(self, machine):
    if HadoopBaseModule.hadoop_configuration is None:
        hadoop_home = self.get_hadoop_home(machine)
        # paths to try in order
        streaming_paths = ['/share/hadoop/tools/lib/',    # AWS
                           '/usr/lib/hadoop-mapreduce/',  # NYU/CUSP
                           '/contrib/streaming/']
        for path in streaming_paths:
            hs = hadoop_home + path
            command = ("python -c \"import os, os.path; print '' if not "
                       "os.path.exists('{0}') else ''.join([i for i in "
                       "os.listdir('{0}') if 'streaming' in i][-1:])\""
                       ).format(hs)
            streamingjar = machine.remote.send_command(command).strip()
            if streamingjar:
                break
        if not streamingjar:
            raise ModuleError(self,
                              'hadoop-streaming.jar not found. Please add '
                              'its directory to the list of supported '
                              'paths.')

        hadoop = (hadoop_home + '/bin/hadoop') if hadoop_home else 'hadoop'
        hdfs = (hadoop_home + '/bin/hdfs') if hadoop_home else 'hdfs'
        if not machine.remote.command_exists(hdfs):
            hdfs = hadoop
        config = {'home': hadoop_home,
                  'hadoop': hadoop,
                  'hdfs': hdfs,
                  'streaming.jar': hs + streamingjar}
        HadoopBaseModule.hadoop_configuration = config

        # reading configuration files is error-prone
        #self.read_site_config(machine)

        config['fs.defaultFS'] = ''
        # can access the config only if the hdfs command exists
        if hadoop != hdfs:
            config['fs.defaultFS'] = \
                self.call_hdfs('getconf -confKey fs.defaultFS', machine)
    return HadoopBaseModule.hadoop_configuration

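# A minimal local sketch (hypothetical helper, not part of the package) of
# the probing strategy used by get_hadoop_config above: walk candidate
# directories in order and keep the last entry whose name contains
# 'streaming', mirroring the one-liner sent to the remote machine.
import os

def find_streaming_jar(candidate_dirs):
    for d in candidate_dirs:
        if os.path.exists(d):
            jars = [f for f in os.listdir(d) if 'streaming' in f]
            if jars:
                return os.path.join(d, jars[-1])
    return None
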
def call_it(function, p):
    # Translate between VisTrails objects and VTK objects
    if p is None:
        # None indicates a call with no parameters
        params = []
    elif isinstance(p, tuple):
        # A tuple indicates a call with many parameters
        params = list(p)
    else:
        # Otherwise, it's a single parameter
        params = [p]

    # Unwrap VTK objects
    for i in xrange(len(params)):
        if hasattr(params[i], 'vtkInstance'):
            params[i] = params[i].vtkInstance
    try:
        self.call_input_function(function, params)
    except Exception as e:
        raise ModuleError(self,
                          "VTK Exception: %s" % debug.format_exception(e))

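# A standalone sketch (hypothetical name, not the VisTrails API) of the
# argument-normalization convention call_it relies on above: None means a
# call with no parameters, a tuple means several parameters, and anything
# else is a single parameter.
def normalize_params(p):
    if p is None:
        return []
    elif isinstance(p, tuple):
        return list(p)
    else:
        return [p]

assert normalize_params(None) == []
assert normalize_params((1, 2)) == [1, 2]
assert normalize_params('lone') == ['lone']
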
def startJob(self, params):
    work_dir = params['working_directory']
    use_machine(self.machine)
    self.cdir = CreateDirectory("remote", work_dir)
    trans = TransferFiles("remote", params['input_directory'], work_dir,
                          dependencies=[self.cdir])
    self.job = PBSScript("remote", params['command'], work_dir,
                         dependencies=[trans],
                         **params['additional_arguments'])
    self.job.run()
    try:
        ret = self.job._ret
        if ret:
            # a non-integer response means the submission failed
            job_id = int(ret)
    except ValueError:
        end_machine()
        raise ModuleError(self, "Error submitting job: %s" % ret)
    return params

def compute(self):
    machine = self.get_machine()
    jm = self.job_monitor()
    id = self.signature
    job = jm.getCache(id)
    if not job:
        remote = self.get_input('Remote Location')
        local = self.get_input('Local File')
        override = self.force_get_input('Override', False)
        if '://' not in remote:
            remote = self.add_prefix(remote, machine)
        if os.path.exists(local.name):
            if not override:
                raise ModuleError(self, 'Output already exists')
            else:
                if os.path.isdir(local.name):
                    shutil.rmtree(local.name)
                else:
                    os.unlink(local.name)

        tempfile = machine.remote.send_command('mktemp -d -u').strip()
        result = self.call_hdfs('dfs -get %s %s' % (remote, tempfile),
                                machine)
        # too slow with many files
        #res = machine.send_command("get -r %s %s" % (tempfile, local.name))
        # tar files to increase speed
        result = machine.local.send_command('mkdir %s' % local.name)
        result = machine.sync(local.name, tempfile,
                              mode=machine.MODE_REMOTE_LOCAL, use_tar=True)
        result = machine.remote.rm(tempfile, force=True, recursively=True)
        d = {'remote': remote, 'local': local.name}
        self.set_job_machine(d, machine)
        jm.setCache(id, d, self.job_name())
        job = jm.getCache(id)
    self.set_output('Local File', PathObject(job.parameters['local']))
    self.set_output('Machine', machine)

def execute(self):
    """Tries to download a file from the url.

    Returns the path to the local file.
    """
    self.local_filename = os.path.join(package_directory,
                                       cache_filename(self.url))

    # Before download
    self.pre_download()

    # Send request
    try:
        response = self.send_request()
    except urllib2.URLError as e:
        if self.is_in_local_cache:
            debug.warning("A network error occurred. DownloadFile will "
                          "use a cached version of the file")
            return self.local_filename
        else:
            raise ModuleError(
                self.module,
                "Network error: %s" % debug.format_exception(e))

def compute(self):
    table = self.get_input('table')
    rows = table.rows

    xlwt = get_xlwt()
    if xlwt is None:  # pragma: no cover
        raise ModuleError(self, "xlwt is not available")

    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet('Sheet1')

    fileobj = self.interpreter.filePool.create_file(suffix='.xls')
    fname = fileobj.name

    for c in xrange(table.columns):
        column = table.get_column(c)
        for r, e in enumerate(column):
            sheet.write(r, c, e)

        if r + 1 != rows:  # pragma: no cover
            debug.warning("WriteExcelSpreadsheet wrote %d lines instead "
                          "of expected %d" % (r + 1, rows))
    workbook.save(fname)
    self.set_output('file', fileobj)

def compute(self):
    an = self.get_appname()
    if not os.path.isfile(an):
        raise ModuleError(self, "Application '%s' does not exist" % an)
    cmdlist = [an]
    cmdlist += self.options
    for port_name in self.inputPorts:
        if (port_name not in ('files', 'file', 'application') and
                self.hasInputFromPort(port_name)):
            cmdlist += ['--' + str(port_name),
                        str(self.getInputFromPort(port_name))]
    rf = self.getInputFromPort('files')
    infiles = [x.props['filename'] for x in rf]
    cmdlist += infiles
    self.execute(cmdlist)

    datasetmap = {}
    datasets = []
    for infile in infiles:
        ofname = infile.replace('.out.xml', '.plot.*.xml')
        l = glob.glob(ofname)
        for fn in l:
            dataset = read_xml(fn)
            datasets.append(dataset)

            ylabel = dataset.props['ylabel']
            if ylabel in datasetmap:
                datasetmap[ylabel].append(dataset)
            else:
                datasetmap[ylabel] = [dataset]

    for port_name, ylabel in self.plots:
        if ylabel in datasetmap:
            self.setResult(port_name, datasetmap[ylabel])
        else:
            self.setResult(port_name, [])

def compute(self):
    name_output = self.get_input('OutputPort')
    name_condition = self.force_get_input('ConditionPort')
    name_state_input = self.force_get_input('StateInputPorts')
    name_state_output = self.force_get_input('StateOutputPorts')
    max_iterations = self.get_input('MaxIterations')
    delay = self.force_get_input('Delay')

    if (name_condition is None and
            not self.has_input('MaxIterations')):
        raise ModuleError(self,
                          "Please set MaxIterations or use ConditionPort")

    if name_state_input or name_state_output:
        if not name_state_input or not name_state_output:
            raise ModuleError(self,
                              "Passing state between iterations requires "
                              "BOTH StateInputPorts and StateOutputPorts "
                              "to be set")
        if len(name_state_input) != len(name_state_output):
            raise ModuleError(self,
                              "StateInputPorts and StateOutputPorts need "
                              "to have the same number of ports "
                              "(got %d and %d)" % (len(name_state_input),
                                                   len(name_state_output)))

    connectors = self.inputPorts.get('FunctionPort')
    if len(connectors) != 1:
        raise ModuleError(self,
                          "Multiple modules connected on FunctionPort")
    module = copy.copy(connectors[0].obj)

    state = None

    loop = self.logging.begin_loop_execution(self, max_iterations)
    for i in xrange(max_iterations):
        if not self.upToDate:
            module.upToDate = False
            module.computed = False

            # Set state on input ports
            if i > 0 and name_state_input:
                for value, input_port, output_port \
                        in izip(state, name_state_input, name_state_output):
                    if input_port in module.inputPorts:
                        del module.inputPorts[input_port]
                    new_connector = ModuleConnector(
                        create_constant(value), 'value',
                        module.output_specs.get(output_port, None))
                    module.set_input_port(input_port, new_connector)
                    # Affix a fake signature on the module
                    inputPort_hash = sha1_hash()
                    inputPort_hash.update(input_port)
                    module.signature = b16encode(xor(
                        b16decode(self.signature.upper()),
                        inputPort_hash.digest()))

        loop.begin_iteration(module, i)

        # might raise ModuleError, ModuleSuspended, ModuleHadError,
        # ModuleWasSuspended
        module.update()

        loop.end_iteration(module)

        if name_condition is not None:
            if name_condition not in module.outputPorts:
                raise ModuleError(
                    module,
                    "Invalid output port: %s" % name_condition)
            if not module.get_output(name_condition):
                break

        if delay and i + 1 != max_iterations:
            time.sleep(delay)

        # Get state on output ports
        if name_state_output:
            state = [module.get_output(port)
                     for port in name_state_output]

        self.logging.update_progress(self, i * 1.0 / max_iterations)

    loop.end_loop_execution()

    if name_output not in module.outputPorts:
        raise ModuleError(module,
                          "Invalid output port: %s" % name_output)
    result = module.get_output(name_output)
    self.set_output('Result', result)

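# A minimal, standalone sketch (hypothetical names, not the VisTrails API)
# of the looping contract compute implements above: the state produced by
# one iteration feeds the next, and the loop ends early when the condition
# value turns falsy or after max_iterations passes.
def while_loop(body, initial_state, max_iterations):
    state = initial_state
    result = None
    for i in range(max_iterations):
        result, condition, state = body(i, state)
        if not condition:
            break
    return result

# Example: keep doubling until the value exceeds 100.
assert while_loop(lambda i, s: (s, s <= 100, s * 2), 1, 20) == 128
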
class Map(Module):
    """The Map Module executes a map operator in parallel on IPython
    engines.

    The FunctionPort should be connected to the 'self' output of the
    module you want to execute. The InputList is the list of values to
    be scattered on the engines.
    """
    def __init__(self):
        Module.__init__(self)

    def update_upstream(self):
        """A modified version of the update_upstream method.

        Everything is the same except that we don't update anything
        upstream of FunctionPort.
        """
        for port_name, connector_list in self.inputPorts.iteritems():
            if port_name == 'FunctionPort':
                for connector in connector_list:
                    connector.obj.update_upstream()
            else:
                for connector in connector_list:
                    connector.obj.update()
        for port_name, connectorList in copy.copy(self.inputPorts.items()):
            if port_name != 'FunctionPort':
                for connector in connectorList:
                    if connector.obj.get_output(connector.port) is \
                            InvalidOutput:
                        self.remove_input_connector(port_name, connector)

    @staticmethod
    def print_compositeerror(e):
        sys.stderr.write("Got %d exceptions from IPython engines:\n" %
                         len(e.elist))
        for e_type, e_msg, formatted_tb, infos in e.elist:
            sys.stderr.write("Error from engine %d (%r):\n" % (
                             infos['engine_id'], infos['engine_uuid']))
            sys.stderr.write("%s\n" % strip_ansi_codes(formatted_tb))

    @staticmethod
    def list_exceptions(e):
        return '\n'.join("% 3d: %s: %s" % (infos['engine_id'],
                                           e_type,
                                           e_msg)
                         for e_type, e_msg, tb, infos in e.elist)

    def updateFunctionPort(self):
        """Function to be used inside the update_upstream method of the
        Map module. It updates the module connected to the FunctionPort
        port, executing it in parallel.
        """
        nameInput = self.get_input('InputPort')
        nameOutput = self.get_input('OutputPort')
        rawInputList = self.get_input('InputList')

        # Create inputList to always have iterable elements to simplify
        # the code
        if len(nameInput) == 1:
            element_is_iter = False
            inputList = [[element] for element in rawInputList]
        else:
            element_is_iter = True
            inputList = rawInputList

        workflows = []
        module = None
        vtType = None

        # iterating through the connectors
        for connector in self.inputPorts.get('FunctionPort'):
            module = connector.obj

            # pipeline
            original_pipeline = connector.obj.moduleInfo['pipeline']

            # module
            module_id = connector.obj.moduleInfo['moduleId']
            vtType = original_pipeline.modules[module_id].vtType

            # serialize the module for each value in the list
            for i, element in enumerate(inputList):
                if element_is_iter:
                    self.element = element
                else:
                    self.element = element[0]

                # checking type and setting input in the module
                self.typeChecking(connector.obj, nameInput, inputList)
                self.setInputValues(connector.obj, nameInput, element, i)

                pipeline_db_module = original_pipeline.modules[
                    module_id].do_copy()

                # transforming a subworkflow into a group
                # TODO: should we also transform inner subworkflows?
                if pipeline_db_module.is_abstraction():
                    group = Group(id=pipeline_db_module.id,
                                  cache=pipeline_db_module.cache,
                                  location=pipeline_db_module.location,
                                  functions=pipeline_db_module.functions,
                                  annotations=pipeline_db_module.annotations)

                    source_port_specs = pipeline_db_module.sourcePorts()
                    dest_port_specs = pipeline_db_module.destinationPorts()
                    for source_port_spec in source_port_specs:
                        group.add_port_spec(source_port_spec)
                    for dest_port_spec in dest_port_specs:
                        group.add_port_spec(dest_port_spec)

                    group.pipeline = pipeline_db_module.pipeline
                    pipeline_db_module = group

                # getting the highest id between functions to guarantee
                # unique ids
                # TODO: can get current IdScope here?
                if pipeline_db_module.functions:
                    high_id = max(function.db_id
                                  for function
                                  in pipeline_db_module.functions)
                else:
                    high_id = 0

                # adding function and parameter to module in pipeline
                # TODO: 'pos' should not always be 0 here
                id_scope = IdScope(beginId=long(high_id + 1))
                for elementValue, inputPort in izip(element, nameInput):
                    p_spec = pipeline_db_module.get_port_spec(inputPort,
                                                              'input')
                    descrs = p_spec.descriptors()
                    if len(descrs) != 1:
                        raise ModuleError(
                            self,
                            "Tuple input ports are not supported")
                    if not issubclass(descrs[0].module, Constant):
                        raise ModuleError(
                            self,
                            "Module inputs should be Constant types")
                    type = p_spec.sigstring[1:-1]

                    mod_function = ModuleFunction(
                        id=id_scope.getNewId(ModuleFunction.vtType),
                        pos=0,
                        name=inputPort)
                    mod_param = ModuleParam(id=0L,
                                            pos=0,
                                            type=type,
                                            val=elementValue)

                    mod_function.add_parameter(mod_param)
                    pipeline_db_module.add_function(mod_function)

                # serializing the module
                wf = self.serialize_module(pipeline_db_module)
                workflows.append(wf)

            # getting the first connector, ignoring the rest
            break

        # IPython stuff
        try:
            rc = get_client()
        except Exception as error:
            raise ModuleError(
                self,
                "Exception while loading IPython: %s" %
                debug.format_exception(error))
        if rc is None:
            raise ModuleError(self, "Couldn't get an IPython connection")

        engines = rc.ids
        if not engines:
            raise ModuleError(
                self,
                "Exception while loading IPython: no IPython engines "
                "detected!")

        # initialize each engine, importing modules and initializing the
        # VisTrails application *only* on the first execution on that
        # engine
        uninitialized = []
        for eng in engines:
            try:
                rc[eng]['init']
            except Exception:
                uninitialized.append(eng)
        if uninitialized:
            init_view = rc[uninitialized]
            with init_view.sync_imports():
                import tempfile
                import inspect

                # VisTrails API
                import vistrails
                import vistrails.core
                import vistrails.core.db.action
                import vistrails.core.application
                import vistrails.core.modules.module_registry
                from vistrails.core.db.io import serialize
                from vistrails.core.vistrail.vistrail import Vistrail
                from vistrails.core.vistrail.pipeline import Pipeline
                from vistrails.core.db.locator import XMLFileLocator
                from vistrails.core.vistrail.controller import \
                    VistrailController
                from vistrails.core.interpreter.default import \
                    get_default_interpreter

            # initializing a VisTrails application
            try:
                init_view.execute(
                    'app = vistrails.core.application.init('
                    '    {"spawned": True},'
                    '    args=[])',
                    block=True)
            except CompositeError as e:
                self.print_compositeerror(e)
                raise ModuleError(
                    self,
                    "Error initializing application on IPython engines:\n"
                    "%s" % self.list_exceptions(e))

            init_view['init'] = True

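# A standalone sketch (hypothetical names, not the IPython API) of the lazy
# per-engine initialization pattern used above: probe each engine's
# namespace for a sentinel key and run the expensive setup only on engines
# that do not have it yet.
def ensure_initialized(view_by_engine, engines, setup):
    uninitialized = []
    for eng in engines:
        try:
            view_by_engine[eng]['init']  # raises if the sentinel is missing
        except Exception:
            uninitialized.append(eng)
    for eng in uninitialized:
        setup(eng)
        view_by_engine[eng]['init'] = True
    return uninitialized

views = {0: {}, 1: {'init': True}}
done = ensure_initialized(views, [0, 1], setup=lambda eng: None)
assert done == [0] and views[0]['init'] is True
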
    # fragment: apparently the tail of a compute method's connection error
    # handling, preceding the SQLSource class below
            raise ModuleError(
                self,
                "SQLAlchemy couldn't connect: %s" %
                debug.format_exception(e))
        if not installed:
            raise ModuleError(self, "Failed to install required driver")
        try:
            engine = create_engine(url)
        except Exception as e:
            raise ModuleError(
                self,
                "Couldn't connect to the database: %s" %
                debug.format_exception(e))
    except SQLAlchemyError:
        # This is NoSuchModuleError in newer versions of SQLAlchemy, but
        # we want compatibility here
        raise ModuleError(
            self,
            "SQLAlchemy has no support for protocol %r -- are you "
            "sure you spelled that correctly?" % url.drivername)

    self.set_output('connection', engine.connect())


class SQLSource(Module):
    _settings = ModuleSettings(configure_widget=
        'vistrails.packages.sql.widgets:SQLSourceConfigurationWidget')
    _input_ports = [('connection', '(DBConnection)'),
                    ('cacheResults', '(basic:Boolean)'),
                    ('source', '(basic:String)')]
    _output_ports = [('result',
                      '(org.vistrails.vistrails.tabledata:Table)'),
                     ('resultSet', '(basic:List)')]