Exemple #1
0
    def compute(self):
        """Locate an unpacked reprozip experiment in the input directory
        (setting it up first if the directory does not exist) and emit it
        on the 'experiment' output port.

        Raises ModuleError when the directory lacks the reprounzip files.
        """
        path = self.get_input('directory').name

        if not os.path.exists(path):
            # Directory missing entirely: create the experiment first
            self.setup_experiment()
        if (not os.path.exists(os.path.join(path, 'config.yml'))
                or not os.path.exists(os.path.join(path, '.reprounzip'))):
            raise ModuleError(
                self, "Directory doesn't contain the necessary file; "
                "is an experiment set up there?")
        # .reprounzip is a pickled dict written by reprounzip; treated as
        # trusted local input (pickle is unsafe on untrusted data)
        with open(os.path.join(path, '.reprounzip'), 'rb') as fp:
            unpacked_info = pickle.load(fp)
        unpacker = unpacked_info['unpacker']

        self.set_output('experiment', Experiment(path, unpacker))
Exemple #2
0
 def job_start(self, params):
     """Submit the job described by `params` on the configured machine.

     Returns `params`, with the machine recorded via set_job_machine.
     Raises ModuleError when the submission output is not a job id.
     """
     work_dir = params['working_directory']
     self.machine = self.get_machine()
     use_machine(self.machine)
     self.job = Subshell("remote", params['command'], work_dir)
     self.job.run()
     ret = self.job._ret
     if ret:
         try:
             # the first line of the scheduler's reply should be the job id
             job_id = int(ret.split('\n')[0])
         except ValueError:
             # not a numeric id: submission failed; release the machine
             end_machine()
             raise ModuleError(self, "Error submitting job: %s" % ret)
     self.set_job_machine(params, self.machine)
     return params
Exemple #3
0
    def compute(self):
        """Collect parameters from the module's ports and the optional
        dict port, call the wrapped library function, and route its
        result(s) to the output ports.

        For each parameter, a value set directly on the module's own port
        overrides the corresponding key in the dict from dict_port.
        Raises ModuleError when a mandatory parameter is missing.
        """
        dict_from_port = {}
        params_dict = {}
        if dict_port is not None:
            dict_from_port = self.get_input(dict_port.name)

        for opt in optional:
            if opt in dict_from_port:
                # obtain the parameter from the passed in dict
                params_dict[opt] = dict_from_port[opt]
            if self.has_input(opt):
                # ports whose names collide with VisTrails reserved names
                # are registered with a leading underscore
                if opt in vt_reserved:
                    p_name = '_' + opt
                else:
                    p_name = opt
                # a directly-set port overrides the dict value
                params_dict[opt] = self.get_input(p_name)

        for mand in mandatory:
            if mand in vt_reserved:
                p_name = '_' + mand
            else:
                p_name = mand
            if mand in dict_from_port:
                params_dict[mand] = dict_from_port[mand]
            try:
                params_dict[mand] = self.get_input(p_name)
            except ModuleError as me:
                if mand in params_dict:
                    # pass on this exception, as the dictionary on dict_port
                    # has taken care of this key
                    pass
                else:
                    # fixed: the first fragment previously ended without a
                    # trailing space, producing "...have inputand the..."
                    logger.debug('The mandatory port {0} does not have input '
                                 'and the input dictionary is either not '
                                 'present or doesn\'t contain this key'
                                 ''.format(mand))
                    # NOTE(review): other modules raise ModuleError(self, msg);
                    # passing __name__ here looks suspect -- confirm intent
                    raise ModuleError(__name__, me)
        # check for the presence of a 'value' attribute on the incoming
        # port values. This indicates that this is a NSLS2 port type
        for name, val in six.iteritems(params_dict):
            if hasattr(val, 'value'):
                params_dict[name] = val.value
        ret = library_func(**params_dict)
        if len(output_ports) == 1:
            self.set_output(output_ports[0].name, ret)
        else:
            # multiple output ports: the result is unpacked pairwise
            for (out_port, ret_val) in zip(output_ports, ret):
                self.set_output(out_port.name, ret_val)
Exemple #4
0
    def compute(self):
        """Extract a single column from the input table and set it on the
        'value' output port.

        The column is selected by the optional 'column_name' and/or
        'column_index' inputs; a ValueError from the chooser is re-raised
        as a ModuleError.
        """
        table = self.get_input('table')
        try:
            column_idx = choose_column(
                table.columns,
                column_names=table.names,
                name=self.force_get_input('column_name', None),
                index=self.force_get_input('column_index', None))

            self.set_output(
                'value',
                table.get_column(column_idx,
                                 self.get_input('numeric',
                                                allow_default=True)))
        except ValueError, e:
            raise ModuleError(self, e.message)
Exemple #5
0
    def compute(self):
        """Show a Qt text-input dialog and publish the entered string on
        the 'result' output port.

        Raises ModuleError when the user cancels the dialog.
        """
        title = (self.get_input('title') if self.has_input('title')
                 else 'VisTrails Dialog')
        label = self.get_input('label')
        default = self.get_input('default')
        self.cacheable_dialog = self.get_input('cacheable')

        result, ok = QtGui.QInputDialog.getText(None, title, label,
                                                self.mode, default)
        if not ok:
            raise ModuleError(self, "Canceled")
        self.set_output('result', str(result))
Exemple #6
0
    def update_upstream(self):
        """ Decides whether or not to run the upstream.

        If a job has already been submitted and the local JobMonitor knows of
        it, we don't need to run upstream modules to check the status.

        If status check indicates that the job no longer exists, then we should
        run upstream then submit again.
        """
        if not hasattr(self, 'signature'):
            raise ModuleError(self, "Module has no signature")
        monitor = self.job_monitor()
        if monitor.getCache(self.signature) or monitor.getJob(self.signature):
            # The job is already known locally -- nothing to update
            return
        # We need to submit a new job; compute() will need upstream results
        super(JobMixin, self).update_upstream()
Exemple #7
0
def path_port(module, portName):
    """Return an existing filesystem path from the given input port.

    Exactly one value must be set on the port; the path is checked both
    as-is and relative to the current vistrail.

    Raises ModuleError when multiple entries are found on the port, and
    RuntimeError when the path does not exist either way.
    """
    value = module.forceGetInputListFromPort(portName)
    if len(value) > 1:
        raise ModuleError(
            module, 'Multiple items found from Port ' + portName +
            '.  Only single entry handled.  Please remove extraneous items.')
    value = value[0]
    path = value.name
    # normalize to the local OS path separator
    path = path.replace("/", os.path.sep)
    if os.path.exists(path):
        return path
    # fixed: compute the VT-relative candidate once instead of twice
    relative = getFileRelativeToCurrentVT(path, module)
    if os.path.exists(relative):
        return relative
    # fixed: old-style `raise E, msg` statement replaced with call form
    raise RuntimeError('The indicated file or directory, ' +
                       path + ', does not exist on the file system.  Cannot continue!')
Exemple #8
0
    def compute_output(self, output_module, configuration):
        """Render the first 'value' input with render_to_image, writing an
        image file in the configured format (png/jpg/tif/pnm) and size.

        Raises ModuleError when the requested format is unsupported.
        """
        writers = {
            'png': vtk.vtkPNGWriter,
            'jpg': vtk.vtkJPEGWriter,
            'tif': vtk.vtkTIFFWriter,
            'pnm': vtk.vtkPNMWriter,
        }
        renderer = output_module.get_input("value")[0].vtkInstance
        width = configuration["width"]
        height = configuration["height"]
        img_format = self.get_format(configuration)
        if img_format not in writers:
            raise ModuleError(output_module,
                              'Cannot output in format "%s"' % img_format)
        fname = self.get_filename(configuration, suffix='.%s' % img_format)

        render_to_image(fname, writers[img_format], renderer, width, height)
Exemple #9
0
    def update_upstream(self):
        """A modified version of the update_upstream method.

        Only updates upstream if the file is not found in the store.
        """
        if not hasattr(self, 'signature'):
            raise ModuleError(self, "Module has no signature")
        file_store = get_default_store()
        # keep the most recent entry matching this module's signature
        best = None
        for candidate in file_store.query({KEY_SIGNATURE: self.signature}):
            if best is None or candidate[KEY_TIME] > best[KEY_TIME]:
                best = candidate
        if best is None:
            # nothing cached -- fall back to the normal upstream update
            super(PersistedPath, self).update_upstream()
        else:
            self._cached = best.filename
Exemple #10
0
class FileToFileMode(FileMode):
    """Output mode that hard-links (or copies) the input file to the
    configured destination path, replacing any existing file there."""

    def compute_output(self, output_module, configuration=None):
        old_fname = output_module.get_input('value').name
        full_path = self.get_filename(configuration)
        # we know we are in overwrite mode because it would have been
        # flagged otherwise
        if os.path.exists(full_path):
            try:
                os.remove(full_path)
            except OSError, e:
                raise ModuleError(output_module, ('Could not delete existing '
                                                  'path "%s"' % full_path))
        try:
            # hard-link when possible, falling back to a copy
            vistrails.core.system.link_or_copy(old_fname, full_path)
        except OSError, e:
            msg = "Could not create file '%s': %s" % (full_path, e)
            raise ModuleError(output_module, msg)
Exemple #11
0
 def compute(self):
     """Derive a VTK output object from either SetInputConnection0 (via
     the connection's producer) or a direct SetInput value, then feed it
     to auto_set_results.

     Raises ModuleError when the producer has no GetOutput method.
     """
     port_object = None
     if self.has_input("SetInputConnection0"):
         ic = self.get_input("SetInputConnection0")
         # unwrap the VisTrails wrapper to the raw VTK object if needed
         if hasattr(ic, "vtkInstance"):
             ic = ic.vtkInstance
         producer = ic.GetProducer()
         try:
             port_object = producer.GetOutput()
         except AttributeError:
             raise ModuleError(self,
                               "expected a module that supports GetOutput")
     elif self.has_input("SetInput"):
         port_object = self.get_input("SetInput")
         # same unwrapping for directly supplied values
         if hasattr(port_object, "vtkInstance"):
             port_object = port_object.vtkInstance
     if port_object:
         self.auto_set_results(port_object)
Exemple #12
0
class ProjectTable(Table):
    """Build a table from the columns of another table.

    This allows you to restrict, reorder or rename the columns of a table. You
    can also duplicate columns by mentioning them several times.
    """
    _input_ports = [("table", "Table"),
                    ("column_names", "basic:List"),
                    ("column_indexes", "basic:List"),
                    ("new_column_names", "basic:List",
                     {"optional": True})]
    _output_ports = [("value", Table)]

    def compute(self):
        """Select the requested columns and emit the projected table."""
        table = self.get_input("table")
        try:
            indexes = choose_columns(
                    table.columns,
                    column_names=table.names,
                    names=self.force_get_input('column_names', None),
                    indexes=self.force_get_input('column_indexes', None))
        except ValueError, e:
            raise ModuleError(self, e.message)
        if self.has_input('new_column_names'):
            column_names = self.get_input('new_column_names')
            if len(column_names) != len(indexes):
                raise ModuleError(self,
                                  "new_column_names was specified but doesn't "
                                  "have the right number of names")
        else:
            # No explicit names: reuse the source names, suffixing repeated
            # names with _1, _2, ... so every output name is unique
            column_names = []
            names = {}
            for i in indexes:
                name = table.names[i]
                if name in names:
                    nb = names[name]
                    names[name] += 1
                    name = '%s_%d' % (name, nb)
                else:
                    names[name] = 1
                column_names.append(name)

        projected_table = ProjectedTable(table, indexes, column_names)
        self.set_output("value", projected_table)
Exemple #13
0
    def call_hadoop(self, arguments, workdir, identifier, machine):
        """Run the hadoop binary with the given arguments as a detached
        remote Subshell job.

        `arguments` may be a whitespace-separated string or a list.
        Returns the job's standard error once finished; raises
        ModuleSuspended while the job is still running.
        """
        # a suspended/running job must not be served from the cache
        self.is_cacheable = lambda *args, **kwargs: False
        config = self.get_hadoop_config(machine)
        argList = [config['hadoop']]
        if type(arguments) in [str, unicode]:
            argList += arguments.split(' ')
        elif type(arguments) == list:
            argList += arguments
        else:
            raise ModuleError(self, 'Invalid argument types to hadoop')

        # 1. this version returns when finished
        #return subprocess.call(argList)
        # 2. this version reads the results incrementally
#         expect = machine.remote._expect_token
#         machine.remote.push_expect(None) # Do not wait for call to finish
#         result =  machine.remote.send_command(" ".join(argList)).strip()
#         machine.remote.pop_expect() # restore expect
#         # We could show the output in a gui
#         print "**** hadoop streaming running ****"
#         print result,
#         while not expect in result:
#             output = machine.remote.consume_output()
#             if output:
#                 print output,
#             result += output
# 3. The final version should detach the process on the server
        use_machine(machine)
        cdir = CreateDirectory("remote", workdir)
        job = Subshell("remote",
                       command=" ".join(argList),
                       working_directory=workdir,
                       identifier=identifier,
                       dependencies=[cdir])
        job.run()
        finished = job.finished()
        if not finished:
            status = job.status()
            # The Subshell class provides the JobHandle interface, i.e.
            # finished()
            raise ModuleSuspended(self, '%s' % status, handle=job)
        # job completed: results may be cached again
        self.is_cacheable = lambda *args, **kwargs: True
        return job.standard_error()
Exemple #14
0
    def _open_ssh(self, username, password, hostname, portnum, path):
        """Open an SSH connection to `hostname` for fetching the remote
        file into the local download cache.

        paramiko and scp are imported on demand (installed via the
        platform's package manager or pip when missing).
        Raises ModuleError when the SSH connection fails.
        """
        paramiko = py_import('paramiko', {
                'pip': 'paramiko',
                'linux-debian': 'python-paramiko',
                'linux-ubuntu': 'python-paramiko',
                'linux-fedora': 'python-paramiko'})
        scp = py_import('scp', {
                'pip': 'scp'})

        # destination inside the package-wide download cache
        local_filename = os.path.join(package_directory,
                                      cache_filename(self.url))

        ssh = paramiko.SSHClient()
        ssh.load_system_host_keys()
        try:
            ssh.connect(hostname, port=portnum,
                        username=username, password=password)
        except paramiko.SSHException, e:
            raise ModuleError(self.module, debug.format_exception(e))
Exemple #15
0
    def run(self, *args):
        """run(*args), runs ImageMagick's 'convert' on a shell, passing all
        arguments to the program.

        """
        # resolve the convert binary, honoring a configured install path
        cmd = 'convert'
        if configuration.check('path') and configuration.path:
            cmd = os.path.join(configuration.path, 'convert')
        cmdline = list2cmdline([cmd] + list(args))
        if not configuration.quiet:
            debug.log(cmdline)
        if os.system(cmdline) != 0:
            raise ModuleError(self, "system call failed: %r" % cmdline)
Exemple #16
0
    def compute_output(self, output_module, configuration):
        """Render the 'value' input off-screen with VTK and write it to an
        image file in the configured format (png/jpg/tif/pnm) and size.

        Raises ModuleError when the requested format is unsupported.
        """
        format_map = {
            'png': vtk.vtkPNGWriter,
            'jpg': vtk.vtkJPEGWriter,
            'tif': vtk.vtkTIFFWriter,
            'pnm': vtk.vtkPNMWriter
        }
        r = output_module.get_input("value").vtkInstance
        w = configuration["width"]
        h = configuration["height"]
        img_format = self.get_format(configuration)
        if img_format not in format_map:
            raise ModuleError(output_module,
                              'Cannot output in format "%s"' % img_format)
        fname = self.get_filename(configuration, suffix='.%s' % img_format)

        window = vtk.vtkRenderWindow()
        window.OffScreenRenderingOn()
        window.SetSize(w, h)

        # FIXME think this may be fixed in VTK6 so we don't have this
        # dependency...
        widget = None
        if vistrails.core.system.systemType == 'Darwin':
            # On OS X a real (frameless) widget must back the render
            # window even for off-screen rendering
            from PyQt4 import QtCore, QtGui
            widget = QtGui.QWidget(None, QtCore.Qt.FramelessWindowHint)
            widget.resize(w, h)
            widget.show()
            window.SetWindowInfo(str(int(widget.winId())))

        window.AddRenderer(r)
        window.Render()
        win2image = vtk.vtkWindowToImageFilter()
        win2image.SetInput(window)
        win2image.Update()
        writer = format_map[img_format]()
        writer.SetInput(win2image.GetOutput())
        writer.SetFileName(fname)
        writer.Write()
        window.Finalize()
        # fixed: identity comparison with None instead of !=
        if widget is not None:
            widget.close()
Exemple #17
0
    def __init__(self):
        """Initialize from the persistent VisTrails configuration.

        Reads webRepositoryURL (required) and isInServerMode (optional).
        Raises ModuleError when no webRepositoryURL is configured.
        """
        Module.__init__(self)

        config = get_vistrails_persistent_configuration()
        if config.check('webRepositoryURL'):
            self.base_url = config.webRepositoryURL
        else:
            raise ModuleError(self, ("No webRepositoryURL value defined"
                                     " in the Expert Configuration"))

        # check if we are running in server mode
        # this effects how the compute method functions
        if config.check('isInServerMode'):
            self.is_server = bool(config.isInServerMode)
        else:
            self.is_server = False

        # TODO: this '/' check should probably be done in core/configuration.py
        # fixed: endswith() also handles an empty URL, where the original
        # self.base_url[-1] indexing raised IndexError
        if self.base_url.endswith('/'):
            self.base_url = self.base_url[:-1]
Exemple #18
0
    def read_log(self, fname):
        """Read the execution log out of the .vt bundle at `fname`.

        Returns the converted Log object.
        Raises ModuleError when the bundle contains no log file.
        """
        # open the .vt bundle specified by the filename "fname"
        bundle = vistrails.db.services.io.open_vistrail_bundle_from_zip_xml(
            fname)[0]

        # get the log filename
        log_fname = bundle.vistrail.db_log_filename

        # fixed: single guard replaces the redundant second None check and
        # the unreachable trailing `return None`
        if log_fname is None:
            raise ModuleError(self, "No log file accessible")

        # open the log
        log = vistrails.db.services.io.open_log_from_xml(log_fname, True)

        # convert the log from a db object
        vistrails.core.log.log.Log.convert(log)
        return log
Exemple #19
0
 def compute(self):
     """Build the HDFS cache URI ('uri#symlink') for this job, caching it
     in the job monitor under this module's signature, and publish it on
     the 'URI' and 'Machine' output ports.

     Raises ModuleError when either input value is missing.
     """
     machine = self.get_machine()
     jm = self.job_monitor()
     signature = self.signature  # fixed: local `id` shadowed the builtin
     job = jm.getCache(signature)
     if not job:
         uri = self.force_get_input('HDFS File/URI')
         symlink = self.force_get_input('Symlink')
         # fixed: identity comparison with None instead of ==
         if uri is None or symlink is None:
             raise ModuleError(self,
                             "Missing 'HDFS File/URI' or 'Symlink' values")
         if '://' not in uri:
             uri = self.add_prefix(uri, machine)
         uri += '#' + symlink
         d = {'uri': uri}
         self.set_job_machine(d, machine)
         jm.setCache(signature, d, self.job_name())
         job = jm.getCache(signature)
     self.set_output('URI', job.parameters['uri'])
     self.set_output('Machine', machine)
Exemple #20
0
 def get_hadoop_config(self, machine):
     """Return the lazily-built, class-wide hadoop configuration dict.

     Probes known remote locations for the hadoop streaming jar and
     resolves the hadoop/hdfs binaries on the given machine.
     Raises ModuleError when no streaming jar can be found.
     """
     # fixed: identity comparison with None instead of ==
     if HadoopBaseModule.hadoop_configuration is None:
         hadoop_home = self.get_hadoop_home(machine)
         # paths to try in order
         streaming_paths = [
             '/share/hadoop/tools/lib/',  # AWS
             '/usr/lib/hadoop-mapreduce/',  # NYU/CUSP
             '/contrib/streaming/'
         ]
         for path in streaming_paths:
             hs = hadoop_home + path
             # remotely list the last jar with 'streaming' in its name
             command = ("python -c \"import os, os.path; print '' if not "
                        "os.path.exists('{0}') else ''.join([i for i in "
                        "os.listdir('{0}') if 'streaming' in i][-1:])\""
                        ).format(hs)
             streamingjar = machine.remote.send_command(command).strip()
             if streamingjar:
                 break
         if not streamingjar:
             raise ModuleError(
                 self, 'hadoop-streaming.jar not found. Please add '
                 'its directory to list of supported paths.')
         hadoop = (hadoop_home + '/bin/hadoop') if hadoop_home else 'hadoop'
         hdfs = (hadoop_home + '/bin/hdfs') if hadoop_home else 'hdfs'
         if not machine.remote.command_exists(hdfs):
             # fall back to the hadoop binary when hdfs is unavailable
             hdfs = hadoop
         config = {
             'home': hadoop_home,
             'hadoop': hadoop,
             'hdfs': hdfs,
             'streaming.jar': hs + streamingjar
         }
         HadoopBaseModule.hadoop_configuration = config
         # reading configuration files are error-prone
         #self.read_site_config(machine)
         config['fs.defaultFS'] = ''
         # can access config only if hdfs command exists
         if hadoop != hdfs:
             config['fs.defaultFS'] = \
                 self.call_hdfs('getconf -confKey fs.defaultFS', machine)
     return HadoopBaseModule.hadoop_configuration
Exemple #21
0
        def call_it(function, p):
            """Call the VTK method `function` with the VisTrails value(s)
            `p` translated to plain VTK objects.

            Raises ModuleError wrapping any exception VTK throws.
            """
            # Translate between VisTrails objects and VTK objects
            if p is None:
                # None indicates a call with no parameters
                params = []
            elif isinstance(p, tuple):
                # A tuple indicates a call with many parameters
                params = list(p)
            else:
                # Otherwise, it's a single parameter
                params = [p]

            # Unwraps VTK objects
            for i in xrange(len(params)):
                if hasattr(params[i], 'vtkInstance'):
                    params[i] = params[i].vtkInstance
            try:
                self.call_input_function(function, params)
            except Exception, e:
                raise ModuleError(
                    self, "VTK Exception: %s" % debug.format_exception(e))
Exemple #22
0
 def startJob(self, params):
     """Create the remote working directory, transfer the input files,
     and submit a PBS job; returns `params`.

     Raises ModuleError when the scheduler's reply is not a job id.
     """
     work_dir = params['working_directory']
     use_machine(self.machine)
     self.cdir = CreateDirectory("remote", work_dir)
     # input files must be in place before the job script runs
     trans = TransferFiles("remote",
                           params['input_directory'],
                           work_dir,
                           dependencies=[self.cdir])
     self.job = PBSScript("remote",
                          params['command'],
                          work_dir,
                          dependencies=[trans],
                          **params['additional_arguments'])
     self.job.run()
     try:
         ret = self.job._ret
         if ret:
             # qsub prints the numeric job id on success
             job_id = int(ret)
     except ValueError:
         # non-numeric reply means submission failed; release the machine
         end_machine()
         raise ModuleError(self, "Error submitting job: %s" % ret)
     return params
Exemple #23
0
    def compute(self):
        """Fetch a remote HDFS path to a local file, caching the transfer
        in the job monitor under this module's signature.

        Raises ModuleError when the local target exists and Override is
        not set.
        """
        machine = self.get_machine()
        jm = self.job_monitor()
        id = self.signature
        job = jm.getCache(id)
        if not job:
            remote = self.get_input('Remote Location')
            local = self.get_input('Local File')
            override = self.force_get_input('Override', False)
            if '://' not in remote:
                remote = self.add_prefix(remote, machine)
            if os.path.exists(local.name):
                if override == False:
                    raise ModuleError(self, 'Output already exists')
                else:
                    # clear the way for the new download
                    if os.path.isdir(local.name):
                        shutil.rmtree(local.name)
                    else:
                        os.unlink(local.name)

            # stage into a remote temp dir first, then sync it over
            tempfile = machine.remote.send_command('mktemp -d -u').strip()
            result = self.call_hdfs('dfs -get %s %s' % (remote, tempfile),
                                    machine)
            # too slow with many files
            #res = machine.send_command("get -r %s %s" % (tempfile, local.name) )
            # tar files to increase speed
            result = machine.local.send_command('mkdir %s' % local.name)
            result = machine.sync(local.name,
                                  tempfile,
                                  mode=machine.MODE_REMOTE_LOCAL,
                                  use_tar=True)
            result = machine.remote.rm(tempfile, force=True, recursively=True)
            d = {'remote': remote, 'local': local.name}
            self.set_job_machine(d, machine)
            jm.setCache(id, d, self.job_name())
            job = jm.getCache(id)
        self.set_output('Local File', PathObject(job.parameters['local']))
        self.set_output('Machine', machine)
Exemple #24
0
    def execute(self):
        """ Tries to download a file from url.

        Returns the path to the local file.

        On a network error, falls back to a previously cached copy when
        one exists, otherwise raises ModuleError.
        """
        self.local_filename = os.path.join(package_directory,
                                           cache_filename(self.url))

        # Before download
        self.pre_download()

        # Send request
        try:
            response = self.send_request()
        except urllib2.URLError, e:
            # network failure: serve the cached copy if we have one
            if self.is_in_local_cache:
                debug.warning("A network error occurred. DownloadFile will "
                              "use a cached version of the file")
                return self.local_filename
            else:
                raise ModuleError(
                        self.module,
                        "Network error: %s" % debug.format_exception(e))
Exemple #25
0
    def compute(self):
        """Write the input table to a new .xls file and set the file
        object on the 'file' output port.

        Raises ModuleError when the optional xlwt dependency is missing.
        """
        table = self.get_input('table')
        rows = table.rows

        xlwt = get_xlwt()
        if xlwt is None:  # pragma: no cover
            raise ModuleError(self, "xlwt is not available")

        workbook = xlwt.Workbook()
        sheet = workbook.add_sheet('Sheet1')

        fileobj = self.interpreter.filePool.create_file(suffix='.xls')
        fname = fileobj.name

        for c in xrange(table.columns):
            column = table.get_column(c)
            r = -1  # fixed: don't crash (NameError) on an empty column
            for r, e in enumerate(column):
                sheet.write(r, c, e)
            if r + 1 != rows:  # pragma: no cover
                # fixed off-by-one: r is the last 0-based row index, so
                # r + 1 lines were actually written
                debug.warning("WriteExcelSpreadsheet wrote %d lines instead "
                              "of expected %d" % (r + 1, rows))

        workbook.save(fname)
        self.set_output('file', fileobj)
    def compute(self):
        """Run the external application over the input files and publish
        the resulting plot datasets, grouped by their ylabel, on the
        configured plot output ports.

        Raises ModuleError when the application binary does not exist.
        """
        an = self.get_appname()
        if not os.path.isfile(an):
            raise ModuleError(self, "Application '%s' not existent" % an)
        cmdlist = [an]
        cmdlist += self.options
        # forward every other set input port as a "--name value" option
        for port_name in self.inputPorts:
            if port_name != 'files' and port_name != 'file' and port_name != 'application' and self.hasInputFromPort(
                    port_name):
                cmdlist += [
                    '--' + str(port_name),
                    str(self.getInputFromPort(port_name))
                ]
        rf = self.getInputFromPort('files')
        infiles = [x.props['filename'] for x in rf]
        cmdlist += infiles
        self.execute(cmdlist)
        # collect the .plot.*.xml files produced next to each input file
        datasetmap = {}
        datasets = []
        for infile in infiles:
            ofname = infile.replace('.out.xml', '.plot.*.xml')
            l = glob.glob(ofname)
            for fn in l:
                dataset = read_xml(fn)
                datasets.append(dataset)
                # group datasets by their ylabel property
                ylabel = dataset.props['ylabel']
                if ylabel in datasetmap:
                    datasetmap[ylabel].append(dataset)
                else:
                    datasetmap[ylabel] = [dataset]

        for (port_name, ylabel) in self.plots:
            if ylabel in datasetmap:
                self.setResult(port_name, datasetmap[ylabel])
            else:
                self.setResult(port_name, [])
Exemple #27
0
    def compute(self):
        """Run the module attached to FunctionPort in a while-loop.

        Iterates up to MaxIterations times, optionally feeding the values
        from StateOutputPorts back into StateInputPorts between
        iterations, and stops early when ConditionPort yields a falsy
        value. The final OutputPort value is set on 'Result'.
        """
        name_output = self.get_input('OutputPort')
        name_condition = self.force_get_input('ConditionPort')
        name_state_input = self.force_get_input('StateInputPorts')
        name_state_output = self.force_get_input('StateOutputPorts')
        max_iterations = self.get_input('MaxIterations')
        delay = self.force_get_input('Delay')

        if (name_condition is None and
                not self.has_input('MaxIterations')):
            raise ModuleError(self,
                              "Please set MaxIterations or use ConditionPort")

        if name_state_input or name_state_output:
            if not name_state_input or not name_state_output:
                raise ModuleError(self,
                                  "Passing state between iterations requires "
                                  "BOTH StateInputPorts and StateOutputPorts "
                                  "to be set")
            if len(name_state_input) != len(name_state_output):
                raise ModuleError(self,
                                  "StateInputPorts and StateOutputPorts need "
                                  "to have the same number of ports "
                                  "(got %d and %d)" % (len(name_state_input),
                                                       len(name_state_output)))

        connectors = self.inputPorts.get('FunctionPort')
        if len(connectors) != 1:
            raise ModuleError(self,
                              "Multiple modules connected on FunctionPort")
        # work on a shallow copy so the connected module can be re-run
        module = copy.copy(connectors[0].obj)

        state = None

        loop = self.logging.begin_loop_execution(self, max_iterations)
        for i in xrange(max_iterations):
            if not self.upToDate:
                module.upToDate = False
                module.computed = False

                # Set state on input ports
                if i > 0 and name_state_input:
                    for value, input_port, output_port \
                    in izip(state, name_state_input, name_state_output):
                        if input_port in module.inputPorts:
                            del module.inputPorts[input_port]
                        new_connector = ModuleConnector(
                                           create_constant(value), 'value',
                                           module.output_specs.get(output_port, None))
                        module.set_input_port(input_port, new_connector)
                        # Affix a fake signature on the module
                        inputPort_hash = sha1_hash()
                        inputPort_hash.update(input_port)
                        module.signature = b16encode(xor(
                                b16decode(self.signature.upper()),
                                inputPort_hash.digest()))

            loop.begin_iteration(module, i)

            module.update() # might raise ModuleError, ModuleSuspended,
                            # ModuleHadError, ModuleWasSuspended

            loop.end_iteration(module)

            if name_condition is not None:
                if name_condition not in module.outputPorts:
                    raise ModuleError(
                            module,
                            "Invalid output port: %s" % name_condition)
                if not module.get_output(name_condition):
                    break

            if delay and i+1 != max_iterations:
                time.sleep(delay)

            # Get state on output ports
            if name_state_output:
                state = [module.get_output(port) for port in name_state_output]

            self.logging.update_progress(self, i * 1.0 / max_iterations)

        loop.end_loop_execution()

        if name_output not in module.outputPorts:
            raise ModuleError(module,
                              "Invalid output port: %s" % name_output)
        result = module.get_output(name_output)
        self.set_output('Result', result)
Exemple #28
0
class Map(Module):
    """The Map Module executes a map operator in parallel on IPython engines.

    The FunctionPort should be connected to the 'self' output of the module you
    want to execute.
    The InputList is the list of values to be scattered on the engines.
    """
    def __init__(self):
        Module.__init__(self)

    def update_upstream(self):
        """A modified version of the update_upstream method.

        Modules connected to 'FunctionPort' only get update_upstream() (not a
        full update()): the mapped module itself is not executed locally here.
        Everything connected to other ports is updated normally.
        """

        # everything is the same except that we don't update anything
        # upstream of FunctionPort
        for port_name, connector_list in self.inputPorts.iteritems():
            if port_name == 'FunctionPort':
                for connector in connector_list:
                    connector.obj.update_upstream()
            else:
                for connector in connector_list:
                    connector.obj.update()
        # Prune connectors whose upstream produced InvalidOutput; iterate over
        # a copy because remove_input_connector mutates self.inputPorts.
        for port_name, connectorList in copy.copy(self.inputPorts.items()):
            if port_name != 'FunctionPort':
                for connector in connectorList:
                    if connector.obj.get_output(connector.port) is \
                            InvalidOutput:
                        self.remove_input_connector(port_name, connector)

    @staticmethod
    def print_compositeerror(e):
        """Write every engine exception in *e* (a CompositeError) to stderr.

        Each entry of e.elist is a (type, message, formatted_traceback, infos)
        tuple; the ANSI color codes IPython embeds in the traceback are
        stripped before printing.
        """
        sys.stderr.write("Got %d exceptions from IPython engines:\n" %
                         len(e.elist))
        for e_type, e_msg, formatted_tb, infos in e.elist:
            sys.stderr.write("Error from engine %d (%r):\n" %
                             (infos['engine_id'], infos['engine_uuid']))
            sys.stderr.write("%s\n" % strip_ansi_codes(formatted_tb))

    @staticmethod
    def list_exceptions(e):
        """Return a one-line-per-engine summary of the exceptions in *e*.

        Format: right-aligned engine id, exception type, exception message.
        """
        return '\n'.join("% 3d: %s: %s" % (infos['engine_id'], e_type, e_msg)
                         for e_type, e_msg, tb, infos in e.elist)

    def updateFunctionPort(self):
        """
        Function to be used inside the update_upstream method of the Map
        module. It updates the module connected to the FunctionPort port,
        executing it in parallel.

        Builds one serialized workflow per element of InputList (each with the
        element's values baked in as module functions), then sets up the
        IPython engines that will run them.
        """
        nameInput = self.get_input('InputPort')
        nameOutput = self.get_input('OutputPort')
        rawInputList = self.get_input('InputList')

        # Create inputList to always have iterable elements
        # to simplify code
        if len(nameInput) == 1:
            element_is_iter = False
            inputList = [[element] for element in rawInputList]
        else:
            element_is_iter = True
            inputList = rawInputList

        workflows = []
        module = None
        vtType = None

        # iterating through the connectors
        for connector in self.inputPorts.get('FunctionPort'):
            module = connector.obj

            # pipeline
            original_pipeline = connector.obj.moduleInfo['pipeline']

            # module
            module_id = connector.obj.moduleInfo['moduleId']
            vtType = original_pipeline.modules[module_id].vtType

            # serialize the module for each value in the list
            for i, element in enumerate(inputList):
                # self.element keeps the current raw element (unwrapped when
                # the input was a single port, see wrapping above)
                if element_is_iter:
                    self.element = element
                else:
                    self.element = element[0]

                # checking type and setting input in the module
                self.typeChecking(connector.obj, nameInput, inputList)
                self.setInputValues(connector.obj, nameInput, element, i)

                pipeline_db_module = original_pipeline.modules[
                    module_id].do_copy()

                # transforming a subworkflow in a group
                # TODO: should we also transform inner subworkflows?
                if pipeline_db_module.is_abstraction():
                    group = Group(id=pipeline_db_module.id,
                                  cache=pipeline_db_module.cache,
                                  location=pipeline_db_module.location,
                                  functions=pipeline_db_module.functions,
                                  annotations=pipeline_db_module.annotations)

                    # the group must expose the same ports as the abstraction
                    source_port_specs = pipeline_db_module.sourcePorts()
                    dest_port_specs = pipeline_db_module.destinationPorts()
                    for source_port_spec in source_port_specs:
                        group.add_port_spec(source_port_spec)
                    for dest_port_spec in dest_port_specs:
                        group.add_port_spec(dest_port_spec)

                    group.pipeline = pipeline_db_module.pipeline
                    pipeline_db_module = group

                # getting highest id between functions to guarantee unique ids
                # TODO: can get current IdScope here?
                if pipeline_db_module.functions:
                    high_id = max(function.db_id
                                  for function in pipeline_db_module.functions)
                else:
                    high_id = 0

                # adding function and parameter to module in pipeline
                # TODO: 'pos' should not be always 0 here
                id_scope = IdScope(beginId=long(high_id + 1))
                for elementValue, inputPort in izip(element, nameInput):

                    p_spec = pipeline_db_module.get_port_spec(
                        inputPort, 'input')
                    descrs = p_spec.descriptors()
                    # only single-descriptor (non-tuple) Constant ports can be
                    # encoded as a ModuleParam value
                    if len(descrs) != 1:
                        raise ModuleError(
                            self, "Tuple input ports are not supported")
                    if not issubclass(descrs[0].module, Constant):
                        raise ModuleError(
                            self, "Module inputs should be Constant types")
                    # NOTE: shadows the 'type' builtin; strip the surrounding
                    # parentheses from the signature string
                    type = p_spec.sigstring[1:-1]

                    mod_function = ModuleFunction(id=id_scope.getNewId(
                        ModuleFunction.vtType),
                                                  pos=0,
                                                  name=inputPort)
                    mod_param = ModuleParam(id=0L,
                                            pos=0,
                                            type=type,
                                            val=elementValue)

                    mod_function.add_parameter(mod_param)
                    pipeline_db_module.add_function(mod_function)

                # serializing module
                wf = self.serialize_module(pipeline_db_module)
                workflows.append(wf)

            # getting first connector, ignoring the rest
            break

        # IPython stuff
        try:
            rc = get_client()
        except Exception, error:
            raise ModuleError(
                self, "Exception while loading IPython: %s" %
                debug.format_exception(error))
        if rc is None:
            raise ModuleError(self, "Couldn't get an IPython connection")
        engines = rc.ids
        if not engines:
            raise ModuleError(
                self, "Exception while loading IPython: No IPython engines "
                "detected!")

        # initializes each engine
        # importing modules and initializing the VisTrails application
        # in the engines *only* in the first execution on this engine
        # (an engine that was already set up has an 'init' key in its
        # namespace; looking it up raises otherwise)
        uninitialized = []
        for eng in engines:
            try:
                rc[eng]['init']
            except Exception:
                uninitialized.append(eng)
        if uninitialized:
            init_view = rc[uninitialized]
            with init_view.sync_imports():
                import tempfile
                import inspect

                # VisTrails API
                import vistrails
                import vistrails.core
                import vistrails.core.db.action
                import vistrails.core.application
                import vistrails.core.modules.module_registry
                from vistrails.core.db.io import serialize
                from vistrails.core.vistrail.vistrail import Vistrail
                from vistrails.core.vistrail.pipeline import Pipeline
                from vistrails.core.db.locator import XMLFileLocator
                from vistrails.core.vistrail.controller import VistrailController
                from vistrails.core.interpreter.default import get_default_interpreter

            # initializing a VisTrails application
            try:
                init_view.execute(
                    'app = vistrails.core.application.init('
                    '        {"spawned": True},'
                    '        args=[])',
                    block=True)
            except CompositeError, e:
                self.print_compositeerror(e)
                raise ModuleError(
                    self, "Error initializing application on "
                    "IPython engines:\n"
                    "%s" % self.list_exceptions(e))

            # mark the engines as initialized for subsequent executions
            init_view['init'] = True
Exemple #29
0
                raise ModuleError(
                    self, "SQLAlchemy couldn't connect: %s" %
                    debug.format_exception(e))
            if not installed:
                raise ModuleError(self, "Failed to install required driver")
            try:
                engine = create_engine(url)
            except Exception, e:
                raise ModuleError(
                    self, "Couldn't connect to the database: %s" %
                    debug.format_exception(e))
        except SQLAlchemyError:
            # This is NoSuchModuleError in newer versions of SQLAlchemy but we
            # want compatibility here
            raise ModuleError(
                self, "SQLAlchemy has no support for protocol %r -- are you "
                "sure you spelled that correctly?" % url.drivername)

        self.set_output('connection', engine.connect())


class SQLSource(Module):
    """Runs an SQL statement ('source') against a DBConnection.

    NOTE(review): the class body (compute, etc.) is not visible here --
    only the declarative port/settings attributes are documented.
    """
    # Custom configuration widget for editing the SQL source text.
    _settings = ModuleSettings(
        configure_widget=
        'vistrails.packages.sql.widgets:SQLSourceConfigurationWidget')
    # Inputs: the database connection, a flag (presumably enabling result
    # caching -- confirm against compute), and the SQL statement text.
    _input_ports = [('connection', '(DBConnection)'),
                    ('cacheResults', '(basic:Boolean)'),
                    ('source', '(basic:String)')]
    # Outputs: the query result as a tabledata Table and as a plain list.
    _output_ports = [('result', '(org.vistrails.vistrails.tabledata:Table)'),
                     ('resultSet', '(basic:List)')]
Exemple #30
0
    def updateFunctionPort(self):
        """
        Function to be used inside the updateUsptream method of the Map module. It
        updates the module connected to the FunctionPort port, executing it in
        parallel.
        """
        nameInput = self.get_input('InputPort')
        nameOutput = self.get_input('OutputPort')
        rawInputList = self.get_input('InputList')

        # Create inputList to always have iterable elements
        # to simplify code
        if len(nameInput) == 1:
            element_is_iter = False
            inputList = [[element] for element in rawInputList]
        else:
            element_is_iter = True
            inputList = rawInputList

        workflows = []
        module = None
        vtType = None

        # iterating through the connectors
        for connector in self.inputPorts.get('FunctionPort'):
            module = connector.obj

            # pipeline
            original_pipeline = connector.obj.moduleInfo['pipeline']

            # module
            module_id = connector.obj.moduleInfo['moduleId']
            vtType = original_pipeline.modules[module_id].vtType

            # serialize the module for each value in the list
            for i, element in enumerate(inputList):
                if element_is_iter:
                    self.element = element
                else:
                    self.element = element[0]

                # checking type and setting input in the module
                self.typeChecking(connector.obj, nameInput, inputList)
                self.setInputValues(connector.obj, nameInput, element, i)

                pipeline_db_module = original_pipeline.modules[
                    module_id].do_copy()

                # transforming a subworkflow in a group
                # TODO: should we also transform inner subworkflows?
                if pipeline_db_module.is_abstraction():
                    group = Group(id=pipeline_db_module.id,
                                  cache=pipeline_db_module.cache,
                                  location=pipeline_db_module.location,
                                  functions=pipeline_db_module.functions,
                                  annotations=pipeline_db_module.annotations)

                    source_port_specs = pipeline_db_module.sourcePorts()
                    dest_port_specs = pipeline_db_module.destinationPorts()
                    for source_port_spec in source_port_specs:
                        group.add_port_spec(source_port_spec)
                    for dest_port_spec in dest_port_specs:
                        group.add_port_spec(dest_port_spec)

                    group.pipeline = pipeline_db_module.pipeline
                    pipeline_db_module = group

                # getting highest id between functions to guarantee unique ids
                # TODO: can get current IdScope here?
                if pipeline_db_module.functions:
                    high_id = max(function.db_id
                                  for function in pipeline_db_module.functions)
                else:
                    high_id = 0

                # adding function and parameter to module in pipeline
                # TODO: 'pos' should not be always 0 here
                id_scope = IdScope(beginId=long(high_id + 1))
                for elementValue, inputPort in izip(element, nameInput):

                    p_spec = pipeline_db_module.get_port_spec(
                        inputPort, 'input')
                    descrs = p_spec.descriptors()
                    if len(descrs) != 1:
                        raise ModuleError(
                            self, "Tuple input ports are not supported")
                    if not issubclass(descrs[0].module, Constant):
                        raise ModuleError(
                            self, "Module inputs should be Constant types")
                    type = p_spec.sigstring[1:-1]

                    mod_function = ModuleFunction(id=id_scope.getNewId(
                        ModuleFunction.vtType),
                                                  pos=0,
                                                  name=inputPort)
                    mod_param = ModuleParam(id=0L,
                                            pos=0,
                                            type=type,
                                            val=elementValue)

                    mod_function.add_parameter(mod_param)
                    pipeline_db_module.add_function(mod_function)

                # serializing module
                wf = self.serialize_module(pipeline_db_module)
                workflows.append(wf)

            # getting first connector, ignoring the rest
            break

        # IPython stuff
        try:
            rc = get_client()
        except Exception, error:
            raise ModuleError(
                self, "Exception while loading IPython: %s" %
                debug.format_exception(error))