def test_vector(self):
        rows = {
            "fields": ["a", "b"],
            "rows": [
                {"a": [1, 2, 3], "b": ['1', '2']},
                {"a": [4, 5, 6], "b": ['3', '4']}
            ]
        }

        # Conversion to vtkTable
        vtktable = girder_worker.convert(
            "table",
            {"format": "rows", "data": rows},
            {"format": "vtktable"}
        )["data"]
        self.assertEqual(vtktable.GetNumberOfRows(), 2)
        self.assertEqual(vtktable.GetNumberOfColumns(), 2)
        a = vtktable.GetColumnByName("a")
        b = vtktable.GetColumnByName("b")
        self.assertEqual(a.GetNumberOfComponents(), 3)
        self.assertEqual(b.GetNumberOfComponents(), 2)
        self.assertTrue(isinstance(a, vtk.vtkDoubleArray))
        self.assertTrue(isinstance(b, vtk.vtkStringArray))
        for i in range(6):
            self.assertEqual(a.GetValue(i), i + 1)
        for i in range(4):
            self.assertEqual(b.GetValue(i), str(i + 1))
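
        # Each list cell becomes one tuple in the vtk array, so column "a"
        # holds 2 tuples x 3 components = 6 flat values and "b" holds 4,
        # which is why GetValue() above is indexed over the flat range.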

        # Conversion back to rows
        rows2 = girder_worker.convert(
            "table",
            {"format": "vtktable", "data": vtktable},
            {"format": "rows"}
        )["data"]
        self.assertEqual(rows2, rows)
    def test_adjacencylist(self):
        output = girder_worker.convert(
            'graph', self.test_input['distances'], {'format': 'adjacencylist'})

        expected_edges = set(self.test_input['distances']['data'].edges())
        actual_edges = set()

        for line in output['data'].splitlines():
            parts = line.split(' ', 1)

            if len(parts) > 1:
                source, targets = parts

                for target in targets.split(' '):
                    edge = (source, target)
                    self.assertNotIn(edge, actual_edges)
                    actual_edges.add(edge)

        self.assertEqual(expected_edges, actual_edges)

        output = girder_worker.convert(
            'graph', output, {'format': 'networkx'})

        # Don't take edge data into account, because it was lost in the
        # original conversion to adjacencylist
        self.assertTrue(
            is_isomorphic(output['data'],
                          self.test_input['distances']['data'],
                          edge_match=None))
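
        # For reference, the adjacencylist text parsed above looks roughly
        # like this (hypothetical node names):
        #     a b c
        #     b c
        # i.e. one line per source node followed by its neighbors, which is
        # what the split(' ', 1) / split(' ') pair above unpacks.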
    def test_treestore(self):
        output = girder_worker.convert(
            'tree',
            {'format': 'newick', 'data': self.newick},
            {'format': 'r.apetree'})
        output = girder_worker.convert('tree', output, {'format': 'treestore'})
        self.assertEqual(output['format'], 'treestore')
        rows = bson.decode_all(output['data'])
        root = None
        for d in rows:
            if 'rooted' in d:
                root = d
        self.assertNotEqual(root, None)
        self.assertEqual(len(root['clades']), 1)

        def findId(id):
            for d in rows:
                if d['_id'] == id:
                    return d

        top = findId(root['clades'][0])
        self.assertEqual(len(top['clades']), 2)
        internal = findId(top['clades'][0])
        rubribarbus = findId(top['clades'][1])
        ahli = findId(internal['clades'][0])
        allogus = findId(internal['clades'][1])
        self.assertEqual(internal['branch_length'], 2)
        self.assertEqual(ahli['name'], 'ahli')
        self.assertEqual(ahli['branch_length'], 0)
        self.assertEqual(allogus['name'], 'allogus')
        self.assertEqual(allogus['branch_length'], 1)
        self.assertEqual(rubribarbus['name'], 'rubribarbus')
        self.assertEqual(rubribarbus['branch_length'], 3)
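
        # The assertions above imply a newick string along the lines of
        # ((ahli:0,allogus:1):2,rubribarbus:3); this shape is inferred from
        # the expected names and branch lengths, not from self.newick itself.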
    def test_graphml(self):
        output = girder_worker.convert(
            'graph', self.test_input['distances'], {'format': 'graphml'})
        expected_edges = set(self.test_input['distances']['data'].edges(
            data='distance'))
        actual_edges = set()

        self.assertIsInstance(output['data'], (str, unicode))
        tree = etree.fromstring(output['data'])
        self.assertEqual(len(tree), 2)
        self.assertEqual(tree[0].tag, self.GRAPHML_NS + 'key')
        self.assertEqual(tree[1].tag, self.GRAPHML_NS + 'graph')

        for edge in tree[1].findall(self.GRAPHML_NS + 'edge'):
            edge = (edge.attrib['source'],
                    edge.attrib['target'],
                    int(edge.find(self.GRAPHML_NS + 'data').text))

            self.assertNotIn(edge, actual_edges)
            actual_edges.add(edge)

        self.assertEqual(expected_edges, actual_edges)

        output = girder_worker.convert(
            'graph', output, {'format': 'networkx'})

        self.assertTrue(
            is_isomorphic(output['data'],
                          self.test_input['distances']['data'],
                          edge_match=numerical_edge_match('distance', 1)))
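
        # For reference, each edge element matched above has roughly this
        # shape (values hypothetical, tags namespaced under GRAPHML_NS):
        #     <edge source="n0" target="n1"><data key="d0">5</data></edge>
        # so the tuple built per edge is (source, target, int(distance)).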
    def test_non_binary_tree(self):
        girder_worker.convert(
            "tree",
            {
                "format": "newick",
                "url": "file://" +
                       os.path.join("data", "geospiza_from_otl.phy")
            },
            {"format": "nested"})
    def test_vtkgraph(self):
        # Test vtkgraph -> vtkgraph.serialized on a simple digraph
        output = girder_worker.convert(
            'graph', self.test_input['simpleVtkDiGraph'],
            {'format': 'vtkgraph.serialized'})

        with open(os.path.join(
                'tests', 'data', 'vtkDiGraph.txt'), 'rb') as fixture:
            self.assertEqual(output['data'].splitlines()[1:],
                             fixture.read().splitlines()[1:])

        # Test networkx -> vtkgraph.serialized on an undirected
        # graph w/ edge data
        output = girder_worker.convert(
            'graph', self.test_input['distances'],
            {'format': 'vtkgraph.serialized'})

        with open(os.path.join(
                'tests', 'data', 'vtkDistancesUndirectedGraph.txt'),
                'rb') as fixture:
            self.assertEqual(output['data'].splitlines()[1:],
                             fixture.read().splitlines()[1:])

        # Test networkx -> vtkgraph with missing edge attributes
        output = girder_worker.convert(
            'graph', self.test_input['grants'],
            {'format': 'vtkgraph.serialized'})

        with open(os.path.join('tests', 'data', 'vtkGrantsDirectedGraph.txt'),
                  'rb') as fixture:
            self.assertEqual(output['data'].splitlines()[1:],
                             fixture.read().splitlines()[1:])

        # Test that networkx -> vtkgraph raises an error when the same edge
        # attribute has values of different types
        with self.assertRaises(Exception):
            output = girder_worker.convert(
                'graph', {'format': 'networkx', 'data': nx.Graph([
                    ('A', 'B', {'value': 10}),
                    ('B', 'C', {'value': '10'})
                ])}, {'format': 'vtkgraph'})

        # Test vtkgraph -> networkx
        output = girder_worker.convert(
            'graph', self.test_input['simpleVtkDiGraph'],
            {'format': 'networkx'})

        self.assertIsInstance(output['data'], nx.DiGraph)

        self.assertEqual(len(output['data'].nodes()), 3)
        self.assertEqual(len(output['data'].edges()), 3)
        self.assertEqual(sorted(output['data'].edges(data=True)),
                         [(0, 1, {'Weights': 1.0}),
                          (0, 2, {'Weights': 2.0}),
                          (1, 2, {'Weights': 1.0})])
    def round_trip(self, obj):
        """Convert an object to base64 and back, returning the new object."""
        b64 = convert(
            'python',
            {'format': 'object', 'data': obj},
            {'format': 'pickle.base64'}
        )['data']
        newobj = convert(
            'python',
            {'format': 'pickle.base64', 'data': b64},
            {'format': 'object'}
        )
        return newobj['data']
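
    # A minimal usage sketch (not part of the original suite): round_trip
    # should hand back an object equal to what went in for any picklable
    # value, exercising both directions of the pickle.base64 conversion.
    def test_round_trip_sketch(self):
        obj = {'a': [1, 2, 3], 'b': 'text'}
        self.assertEqual(self.round_trip(obj), obj)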
    def test_objectlist_to_rows(self):
        objlist = [{"a": {"b": 5}}, {"a": {"b": {"c": 3}}}]
        output = girder_worker.convert("table", {
            "format": "objectlist",
            "data": objlist
        }, {"format": "rows"})
        self.assertEqual(output["format"], "rows")
        self.assertEqual(output["data"], {
            "fields": ["a.b", "a.b.c"],
            "rows": [{"a.b": 5}, {"a.b.c": 3}]})
        output = girder_worker.convert("table", {
            "format": "rows",
            "data": output["data"]
        }, {"format": "objectlist"})
        self.assertEqual(output["data"], objlist)
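
        # Note: nested objects are flattened into dotted column names, so
        # {"a": {"b": 5}} contributes the field "a.b" and the deeper object
        # contributes "a.b.c"; converting rows back to objectlist re-nests
        # the keys, which is why the round trip above recovers objlist.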
    def test_jsonlines(self):
        output = girder_worker.convert("table", {
            "format": "jsonlines",
            "data": '{"a": 1, "b": 2}\n{"a": 3, "b": 4}'
        }, {"format": "objectlist"})
        self.assertEqual(output["format"], "objectlist")
        self.assertEqual(output["data"], [{"a": 1, "b": 2}, {"a": 3, "b": 4}])
    def test_column_names(self):
        output = girder_worker.convert("table", {
            "format": "rows",
            "data": {"fields": ["a", "b"], "rows": [{"a": 6, "b": 5}]}
        }, {"format": "column.names"})
        self.assertEqual(output["format"], "column.names")
        self.assertEqual(output["data"], ["a", "b"])
    def test_column_names_csv(self):
        output = girder_worker.convert("table", {
            "format": "csv",
            "data": ",a,b,longer name\n1,1,1,1\n2,2,2,2\n3,3,3,3\n"
        }, {"format": "column.names"})
        self.assertEqual(output["format"], "column.names")
        self.assertEqual(output["data"], ["", "a", "b", "longer name"])
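
        # The leading comma in the csv above yields an unnamed first column,
        # which is why the expected column.names list starts with "".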
    def test_sniffer(self):
        output = girder_worker.convert(
            "table",
            {
                "format": "csv",
                "url": "file://" + os.path.join("data", "test.csv")
            },
            {"format": "rows"}
        )
        self.assertEqual(len(output["data"]["fields"]), 32)
        self.assertEqual(output["data"]["fields"][:3], [
            "FACILITY", "ADDRESS", "DATE OF INSPECTION"
        ])
        self.assertEqual(len(output["data"]["rows"]), 14)

        flu = girder_worker.load(os.path.join(
            self.analysis_path, "xdata", "flu.json"))

        output = girder_worker.run(
            flu,
            inputs={},
            outputs={"data": {"type": "table", "format": "rows"}}
        )
        self.assertEqual(output["data"]["data"]["fields"][:3], [
            "Date", "United States", "Alabama"
        ])
    def test_convert(self):
        tmp = tempfile.mktemp()

        output = girder_worker.convert("image", {
            "format": "png.base64",
            "data": self.image
        }, {
            "format": "png",
            "url": "file://" + tmp,
            "mode": "auto"
        })

        with open(tmp, 'rb') as fp:
            value = fp.read()
        os.remove(tmp)
        self.assertEqual(output["format"], "png")
        self.assertEqual(base64.b64encode(value), self.image)

        output = girder_worker.convert(
            "image",
            {"format": "png.base64", "data": self.image},
            {"format": "pil"})

        output = girder_worker.convert(
            "image",
            output,
            {"format": "png"})

        io1 = StringIO(base64.b64decode(self.image))
        im1 = Image.open(io1)
        io2 = StringIO(output["data"])
        im2 = Image.open(io2)
        self.assertEqual(compareImages(im1, im2), 0)

        output = girder_worker.convert("image", {
            "format": "png.base64",
            "data": self.image
        }, {
            "format": "jpeg"
        })
        data = StringIO(output['data'])
        jpeg = Image.open(data)
        self.assertTrue(isinstance(jpeg, JpegImageFile))
    def test_header_detection(self):
        output = girder_worker.convert(
            "table",
            {"format": "csv", "data": "a,b,c\n7,1,c\n8,2,f\n9,3,i"},
            {"format": "rows"}
        )
        self.assertEqual(output["data"]["fields"], ["a", "b", "c"])
        self.assertEqual(len(output["data"]["rows"]), 3)

        output = girder_worker.convert(
            "table",
            {"format": "csv", "data": "1,2,3\n7,10,\n,11,\n,12,"},
            {"format": "rows"}
        )
        self.assertEqual(
            output["data"]["fields"],
            ["1", "2", "3"]
        )
        self.assertEqual(len(output["data"]["rows"]), 3)
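
        # In both cases above the first csv line is treated as the header
        # row, including the all-numeric "1,2,3" line, which is why three
        # data rows remain from each four-line input.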
    def test_clique(self):
        # clique.json -> NetworkX
        output = girder_worker.convert(
            'graph', self.test_input['alphabetGraph'], {'format': 'networkx'})

        self.assertEqual(
            set([n[1]['name'] for n in output['data'].nodes(data=True)]),
            set(['a', 'b', 'c', 'd']))
        self.assertEqual(len(output['data'].edges()), 3)
        self.assertEqual(output['data'].degree('55ba5019f8883b5bf35f3e30'), 0)

        # NetworkX -> clique.json
        output = girder_worker.convert(
            'graph', output, {'format': 'clique.json'})

        # Since the ids of the nodes are lost, only test the structure:
        # check for nodes named a, b, c, and d, and for the following edges:
        # a -> b
        # a -> c
        # b -> c
        output['data'] = json.loads(output['data'])
        nodes = [item for item in output['data'] if item['type'] == 'node']
        edges = [(item['source']['$oid'], item['target']['$oid'])
                 for item in output['data'] if item['type'] == 'link']
        oid_by_name = {}

        for node in nodes:
            oid_by_name[node['data']['name']] = node['_id']['$oid']

        # Check nodes
        self.assertEqual(sorted(oid_by_name.keys()),
                         ['a', 'b', 'c', 'd'])

        # Check edges
        self.assertEqual(len(edges), 3)
        self.assertIn((oid_by_name['a'], oid_by_name['b']), edges)
        self.assertIn((oid_by_name['a'], oid_by_name['c']), edges)
        self.assertIn((oid_by_name['b'], oid_by_name['c']), edges)
    def test_big_header(self):
        output = girder_worker.convert(
            "table",
            {
                "format": "csv",
                "url": "file://" + os.path.join("data", "RadiomicsData.csv")
            },
            {"format": "rows"}
        )
        self.assertEqual(len(output["data"]["fields"]), 454)
        self.assertEqual(output["data"]["fields"][:3], [
            "GLCM_autocorr", "GLCM_clusProm", "GLCM_clusShade"
        ])
        self.assertEqual(len(output["data"]["rows"]), 99)
    def test_objectlist(self):
        rows = {
            "fields": ["a", "b"],
            "rows": [
                {"a": 1, "b": 'x'},
                {"a": 4, "b": 'y'}
            ]
        }
        objectlist = girder_worker.convert(
            "table",
            {"format": "rows", "data": rows},
            {"format": "objectlist"}
        )["data"]
        # Should have same row data
        self.assertEqual(objectlist, rows["rows"])

        rows2 = girder_worker.convert(
            "table",
            {"format": "objectlist", "data": objectlist},
            {"format": "rows"}
        )["data"]
        # Should have same fields but could be in different order
        self.assertEqual(set(rows["fields"]), set(rows2["fields"]))
        # Should have same row data
        self.assertEqual(rows["rows"], rows2["rows"])

        # Make sure we can go back and forth to JSON
        objectlist = girder_worker.convert(
            "table",
            girder_worker.convert(
                "table",
                {"format": "objectlist", "data": rows["rows"]},
                {"format": "objectlist.json"}
            ),
            {"format": "objectlist"}
        )["data"]
        self.assertEqual(rows["rows"], objectlist)
    def test_inputs_from_file(self):
        """Run a task with base64 inputs in a file."""
        a = tempfile.NamedTemporaryFile()
        b = tempfile.NamedTemporaryFile()

        convert(
            'python',
            {'format': 'object', 'data': (0, 1)},
            {'format': 'pickle.base64', 'mode': 'local', 'path': a.name}
        )

        convert(
            'python',
            {'format': 'object', 'data': 2},
            {'format': 'pickle.base64', 'mode': 'local', 'path': b.name}
        )

        outputs = self.run_basic_task({
            'a': {'format': 'pickle.base64', 'mode': 'local', 'path': a.name},
            'b': {'format': 'pickle.base64', 'mode': 'local', 'path': b.name}
        })

        self.assertEqual(outputs.get('c'), (0, 1, 0, 1))
        self.assertEqual(outputs.get('d'), 4)
    def test_flu(self):
        output = girder_worker.convert(
            "table",
            {
                "format": "csv",
                "url": "file://" + os.path.join("data", "flu.csv")
            },
            {"format": "column.names"}
        )
        self.assertEqual(output["format"], "column.names")
        self.assertEqual(len(output["data"]), 162)
        self.assertEqual(
            output["data"][:3],
            ['Date', 'United States', 'Alabama']
        )
    def convert(self, data_spec, format):
        """Convert to a  compatible data format.

        :param dict data_spec: Data specification
        :param str format: The target data format
        :returns: dict

        >>> spec = {'name': 'a', 'type': 'number', 'format': 'number'}
        >>> port = Port(spec)

        >>> new_spec = port.convert({'format': 'number', 'data': 1}, 'json')
        >>> new_spec['format']
        'json'
        >>> port.fetch(new_spec)
        1
        """
        return convert(self.type, data_spec, {'format': format})
    def test_convert(self):
        outputs = girder_worker.run(
            self.cone,
            inputs={
                "resolution": {"format": "number", "data": 100},
                "radius": {"format": "number", "data": 1}
            },
            outputs={
                "cone": {"format": "vtkpolydata.serialized"}
            })
        cone = outputs["cone"]["data"]
        self.assertTrue(isinstance(cone, str))
        converted = girder_worker.convert(
            "geometry",
            outputs["cone"],
            {"format": "vtkpolydata"}
        )["data"]
        self.assertTrue(isinstance(converted, vtk.vtkPolyData))
        self.assertEqual(converted.GetNumberOfCells(), 101)
        self.assertEqual(converted.GetNumberOfPoints(), 101)
    def test_nan(self):
        output = girder_worker.convert(
            "table",
            {
                "format": "csv",
                "url": "file://" + os.path.join("data", "RadiomicsData.csv")
            },
            {"format": "rows.json"}
        )
        data = json.loads(output["data"])
        self.assertEqual(len(data["fields"]), 454)
        self.assertEqual(data["fields"][:3], [
            "GLCM_autocorr", "GLCM_clusProm", "GLCM_clusShade"
        ])
        self.assertEqual(len(data["rows"]), 99)
        for row in data["rows"]:
            for field in row:
                if isinstance(row[field], float):
                    self.assertFalse(math.isnan(row[field]))
                    self.assertFalse(math.isinf(row[field]))
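
        # The loop above guards against NaN/Inf values leaking into the
        # rows.json output, since strict JSON has no representation for
        # them; presumably the source csv contains cells that would
        # otherwise produce such floats.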
def run(task, inputs, outputs, task_inputs, task_outputs, validate,  # noqa
        auto_convert, **kwargs):
    # Make map of steps
    steps = {step["name"]: step for step in task["steps"]}

    # Make map of input bindings
    bindings = {step["name"]: {} for step in task["steps"]}

    # Create dependency graph and downstream pointers
    dependencies = {step["name"]: set() for step in task["steps"]}
    downstream = {}
    for conn in task["connections"]:
        # Add dependency graph link for internal links
        if "input_step" in conn and "output_step" in conn:
            dependencies[conn["input_step"]].add(conn["output_step"])

        # Add downstream links for links with output
        if "output_step" in conn:
            ds = downstream.setdefault(conn["output_step"], {})
            ds_list = ds.setdefault(conn["output"], [])
            ds_list.append(conn)

        # Set initial bindings for inputs
        if "input_step" in conn and "output_step" not in conn:
            name = conn["name"]
            bindings[conn["input_step"]][conn["input"]] = {
                "format": task_inputs[name]["format"],
                "data": inputs[name]["script_data"]
            }

    # Traverse analyses in topological order
    for step_set in toposort(dependencies):
        for step in step_set:
            # Visualizations cannot be executed
            if ("visualization" in steps[step] and
                    steps[step]["visualization"]):
                continue

            # Run step
            print "--- beginning: %s ---" % steps[step]["name"]
            out = girder_worker.run(steps[step]["task"], bindings[step])
            print "--- finished: %s ---" % steps[step]["name"]

            # Update bindings of downstream analyses
            if step in downstream:
                for name, conn_list in downstream[step].iteritems():
                    for conn in conn_list:
                        if "input_step" in conn:
                            # This is a connection to a downstream step
                            b = bindings[conn["input_step"]]
                            b[conn["input"]] = out[name]
                        else:
                            # This is a connection to a final output
                            o = outputs[conn["name"]]
                            o["script_data"] = out[name]["data"]

    # Output visualization parameters
    outputs["_visualizations"] = []
    for step in task["steps"]:
        if "visualization" not in step or not step["visualization"]:
            continue
        vis_bindings = {}
        for b, value in bindings[step["name"]].iteritems():
            script_output = value
            vis_input = None
            for step_input in step["task"]["inputs"]:
                if step_input["name"] == b:
                    vis_input = step_input

            if not vis_input:
                raise Exception(
                    "Could not find visualization input named " + b + "."
                )

            # Validate the output
            if (validate and not
                    girder_worker.isvalid(vis_input["type"], script_output)):
                raise Exception(
                    "Output %s (%s) is not in the expected type (%s) and "
                    "format (%s)." % (
                        b, type(script_output["data"]),
                        vis_input["type"], script_output["format"]))

            if auto_convert:
                vis_bindings[b] = girder_worker.convert(
                    vis_input["type"],
                    script_output,
                    {"format": vis_input["format"]}
                )

            elif script_output["format"] == vis_input["format"]:
                data = script_output["data"]
                if "mode" in script_output:
                    girder_worker.io.push(data, script_output)
                else:
                    vis_bindings[b] = {
                        "type": vis_input["type"],
                        "format": vis_input["format"],
                        "data": data
                    }
            else:
                raise Exception(
                    "Expected exact format match but '" +
                    script_output["format"] +
                    "' != '" + vis_input["format"] + "'."
                )

            if "script_data" in vis_bindings[b]:
                del vis_bindings[b]["script_data"]

        outputs["_visualizations"].append({
            "mode": "preset",
            "type": step["name"],
            "inputs": vis_bindings
        })
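
# A minimal sketch (hypothetical step names, not part of any workflow spec)
# of how the dependency map built in run() is consumed: toposort yields sets
# of step names whose dependencies are already satisfied, so steps within a
# set are independent of one another and may run in any order.
def _toposort_order_sketch():
    deps = {"load": set(), "fit": {"load"}, "plot": {"fit"}}
    # Expected result: [{"load"}, {"fit"}, {"plot"}]
    return list(toposort(deps))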
    def convert(*pargs, **kwargs):
        return girder_worker.convert(*pargs, **kwargs)