Example #1
  def insert_end(node, decision):
    """Insert DecisionEnd between node and node parents"""
    parent_links = node.get_parent_links().exclude(name='default')
    decision_end = decision.get_child_end()

    # Find parent decision node for every end's parent.
    # If the decision node is the one passed,
    # change the parent to link to the Decision node's DecisionEnd node.
    # Skip embedded decisions and forks along the way.
    decision_end_used = False
    for parent_link in parent_links:
      parent = parent_link.parent.get_full_node()
      node_temp = parent
      while node_temp and not isinstance(node_temp, Decision):
        if isinstance(node_temp, Join):
          node_temp = node_temp.get_parent_fork().get_parent()
        elif isinstance(node_temp, DecisionEnd):
          node_temp = node_temp.get_parent_decision().get_parent()
        else:
          node_temp = node_temp.get_parent()

      if node_temp.id == decision.id and parent.node_type != Decision.node_type:
        links = Link.objects.filter(parent=parent).exclude(name__in=['related', 'kill', 'error'])
        if len(links) != 1:
          raise RuntimeError(_('Cannot import workflows that have decision DAG leaf nodes with multiple children or no children.'))
        link = links[0]
        link.child = decision_end
        link.save()

        decision_end_used = True

    # Create link between DecisionEnd and terminal node.
    if decision_end_used and not Link.objects.filter(name='to', parent=decision_end, child=node).exists():
      link = Link(name='to', parent=decision_end, child=node)
      link.save()
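
A rough sketch of the rewiring performed above (the branch names are hypothetical, purely illustrative): each qualifying parent of node has its single remaining child link (excluding 'related', 'kill' and 'error') redirected to the decision's DecisionEnd, and a single 'to' link then connects the DecisionEnd to node.

  # Illustration only -- hypothetical branch names, not calls into the code above:
  #   before:  branch_a --> node                branch_b --> node
  #   after:   branch_a --> decision_end        branch_b --> decision_end
  #            decision_end --('to')--> node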
Example #2
  def decision_helper(decision, subgraphs):
    """
    Iterates through children, waits for ends.
    When an end is found, finish the decision.
    If the end has more parents than the decision has branches, bubble the end upwards.
    """
    # Create decision end if it does not exist.
    if not Link.objects.filter(parent=decision, name='related').exists():
      end = DecisionEnd(workflow=workflow, node_type=DecisionEnd.node_type)
      end.save()
      link = Link(name='related', parent=decision, child=end)
      link.save()

    children = [_link.child.get_full_node() for _link in decision.get_children_links().exclude(name__in=['error','default'])]

    ends = set()
    for child in children:
      end = helper(child, subgraphs)
      if end:
        ends.add(end)

    # A single end means that we've found a unique end for this decision.
    # Multiple ends mean that we've found a bad decision.
    if len(ends) > 1:
      raise RuntimeError(_('Cannot import workflows that have decisions paths with multiple terminal nodes that converge on a single terminal node.'))
    elif len(ends) == 1:
      end = ends.pop()
      # Branch count will vary with each call if we have multiple decision nodes embedded within decision paths.
      # This is because parents are replaced with DecisionEnd nodes.
      fan_in_count = len(end.get_parent_links().exclude(name__in=['error','default']))
      # IF it covers all branches, then it is an end that perfectly matches this decision.
      # ELSE it is an end for a decision path that the current decision node is a part of as well.
      # The unhandled case is multiple ends for a single decision that converge on a single end.
      # This is not handled in Hue.
      fan_out_count = len(decision.get_children_links().exclude(name__in=['error','default']))
      if fan_in_count > fan_out_count:
        insert_end(end, decision)
        return end
      elif fan_in_count == fan_out_count:
        insert_end(end, decision)
        # End node is a decision node.
        # This means that there are multiple decision nodes in sequence.
        # If both decision nodes are within a single decision path,
        # then the end may need to be returned, if found.
        if isinstance(end, Decision):
          end = decision_helper(end, subgraphs)
          if end:
            return end

        # Can do this because we've replaced all its parents with a single DecisionEnd node.
        return helper(end, subgraphs)
      else:
        raise RuntimeError(_('Cannot import workflows that have decisions paths with multiple terminal nodes that converge on a single terminal node.'))
    else:
      raise RuntimeError(_('Cannot import workflows that have decisions paths that never end.'))

    return None
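
To make the fan-in/fan-out comparison concrete, a hypothetical topology (node names are illustrative only): a decision with two branches whose shared end also receives a link from an outer decision path sees fan_in_count == 3 exceed fan_out_count == 2, so the end is bubbled upwards rather than consumed here.

  # Illustration only -- hypothetical nodes, not taken from the code above:
  #   decision --+-- task_a --+
  #              +-- task_b --+--> shared_end     (fan_out_count == 2)
  #   outer_task -------------+                   (fan_in_count == 3 -> bubble up)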
Example #3
  def decision_helper(decision):
    """
    Iterates through children, waits for ends.
    When an end is found, finish the decision.
    If the end has more parents than the decision has branches, bubble the end upwards.
    """
    # Create decision end if it does not exist.
    if not Link.objects.filter(parent=decision, name='related').exists():
      end = DecisionEnd(workflow=workflow, node_type=DecisionEnd.node_type)
      end.save()
      link = Link(name='related', parent=decision, child=end)
      link.save()

    children = [link.child.get_full_node() for link in decision.get_children_links().exclude(name__in=['error','default'])]

    ends = set()
    for child in children:
      end = helper(child)
      if end:
        ends.add(end)

    # A single end means that we've found a unique end for this decision.
    # Multiple ends mean that we've found a bad decision.
    if len(ends) > 1:
      raise RuntimeError(_('Cannot import workflows that have decisions paths with multiple terminal nodes that converge on a single terminal node.'))
    elif len(ends) == 1:
      end = ends.pop()
      # Branch count will vary with each call if we have multiple decision nodes embedded within decision paths.
      # This is because parents are replaced with DecisionEnd nodes.
      fan_in_count = len(end.get_parent_links().exclude(name__in=['error','default']))
      # IF it covers all branches, then it is an end that perfectly matches this decision.
      # ELSE it is an end for a decision path that the current decision node is a part of as well.
      # The unhandled case is multiple ends for a single decision that converge on a single end.
      # This is not handled in Hue.
      fan_out_count = len(decision.get_children_links().exclude(name__in=['error','default']))
      if fan_in_count > fan_out_count:
        insert_end(end, decision)
        return end
      elif fan_in_count == fan_out_count:
        insert_end(end, decision)
        # End node is a decision node.
        # This means that there are multiple decision nodes in sequence.
        # If both decision nodes are within a single decision path,
        # then the end may need to be returned, if found.
        if isinstance(end, Decision):
          end = decision_helper(end)
          if end:
            return end

        # Can do this because we've replaced all its parents with a single DecisionEnd node.
        return helper(end)
      else:
        raise RuntimeError(_('Cannot import workflows that have decisions paths with multiple terminal nodes that converge on a single terminal node.'))
    else:
      raise RuntimeError(_('Cannot import workflows that have decisions paths that never end.'))

    return None
Example #4
def workflow_save(request, workflow):
    if request.method != 'POST':
        raise StructuredException(code="METHOD_NOT_ALLOWED_ERROR",
                                  message=_('Must be POST request.'),
                                  error_code=405)

    json_workflow = format_dict_field_values(
        json.loads(request.POST.get('workflow')))
    json_workflow.setdefault('schema_version', workflow.schema_version)

    form = WorkflowForm(data=json_workflow)

    if not form.is_valid():
        raise StructuredException(code="INVALID_REQUEST_ERROR",
                                  message=_('Error saving workflow'),
                                  data={'errors': form.errors},
                                  error_code=400)

    json_nodes = json_workflow['nodes']
    id_map = {}
    errors = {}

    if not _validate_nodes_json(json_nodes, errors, request.user, workflow):
        raise StructuredException(code="INVALID_REQUEST_ERROR",
                                  message=_('Error saving workflow'),
                                  data={'errors': errors},
                                  error_code=400)

    workflow = _update_workflow_json(json_workflow)
    nodes = _update_workflow_nodes_json(workflow, json_nodes, id_map,
                                        request.user)

    # Update links
    index = 0
    for json_node in json_nodes:
        child_links = json_node['child_links']
        Link.objects.filter(parent=nodes[index]).delete()

        for child_link in child_links:
            link = Link()
            link.id = getattr(child_link, 'id', None)
            link.name = child_link['name']

            id = str(child_link['parent'])
            link.parent = Node.objects.get(id=id_map[id])

            id = str(child_link['child'])
            link.child = Node.objects.get(id=id_map[id])

            link.comment = child_link.get('comment', '')

            link.save()

        index += 1

    # Make sure workflow HDFS permissions are correct
    Workflow.objects.check_workspace(workflow, request.fs)

    return _workflow(request, workflow=workflow)
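
A minimal sketch of how a caller might exercise this view, assuming an authenticated Django test client and a placeholder URL (neither is defined by the snippet above): the serialized workflow travels in a single 'workflow' POST field, and any non-POST request is rejected with error_code 405.

    import json

    # Hypothetical call -- `client` is an authenticated Django test client and
    # the URL is a placeholder; the payload below is illustrative only.
    workflow_json = json.dumps({'schema_version': 0.4, 'nodes': []})
    response = client.post('/oozie/workflows/42/save', {'workflow': workflow_json})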
Example #5
def workflow_save(request, workflow):
    if request.method != "POST":
        raise StructuredException(code="METHOD_NOT_ALLOWED_ERROR", message=_("Must be POST request."), error_code=405)

    json_workflow = format_dict_field_values(json.loads(request.POST.get("workflow")))
    json_workflow.setdefault("schema_version", workflow.schema_version)

    form = WorkflowForm(data=json_workflow)

    if not form.is_valid():
        raise StructuredException(
            code="INVALID_REQUEST_ERROR",
            message=_("Error saving workflow"),
            data={"errors": form.errors},
            error_code=400,
        )

    json_nodes = json_workflow["nodes"]
    id_map = {}
    errors = {}

    if not _validate_nodes_json(json_nodes, errors, request.user, workflow):
        raise StructuredException(
            code="INVALID_REQUEST_ERROR", message=_("Error saving workflow"), data={"errors": errors}, error_code=400
        )

    workflow = _update_workflow_json(json_workflow)
    nodes = _update_workflow_nodes_json(workflow, json_nodes, id_map, request.user)

    # Update links
    index = 0
    for json_node in json_nodes:
        child_links = json_node["child_links"]
        Link.objects.filter(parent=nodes[index]).delete()

        for child_link in child_links:
            link = Link()
            link.id = getattr(child_link, "id", None)
            link.name = child_link["name"]

            id = str(child_link["parent"])
            link.parent = Node.objects.get(id=id_map[id])

            id = str(child_link["child"])
            link.child = Node.objects.get(id=id_map[id])

            link.comment = child_link.get("comment", "")

            link.save()

        index += 1

    # Make sure workflow HDFS permissions are correct
    Workflow.objects.check_workspace(workflow, request.fs)

    return _workflow(request, workflow=workflow)
Example #6
  def test_convert_shell(self):
    wf = Workflow.objects.new_workflow(self.user)
    wf.save()
    Workflow.objects.initialize(wf)
    Link.objects.filter(parent__workflow=wf).delete()
    action = add_node(wf, 'action-name-1', 'shell', [wf.start], {
      u'job_xml': 'my-job.xml',
      u'files': '["hello.py"]',
      u'name': 'Shell',
      u'job_properties': '[{"name": "mapred.job.queue.name", "value": "test"}]',
      u'capture_output': 'on',
      u'command': 'hello.py',
      u'archives': '[{"dummy": "", "name": "test.zip"}]',
      u'prepares': '[]',
      u'params': '[{"type": "argument", "value": "baz"}, {"type": "env-var", "value": "foo=bar"}]',
      u'description': 'Execute a Python script printing its arguments'
    })
    Link(parent=action, child=wf.end, name="ok").save()

    # Setting doc.last_modified to older date
    doc = Document.objects.get(id=wf.doc.get().id)
    Document.objects.filter(id=doc.id).update(last_modified=datetime.strptime('2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
    doc = Document.objects.get(id=doc.id)

    try:
      if IS_HUE_4.get():
        # Test that corresponding doc2 is created after convert
        assert_false(Document2.objects.filter(owner=self.user, type='query-shell').exists())

        converter = DocumentConverter(self.user)
        converter.convert()

        doc2 = Document2.objects.get(owner=self.user, type='query-shell')

        # Verify snippet values
        assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
        assert_equal('hello.py', doc2.data_dict['snippets'][0]['properties']['command_path'])
        assert_equal(['baz'], doc2.data_dict['snippets'][0]['properties']['arguments'])
        assert_equal(['foo=bar'], doc2.data_dict['snippets'][0]['properties']['env_var'])
        assert_equal(['mapred.job.queue.name=test'], doc2.data_dict['snippets'][0]['properties']['hadoopProperties'])
        assert_equal(['test.zip'], doc2.data_dict['snippets'][0]['properties']['archives'])
        assert_equal([{'type': 'file', 'path': 'hello.py'}], doc2.data_dict['snippets'][0]['properties']['files'])
        assert_equal(True, doc2.data_dict['snippets'][0]['properties']['capture_output'])
      else:
        # Test that corresponding doc2 is created after convert
        assert_false(Document2.objects.filter(owner=self.user, type='link-workflow').exists())

        converter = DocumentConverter(self.user)
        converter.convert()

        doc2 = Document2.objects.get(owner=self.user, type='link-workflow')

        # Verify absolute_url
        response = self.client.get(doc2.get_absolute_url())
        assert_equal(200, response.status_code)
        assert_equal(doc.last_modified.strftime('%Y-%m-%dT%H:%M:%S'), doc2.last_modified.strftime('%Y-%m-%dT%H:%M:%S'))
    finally:
      wf.delete()
Example #7
  def helper(workflow, node, last_fork):
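    # Walks the graph below `node`: when a Fork is reached, it finds the Join
    # that closes it among the Fork's descendants, records the pair with a
    # 'related' Link, and continues walking below that Join; a Join is returned
    # to the caller searching for it. Returns the Join found in this subtree,
    # or None.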
    if isinstance(node, Fork):
      join = None
      children = node.get_children()
      for child in children:
        join = helper(workflow, child.get_full_node(), node) or join
      link = Link(name='related', parent=node, child=join)
      link.save()

      node = join

    elif isinstance(node, Join):
      return node

    join = None
    children = node.get_children()
    for child in children:
      join = helper(workflow, child.get_full_node(), last_fork) or join
    return join
Example #8
    def helper(workflow, node, last_fork):
        if isinstance(node, Fork):
            join = None
            children = node.get_children()
            for child in children:
                join = helper(workflow, child.get_full_node(), node) or join
            link = Link(name='related', parent=node, child=join)
            link.save()

            node = join

        elif isinstance(node, Join):
            return node

        join = None
        children = node.get_children()
        for child in children:
            join = helper(workflow, child.get_full_node(), last_fork) or join
        return join
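
A hedged usage sketch for the helper above (the entry point is an assumption, not shown in either variant): pairing every Fork with its Join is typically kicked off from the workflow's start node.

  # Hypothetical invocation -- assumes `workflow` exposes a start node with
  # get_full_node(), as the other examples on this page do; every Fork reached
  # from the start node ends up linked to its Join via a 'related' Link.
  helper(workflow, workflow.start.get_full_node(), None)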
Example #9
def workflow_save(request, workflow):
  if request.method != 'POST':
    raise StructuredException(code="METHOD_NOT_ALLOWED_ERROR", message=_('Must be POST request.'), error_code=405)

  json_workflow = format_dict_field_values(json.loads(str(request.POST.get('workflow'))))
  json_workflow.setdefault('schema_version', workflow.schema_version)

  form = WorkflowForm(data=json_workflow)

  if not form.is_valid():
    raise StructuredException(code="INVALID_REQUEST_ERROR", message=_('Error saving workflow'), data={'errors': form.errors}, error_code=400)

  json_nodes = json_workflow['nodes']
  id_map = {}
  errors = {}

  if not _validate_nodes_json(json_nodes, errors, request.user, workflow):
    raise StructuredException(code="INVALID_REQUEST_ERROR", message=_('Error saving workflow'), data={'errors': errors}, error_code=400)

  workflow = _update_workflow_json(json_workflow)
  nodes = _update_workflow_nodes_json(workflow, json_nodes, id_map, request.user)

  # Update links
  index = 0
  for json_node in json_nodes:
    child_links = json_node['child_links']
    Link.objects.filter(parent=nodes[index]).delete()

    for child_link in child_links:
      link = Link()
      link.id = getattr(child_link, 'id', None)
      link.name = child_link['name']

      id = str(child_link['parent'])
      link.parent = Node.objects.get(id=id_map[id])

      id = str(child_link['child'])
      link.child = Node.objects.get(id=id_map[id])

      link.comment = child_link.get('comment', '')

      link.save()

    index += 1

  # Make sure workflow HDFS permissions are correct
  Workflow.objects.check_workspace(workflow, request.fs)

  return _workflow(request, workflow=workflow)
Example #10
  def test_convert_mapreduce(self):
    wf = Workflow.objects.new_workflow(self.user)
    wf.save()
    Workflow.objects.initialize(wf)
    Link.objects.filter(parent__workflow=wf).delete()
    action = add_node(wf, 'action-name-1', 'mapreduce', [wf.start], {
      'description': 'Test MR job design',
      'files': '[]',
      'jar_path': '/user/hue/oozie/examples/lib/hadoop-examples.jar',
      'job_properties': '[{"name": "sleep.job.map.sleep.time", "value": "5"}, {"name": "sleep.job.reduce.sleep.time", "value": "10"}]',
      'prepares': '[{"value":"${output}","type":"delete"},{"value":"/test","type":"mkdir"}]',
      'archives': '[]',
    })
    Link(parent=action, child=wf.end, name="ok").save()

    # Setting doc.last_modified to older date
    doc = Document.objects.get(id=wf.doc.get().id)
    Document.objects.filter(id=doc.id).update(last_modified=datetime.strptime('2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
    doc = Document.objects.get(id=doc.id)

    try:
      if IS_HUE_4.get():
        # Test that corresponding doc2 is created after convert
        assert_false(Document2.objects.filter(owner=self.user, type='query-mapreduce').exists())

        converter = DocumentConverter(self.user)
        converter.convert()

        doc2 = Document2.objects.get(owner=self.user, type='query-mapreduce')

        # Verify snippet values
        assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
        assert_equal('/user/hue/oozie/examples/lib/hadoop-examples.jar', doc2.data_dict['snippets'][0]['properties']['app_jar'])
        assert_equal(['sleep.job.map.sleep.time=5', 'sleep.job.reduce.sleep.time=10'], doc2.data_dict['snippets'][0]['properties']['hadoopProperties'])
      else:
        # Test that corresponding doc2 is created after convert
        assert_false(Document2.objects.filter(owner=self.user, type='link-workflow').exists())

        converter = DocumentConverter(self.user)
        converter.convert()

        doc2 = Document2.objects.get(owner=self.user, type='link-workflow')

        # Verify absolute_url
        response = self.client.get(doc2.get_absolute_url())
        assert_equal(200, response.status_code)
        assert_equal(doc.last_modified.strftime('%Y-%m-%dT%H:%M:%S'), doc2.last_modified.strftime('%Y-%m-%dT%H:%M:%S'))
    finally:
      wf.delete()
Example #11
def edit_workflow_fork(request, action):
    fork = action
    LinkFormSet = modelformset_factory(Link, form=LinkForm, max_num=0)

    if request.method == 'POST':
        link_formset = LinkFormSet(request.POST)
        default_link_form = DefaultLinkForm(request.POST, action=fork)

        if link_formset.is_valid():
            is_decision = fork.has_decisions()
            link_formset.save()
            if not is_decision and fork.has_decisions():
                default_link = default_link_form.save(commit=False)
                default_link.parent = fork
                default_link.name = 'default'
                default_link.comment = 'default'
                default_link.save()
                fork.convert_to_decision()
            fork.update_description()

            return redirect(
                reverse('oozie:edit_workflow',
                        kwargs={'workflow': fork.workflow.id}))
    else:
        if any(link.child.id != action.workflow.end.id
               for link in fork.get_child_join().get_children_links()):
            raise PopupException(
                _('Sorry, this Fork has some other actions below its Join and cannot be converted. '
                  'Please delete the nodes below the Join.'))

        link_formset = LinkFormSet(queryset=fork.get_children_links())
        default_link = Link(parent=fork, name='default', comment='default')
        default_link_form = DefaultLinkForm(action=fork, instance=default_link)

    return render(
        'editor/edit_workflow_fork.mako', request, {
            'workflow': fork.workflow,
            'fork': fork,
            'link_formset': link_formset,
            'default_link_form': default_link_form,
        })
Example #12
    raise StructuredException(code="INVALID_REQUEST_ERROR", message=_('Error saving workflow'), data={'more': str(e)}, error_code=400)

  json_nodes = json_workflow['nodes']
  id_map = {}

  workflow = update_workflow(json_workflow)
  nodes = update_workflow_nodes(workflow, json_nodes, id_map)

  # Update links
  index = 0
  for json_node in json_nodes:
    child_links = json_node['child_links']
    Link.objects.filter(parent=nodes[index]).delete()

    for child_link in child_links:
      link = Link()
      link.id = getattr(child_link, 'id', None)
      link.name = child_link['name']

      id = str(child_link['parent'])
      link.parent = Node.objects.get(id=id_map[id])

      id = str(child_link['child'])
      link.child = Node.objects.get(id=id_map[id])

      link.comment = child_link.get('comment', '')

      link.save()

    index += 1
Example #13
    def test_convert_java(self):
        wf = Workflow.objects.new_workflow(self.user)
        wf.save()
        Workflow.objects.initialize(wf)
        Link.objects.filter(parent__workflow=wf).delete()
        action = add_node(
            wf, 'action-name-1', 'java', [wf.start], {
                'name': 'MyTeragen',
                "description": "Generate N number of records",
                "main_class": "org.apache.hadoop.examples.terasort.TeraGen",
                "args": "1000 ${output_dir}/teragen",
                "files": '["my_file","my_file2"]',
                "job_xml": "",
                "java_opts": "-Dexample-property=natty",
                "jar_path":
                "/user/hue/oozie/workspaces/lib/hadoop-examples.jar",
                'job_properties':
                '[{"name": "mapred.job.queue.name", "value": "test"}]',
                "prepares": '[{"value":"/test","type":"mkdir"}]',
                "archives":
                '[{"dummy":"","name":"my_archive"},{"dummy":"","name":"my_archive2"}]',
                "capture_output": True,
            })
        Link(parent=action, child=wf.end, name="ok").save()

        # Setting doc.last_modified to older date
        doc = Document.objects.get(id=wf.doc.get().id)
        Document.objects.filter(
            id=doc.id).update(last_modified=datetime.strptime(
                '2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
        doc = Document.objects.get(id=doc.id)

        try:
            # Test that corresponding doc2 is created after convert
            assert_false(
                Document2.objects.filter(owner=self.user,
                                         type='query-java').exists())

            converter = DocumentConverter(self.user)
            converter.convert()

            doc2 = Document2.objects.get(owner=self.user, type='query-java')

            # Verify snippet values
            assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
            assert_equal(
                '/user/hue/oozie/workspaces/lib/hadoop-examples.jar',
                doc2.data_dict['snippets'][0]['properties']['app_jar'])
            assert_equal('org.apache.hadoop.examples.terasort.TeraGen',
                         doc2.data_dict['snippets'][0]['properties']['class'])
            assert_equal('1000 ${output_dir}/teragen',
                         doc2.data_dict['snippets'][0]['properties']['args'])
            assert_equal(
                '-Dexample-property=natty',
                doc2.data_dict['snippets'][0]['properties']['java_opts'])
            assert_equal([
                'mapred.job.queue.name=test'
            ], doc2.data_dict['snippets'][0]['properties']['hadoopProperties'])
            assert_equal(
                ['my_archive', 'my_archive2'],
                doc2.data_dict['snippets'][0]['properties']['archives'])
            assert_equal([{
                'type': 'file',
                'path': 'my_file'
            }, {
                'type': 'file',
                'path': 'my_file2'
            }], doc2.data_dict['snippets'][0]['properties']['files'])
            assert_equal(
                True,
                doc2.data_dict['snippets'][0]['properties']['capture_output'])
        finally:
            wf.delete()