def insert_end(node, decision):
  """Insert DecisionEnd between node and node parents"""
  parent_links = node.get_parent_links().exclude(name='default')
  decision_end = decision.get_child_end()

  # Find parent decision node for every end's parent.
  # If the decision node is the one passed,
  # change the parent to link to the Decision node's DecisionEnd node.
  # Skip embedded decisions and forks along the way.
  decision_end_used = False
  for parent_link in parent_links:
    parent = parent_link.parent.get_full_node()

    node_temp = parent
    while node_temp and not isinstance(node_temp, Decision):
      if isinstance(node_temp, Join):
        node_temp = node_temp.get_parent_fork().get_parent()
      elif isinstance(node_temp, DecisionEnd):
        node_temp = node_temp.get_parent_decision().get_parent()
      else:
        node_temp = node_temp.get_parent()

    if node_temp.id == decision.id and parent.node_type != Decision.node_type:
      links = Link.objects.filter(parent=parent).exclude(name__in=['related', 'kill', 'error'])
      if len(links) != 1:
        raise RuntimeError(_('Cannot import workflows that have decision DAG leaf nodes with multiple children or no children.'))
      link = links[0]
      link.child = decision_end
      link.save()
      decision_end_used = True

  # Create link between DecisionEnd and terminal node.
  if decision_end_used and not Link.objects.filter(name='to', parent=decision_end, child=node).exists():
    link = Link(name='to', parent=decision_end, child=node)
    link.save()
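# A minimal, self-contained sketch of the rerouting idea behind insert_end() above, using
# plain dicts instead of Hue's Link/Node models (all names below are hypothetical, and the
# real function additionally walks upwards to confirm each leaf belongs to this decision).
# Each decision branch that currently jumps straight to the converging terminal node is
# redirected to a single shared DecisionEnd marker, which then points at the terminal.

def insert_end_sketch(edges, decision_end, terminal, branch_leaves):
  """edges: dict mapping node -> child; reroute each leaf's edge through decision_end."""
  rerouted = False
  for leaf in branch_leaves:
    if edges.get(leaf) == terminal:  # leaf currently jumps straight to the terminal node
      edges[leaf] = decision_end     # point it at the shared DecisionEnd instead
      rerouted = True
  if rerouted:
    edges[decision_end] = terminal   # single edge from DecisionEnd to the terminal node
  return edges

# Example: two decision branches ('a', 'b') converging on 'join-node'.
print(insert_end_sketch({'a': 'join-node', 'b': 'join-node'}, 'decision-end', 'join-node', ['a', 'b']))
# -> {'a': 'decision-end', 'b': 'decision-end', 'decision-end': 'join-node'}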
def decision_helper(decision, subgraphs):
  """
  Iterates through children, waits for ends.
  When an end is found, finish the decision.
  If the end has more parents than the decision has branches, bubble the end upwards.
  """
  # Create decision end if it does not exist.
  if not Link.objects.filter(parent=decision, name='related').exists():
    end = DecisionEnd(workflow=workflow, node_type=DecisionEnd.node_type)
    end.save()
    link = Link(name='related', parent=decision, child=end)
    link.save()

  children = [_link.child.get_full_node() for _link in decision.get_children_links().exclude(name__in=['error','default'])]

  ends = set()
  for child in children:
    end = helper(child, subgraphs)
    if end:
      ends.add(end)

  # A single end means that we've found a unique end for this decision.
  # Multiple ends mean that we've found a bad decision.
  if len(ends) > 1:
    raise RuntimeError(_('Cannot import workflows that have decisions paths with multiple terminal nodes that converge on a single terminal node.'))
  elif len(ends) == 1:
    end = ends.pop()
    # Branch count will vary with each call if we have multiple decision nodes embedded within decision paths.
    # This is because parents are replaced with DecisionEnd nodes.
    fan_in_count = len(end.get_parent_links().exclude(name__in=['error','default']))
    # IF it covers all branches, then it is an end that perfectly matches this decision.
    # ELSE it is an end for a decision path that the current decision node is a part of as well.
    # The unhandled case is multiple ends for a single decision that converge on a single end.
    # This is not handled in Hue.
    fan_out_count = len(decision.get_children_links().exclude(name__in=['error','default']))
    if fan_in_count > fan_out_count:
      insert_end(end, decision)
      return end
    elif fan_in_count == fan_out_count:
      insert_end(end, decision)
      # End node is a decision node.
      # This means that there are multiple decision nodes in sequence.
      # If both decision nodes are within a single decision path,
      # then the end may need to be returned, if found.
      if isinstance(end, Decision):
        end = decision_helper(end, subgraphs)
        if end:
          return end
      # Can do this because we've replaced all its parents with a single DecisionEnd node.
      return helper(end, subgraphs)
    else:
      raise RuntimeError(_('Cannot import workflows that have decisions paths with multiple terminal nodes that converge on a single terminal node.'))
  else:
    raise RuntimeError(_('Cannot import workflows that have decisions paths that never end.'))

  return None
def decision_helper(decision):
  """
  Iterates through children, waits for ends.
  When an end is found, finish the decision.
  If the end has more parents than the decision has branches, bubble the end upwards.
  """
  # Create decision end if it does not exist.
  if not Link.objects.filter(parent=decision, name='related').exists():
    end = DecisionEnd(workflow=workflow, node_type=DecisionEnd.node_type)
    end.save()
    link = Link(name='related', parent=decision, child=end)
    link.save()

  children = [link.child.get_full_node() for link in decision.get_children_links().exclude(name__in=['error','default'])]

  ends = set()
  for child in children:
    end = helper(child)
    if end:
      ends.add(end)

  # A single end means that we've found a unique end for this decision.
  # Multiple ends mean that we've found a bad decision.
  if len(ends) > 1:
    raise RuntimeError(_('Cannot import workflows that have decisions paths with multiple terminal nodes that converge on a single terminal node.'))
  elif len(ends) == 1:
    end = ends.pop()
    # Branch count will vary with each call if we have multiple decision nodes embedded within decision paths.
    # This is because parents are replaced with DecisionEnd nodes.
    fan_in_count = len(end.get_parent_links().exclude(name__in=['error','default']))
    # IF it covers all branches, then it is an end that perfectly matches this decision.
    # ELSE it is an end for a decision path that the current decision node is a part of as well.
    # The unhandled case is multiple ends for a single decision that converge on a single end.
    # This is not handled in Hue.
    fan_out_count = len(decision.get_children_links().exclude(name__in=['error','default']))
    if fan_in_count > fan_out_count:
      insert_end(end, decision)
      return end
    elif fan_in_count == fan_out_count:
      insert_end(end, decision)
      # End node is a decision node.
      # This means that there are multiple decision nodes in sequence.
      # If both decision nodes are within a single decision path,
      # then the end may need to be returned, if found.
      if isinstance(end, Decision):
        end = decision_helper(end)
        if end:
          return end
      # Can do this because we've replaced all its parents with a single DecisionEnd node.
      return helper(end)
    else:
      raise RuntimeError(_('Cannot import workflows that have decisions paths with multiple terminal nodes that converge on a single terminal node.'))
  else:
    raise RuntimeError(_('Cannot import workflows that have decisions paths that never end.'))

  return None
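# Hedged sketch of the fan-in/fan-out rule used by decision_helper() above, on plain
# integers rather than Hue nodes (the helper name below is illustrative only).
# If the converging end has more incoming branches than this decision has outgoing
# branches, the end also closes an enclosing decision and must be bubbled upwards;
# if the counts match, the end belongs to this decision and is consumed here.

def classify_end(fan_in_count, fan_out_count):
  if fan_in_count > fan_out_count:
    return 'bubble-up'   # insert the DecisionEnd, then return the end to the caller
  elif fan_in_count == fan_out_count:
    return 'consume'     # insert the DecisionEnd and keep walking past it
  else:
    return 'error'       # fewer parents than branches: malformed decision DAG

print(classify_end(3, 2))  # 'bubble-up'
print(classify_end(2, 2))  # 'consume'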
def workflow_save(request, workflow):
  if request.method != 'POST':
    raise StructuredException(code="METHOD_NOT_ALLOWED_ERROR", message=_('Must be POST request.'), error_code=405)

  json_workflow = format_dict_field_values(json.loads(request.POST.get('workflow')))
  json_workflow.setdefault('schema_version', workflow.schema_version)

  form = WorkflowForm(data=json_workflow)

  if not form.is_valid():
    raise StructuredException(code="INVALID_REQUEST_ERROR", message=_('Error saving workflow'), data={'errors': form.errors}, error_code=400)

  json_nodes = json_workflow['nodes']
  id_map = {}
  errors = {}

  if not _validate_nodes_json(json_nodes, errors, request.user, workflow):
    raise StructuredException(code="INVALID_REQUEST_ERROR", message=_('Error saving workflow'), data={'errors': errors}, error_code=400)

  workflow = _update_workflow_json(json_workflow)
  nodes = _update_workflow_nodes_json(workflow, json_nodes, id_map, request.user)

  # Update links
  index = 0
  for json_node in json_nodes:
    child_links = json_node['child_links']
    Link.objects.filter(parent=nodes[index]).delete()

    for child_link in child_links:
      link = Link()
      link.id = getattr(child_link, 'id', None)
      link.name = child_link['name']

      id = str(child_link['parent'])
      link.parent = Node.objects.get(id=id_map[id])

      id = str(child_link['child'])
      link.child = Node.objects.get(id=id_map[id])

      link.comment = child_link.get('comment', '')

      link.save()

    index += 1

  # Make sure workflow HDFS permissions are correct
  Workflow.objects.check_workspace(workflow, request.fs)

  return _workflow(request, workflow=workflow)
def workflow_save(request, workflow): if request.method != "POST": raise StructuredException(code="METHOD_NOT_ALLOWED_ERROR", message=_("Must be POST request."), error_code=405) json_workflow = format_dict_field_values(json.loads(request.POST.get("workflow"))) json_workflow.setdefault("schema_version", workflow.schema_version) form = WorkflowForm(data=json_workflow) if not form.is_valid(): raise StructuredException( code="INVALID_REQUEST_ERROR", message=_("Error saving workflow"), data={"errors": form.errors}, error_code=400, ) json_nodes = json_workflow["nodes"] id_map = {} errors = {} if not _validate_nodes_json(json_nodes, errors, request.user, workflow): raise StructuredException( code="INVALID_REQUEST_ERROR", message=_("Error saving workflow"), data={"errors": errors}, error_code=400 ) workflow = _update_workflow_json(json_workflow) nodes = _update_workflow_nodes_json(workflow, json_nodes, id_map, request.user) # Update links index = 0 for json_node in json_nodes: child_links = json_node["child_links"] Link.objects.filter(parent=nodes[index]).delete() for child_link in child_links: link = Link() link.id = getattr(child_link, "id", None) link.name = child_link["name"] id = str(child_link["parent"]) link.parent = Node.objects.get(id=id_map[id]) id = str(child_link["child"]) link.child = Node.objects.get(id=id_map[id]) link.comment = child_link.get("comment", "") link.save() index += 1 # Make sure workflow HDFS permissions are correct Workflow.objects.check_workspace(workflow, request.fs) return _workflow(request, workflow=workflow)
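# Illustrative (not exhaustive) shape of the 'child_links' entries that the link-rebuild
# loop in workflow_save() above consumes: 'parent' and 'child' hold client-side node ids
# that are translated through id_map, 'name' is the link type, and 'comment' is optional.
# The literal values below are made up for illustration and are not taken from Hue.
example_json_node = {
  'id': -1,
  'child_links': [
    {'name': 'ok', 'parent': -1, 'child': -2, 'comment': ''},
    {'name': 'error', 'parent': -1, 'child': -3},
  ],
}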
def test_convert_shell(self):
  wf = Workflow.objects.new_workflow(self.user)
  wf.save()
  Workflow.objects.initialize(wf)
  Link.objects.filter(parent__workflow=wf).delete()
  action = add_node(wf, 'action-name-1', 'shell', [wf.start], {
    u'job_xml': 'my-job.xml',
    u'files': '["hello.py"]',
    u'name': 'Shell',
    u'job_properties': '[{"name": "mapred.job.queue.name", "value": "test"}]',
    u'capture_output': 'on',
    u'command': 'hello.py',
    u'archives': '[{"dummy": "", "name": "test.zip"}]',
    u'prepares': '[]',
    u'params': '[{"type": "argument", "value": "baz"}, {"type": "env-var", "value": "foo=bar"}]',
    u'description': 'Execute a Python script printing its arguments'
  })
  Link(parent=action, child=wf.end, name="ok").save()

  # Setting doc.last_modified to older date
  doc = Document.objects.get(id=wf.doc.get().id)
  Document.objects.filter(id=doc.id).update(last_modified=datetime.strptime('2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
  doc = Document.objects.get(id=doc.id)

  try:
    if IS_HUE_4.get():
      # Test that corresponding doc2 is created after convert
      assert_false(Document2.objects.filter(owner=self.user, type='query-shell').exists())

      converter = DocumentConverter(self.user)
      converter.convert()

      doc2 = Document2.objects.get(owner=self.user, type='query-shell')

      # Verify snippet values
      assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
      assert_equal('hello.py', doc2.data_dict['snippets'][0]['properties']['command_path'])
      assert_equal(['baz'], doc2.data_dict['snippets'][0]['properties']['arguments'])
      assert_equal(['foo=bar'], doc2.data_dict['snippets'][0]['properties']['env_var'])
      assert_equal(['mapred.job.queue.name=test'], doc2.data_dict['snippets'][0]['properties']['hadoopProperties'])
      assert_equal(['test.zip'], doc2.data_dict['snippets'][0]['properties']['archives'])
      assert_equal([{'type': 'file', 'path': 'hello.py'}], doc2.data_dict['snippets'][0]['properties']['files'])
      assert_equal(True, doc2.data_dict['snippets'][0]['properties']['capture_output'])
    else:
      # Test that corresponding doc2 is created after convert
      assert_false(Document2.objects.filter(owner=self.user, type='link-workflow').exists())

      converter = DocumentConverter(self.user)
      converter.convert()

      doc2 = Document2.objects.get(owner=self.user, type='link-workflow')

      # Verify absolute_url
      response = self.client.get(doc2.get_absolute_url())
      assert_equal(200, response.status_code)
      assert_equal(doc.last_modified.strftime('%Y-%m-%dT%H:%M:%S'), doc2.last_modified.strftime('%Y-%m-%dT%H:%M:%S'))
  finally:
    wf.delete()
def helper(workflow, node, last_fork):
  if isinstance(node, Fork):
    join = None
    children = node.get_children()
    for child in children:
      join = helper(workflow, child.get_full_node(), node) or join
    link = Link(name='related', parent=node, child=join)
    link.save()
    node = join
  elif isinstance(node, Join):
    return node

  join = None
  children = node.get_children()
  for child in children:
    join = helper(workflow, child.get_full_node(), last_fork) or join
  return join
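# Self-contained sketch of the fork/join pairing walk in helper() above, on a toy
# adjacency dict instead of Hue's Fork/Join models (node names are hypothetical).
# Recursing into every child of a fork yields the fork's matching join; that join is
# recorded as the fork's 'related' node, and the walk then resumes from the join itself.

def pair_forks(graph, node, forks, joins, related):
  if node in forks:
    join = None
    for child in graph.get(node, []):
      join = pair_forks(graph, child, forks, joins, related) or join
    related[node] = join   # equivalent of saving the 'related' Link
    node = join            # resume the walk below the join
  elif node in joins:
    return node            # report the join back to the enclosing fork

  join = None
  for child in graph.get(node, []):
    join = pair_forks(graph, child, forks, joins, related) or join
  return join

graph = {'start': ['fork1'], 'fork1': ['a', 'b'], 'a': ['join1'], 'b': ['join1'], 'join1': ['end']}
related = {}
pair_forks(graph, 'start', {'fork1'}, {'join1'}, related)
print(related)  # {'fork1': 'join1'}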
def workflow_save(request, workflow):
  if request.method != 'POST':
    raise StructuredException(code="METHOD_NOT_ALLOWED_ERROR", message=_('Must be POST request.'), error_code=405)

  json_workflow = format_dict_field_values(json.loads(str(request.POST.get('workflow'))))
  json_workflow.setdefault('schema_version', workflow.schema_version)

  form = WorkflowForm(data=json_workflow)

  if not form.is_valid():
    raise StructuredException(code="INVALID_REQUEST_ERROR", message=_('Error saving workflow'), data={'errors': form.errors}, error_code=400)

  json_nodes = json_workflow['nodes']
  id_map = {}
  errors = {}

  if not _validate_nodes_json(json_nodes, errors, request.user, workflow):
    raise StructuredException(code="INVALID_REQUEST_ERROR", message=_('Error saving workflow'), data={'errors': errors}, error_code=400)

  workflow = _update_workflow_json(json_workflow)
  nodes = _update_workflow_nodes_json(workflow, json_nodes, id_map, request.user)

  # Update links
  index = 0
  for json_node in json_nodes:
    child_links = json_node['child_links']
    Link.objects.filter(parent=nodes[index]).delete()

    for child_link in child_links:
      link = Link()
      link.id = getattr(child_link, 'id', None)
      link.name = child_link['name']

      id = str(child_link['parent'])
      link.parent = Node.objects.get(id=id_map[id])

      id = str(child_link['child'])
      link.child = Node.objects.get(id=id_map[id])

      link.comment = child_link.get('comment', '')

      link.save()

    index += 1

  # Make sure workflow HDFS permissions are correct
  Workflow.objects.check_workspace(workflow, request.fs)

  return _workflow(request, workflow=workflow)
def test_convert_mapreduce(self):
  wf = Workflow.objects.new_workflow(self.user)
  wf.save()
  Workflow.objects.initialize(wf)
  Link.objects.filter(parent__workflow=wf).delete()
  action = add_node(wf, 'action-name-1', 'mapreduce', [wf.start], {
    'description': 'Test MR job design',
    'files': '[]',
    'jar_path': '/user/hue/oozie/examples/lib/hadoop-examples.jar',
    'job_properties': '[{"name": "sleep.job.map.sleep.time", "value": "5"}, {"name": "sleep.job.reduce.sleep.time", "value": "10"}]',
    'prepares': '[{"value":"${output}","type":"delete"},{"value":"/test","type":"mkdir"}]',
    'archives': '[]',
  })
  Link(parent=action, child=wf.end, name="ok").save()

  # Setting doc.last_modified to older date
  doc = Document.objects.get(id=wf.doc.get().id)
  Document.objects.filter(id=doc.id).update(last_modified=datetime.strptime('2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
  doc = Document.objects.get(id=doc.id)

  try:
    if IS_HUE_4.get():
      # Test that corresponding doc2 is created after convert
      assert_false(Document2.objects.filter(owner=self.user, type='query-mapreduce').exists())

      converter = DocumentConverter(self.user)
      converter.convert()

      doc2 = Document2.objects.get(owner=self.user, type='query-mapreduce')

      # Verify snippet values
      assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
      assert_equal('/user/hue/oozie/examples/lib/hadoop-examples.jar', doc2.data_dict['snippets'][0]['properties']['app_jar'])
      assert_equal(['sleep.job.map.sleep.time=5', 'sleep.job.reduce.sleep.time=10'], doc2.data_dict['snippets'][0]['properties']['hadoopProperties'])
    else:
      # Test that corresponding doc2 is created after convert
      assert_false(Document2.objects.filter(owner=self.user, type='link-workflow').exists())

      converter = DocumentConverter(self.user)
      converter.convert()

      doc2 = Document2.objects.get(owner=self.user, type='link-workflow')

      # Verify absolute_url
      response = self.client.get(doc2.get_absolute_url())
      assert_equal(200, response.status_code)
      assert_equal(doc.last_modified.strftime('%Y-%m-%dT%H:%M:%S'), doc2.last_modified.strftime('%Y-%m-%dT%H:%M:%S'))
  finally:
    wf.delete()
def edit_workflow_fork(request, action):
  fork = action
  LinkFormSet = modelformset_factory(Link, form=LinkForm, max_num=0)

  if request.method == 'POST':
    link_formset = LinkFormSet(request.POST)
    default_link_form = DefaultLinkForm(request.POST, action=fork)

    if link_formset.is_valid():
      is_decision = fork.has_decisions()
      link_formset.save()

      if not is_decision and fork.has_decisions():
        default_link = default_link_form.save(commit=False)
        default_link.parent = fork
        default_link.name = 'default'
        default_link.comment = 'default'
        default_link.save()
        fork.convert_to_decision()
        fork.update_description()

      return redirect(reverse('oozie:edit_workflow', kwargs={'workflow': fork.workflow.id}))
  else:
    if filter(lambda link: link.child.id != action.workflow.end.id, [link for link in fork.get_child_join().get_children_links()]):
      raise PopupException(
        _('Sorry, this Fork has some other actions below its Join and cannot be converted. '
          'Please delete the nodes below the Join.'))

    link_formset = LinkFormSet(queryset=fork.get_children_links())
    default_link = Link(parent=fork, name='default', comment='default')
    default_link_form = DefaultLinkForm(action=fork, instance=default_link)

  return render('editor/edit_workflow_fork.mako', request, {
    'workflow': fork.workflow,
    'fork': fork,
    'link_formset': link_formset,
    'default_link_form': default_link_form,
  })
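# Minimal sketch of the guard in edit_workflow_fork() above, assuming a plain list of
# child ids below the fork's join (the names here are hypothetical, not Hue's API).
# Conversion of a Fork into a Decision is only offered when the join leads straight to
# the workflow end node; any other action below the join blocks the conversion.

def can_convert_fork(join_children_ids, end_id):
  return all(child_id == end_id for child_id in join_children_ids)

print(can_convert_fork([42], 42))     # True: only the workflow end below the join
print(can_convert_fork([42, 7], 42))  # False: other actions below the join block conversion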
raise StructuredException(code="INVALID_REQUEST_ERROR", message=_('Error saving workflow'), data={'more': str(e)}, error_code=400) json_nodes = json_workflow['nodes'] id_map = {} workflow = update_workflow(json_workflow) nodes = update_workflow_nodes(workflow, json_nodes, id_map) # Update links index = 0 for json_node in json_nodes: child_links = json_node['child_links'] Link.objects.filter(parent=nodes[index]).delete() for child_link in child_links: link = Link() link.id = getattr(child_link, 'id', None) link.name = child_link['name'] id = str(child_link['parent']) link.parent = Node.objects.get(id=id_map[id]) id = str(child_link['child']) link.child = Node.objects.get(id=id_map[id]) link.comment = child_link.get('comment', '') link.save() index += 1
def test_convert_java(self):
  wf = Workflow.objects.new_workflow(self.user)
  wf.save()
  Workflow.objects.initialize(wf)
  Link.objects.filter(parent__workflow=wf).delete()
  action = add_node(wf, 'action-name-1', 'java', [wf.start], {
    'name': 'MyTeragen',
    "description": "Generate N number of records",
    "main_class": "org.apache.hadoop.examples.terasort.TeraGen",
    "args": "1000 ${output_dir}/teragen",
    "files": '["my_file","my_file2"]',
    "job_xml": "",
    "java_opts": "-Dexample-property=natty",
    "jar_path": "/user/hue/oozie/workspaces/lib/hadoop-examples.jar",
    'job_properties': '[{"name": "mapred.job.queue.name", "value": "test"}]',
    "prepares": '[{"value":"/test","type":"mkdir"}]',
    "archives": '[{"dummy":"","name":"my_archive"},{"dummy":"","name":"my_archive2"}]',
    "capture_output": True,
  })
  Link(parent=action, child=wf.end, name="ok").save()

  # Setting doc.last_modified to older date
  doc = Document.objects.get(id=wf.doc.get().id)
  Document.objects.filter(id=doc.id).update(last_modified=datetime.strptime('2000-01-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ'))
  doc = Document.objects.get(id=doc.id)

  try:
    # Test that corresponding doc2 is created after convert
    assert_false(Document2.objects.filter(owner=self.user, type='query-java').exists())

    converter = DocumentConverter(self.user)
    converter.convert()

    doc2 = Document2.objects.get(owner=self.user, type='query-java')

    # Verify snippet values
    assert_equal('ready', doc2.data_dict['snippets'][0]['status'])
    assert_equal('/user/hue/oozie/workspaces/lib/hadoop-examples.jar', doc2.data_dict['snippets'][0]['properties']['app_jar'])
    assert_equal('org.apache.hadoop.examples.terasort.TeraGen', doc2.data_dict['snippets'][0]['properties']['class'])
    assert_equal('1000 ${output_dir}/teragen', doc2.data_dict['snippets'][0]['properties']['args'])
    assert_equal('-Dexample-property=natty', doc2.data_dict['snippets'][0]['properties']['java_opts'])
    assert_equal(['mapred.job.queue.name=test'], doc2.data_dict['snippets'][0]['properties']['hadoopProperties'])
    assert_equal(['my_archive', 'my_archive2'], doc2.data_dict['snippets'][0]['properties']['archives'])
    assert_equal([{'type': 'file', 'path': 'my_file'}, {'type': 'file', 'path': 'my_file2'}], doc2.data_dict['snippets'][0]['properties']['files'])
    assert_equal(True, doc2.data_dict['snippets'][0]['properties']['capture_output'])
  finally:
    wf.delete()