Example #1
0
def sync_coord_workflow(request, job_id):
  """Sync the Workflow definition of a running coordinator job.

  GET: render a popup with a formset of the workflow's parameters,
  pre-filled with the values currently stored on the coordinator.
  POST: re-generate and re-deploy the Workflow and Coordinator XML with the
  submitted parameter values, then run the Oozie 'update' action so the
  server picks up the changes.

  Returns a JsonResponse containing the rendered popup HTML, or redirects
  to the coordinator dashboard on a successful POST.
  """
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  job = check_job_access_permission(request, job_id)
  check_job_edition_permission(job, request.user)

  hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
  hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
  # urlsplit()[2] extracts the path component of the HDFS URI.
  wf_application_path = job.conf_dict.get('wf_application_path') and Hdfs.urlsplit(job.conf_dict['wf_application_path'])[2] or ''
  coord_application_path = job.conf_dict.get('oozie.coord.application.path') and Hdfs.urlsplit(job.conf_dict['oozie.coord.application.path'])[2] or ''
  # Name -> value mapping of the coordinator's stored parameters, or None.
  properties = hue_coord and hue_coord.properties and dict([(param['name'], param['value']) for param in hue_coord.properties]) or None

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)
    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      # Update workflow params in coordinator
      hue_coord.clear_workflow_params()
      properties = dict([(param['name'], param['value']) for param in hue_coord.properties])

      # Deploy WF XML
      submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties)
      submission.deploy(deployment_dir=wf_application_path)
      submission._create_file(wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True)

      # Deploy Coordinator XML
      job.conf_dict.update(mapping)
      submission = Submission(user=request.user, job=hue_coord, fs=request.fs, jt=request.jt, properties=job.conf_dict, oozie_id=job.id)
      submission._create_file(coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True)
      # Server picks up deployed Coordinator XML changes after running 'update' action
      submission.update_coord()

      request.info(_('Successfully updated Workflow definition'))
      return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
    else:
      # Format AFTER translation: translating a pre-formatted string defeats
      # the gettext catalog lookup.
      request.error(_('Invalid submission form: %s') % params_form.errors)
  else:
    new_params = hue_wf and hue_wf.find_all_parameters() or []
    new_params = dict([(param['name'], param['value']) for param in new_params])

    # Set previous values: prefer the value already stored on the coordinator.
    if properties:
      new_params = dict([(key, properties.get(key, value)) for key, value in new_params.items()])

    initial_params = ParameterForm.get_initial_params(new_params)
    params_form = ParametersFormSet(initial=initial_params)

  popup = render('/scheduler/submit_job_popup.mako', request, {
             'params_form': params_form,
             'name': _('Job'),
             'header': _('Sync Workflow definition?'),
             'action': reverse('oozie:sync_coord_workflow', kwargs={'job_id': job_id})
           }, force_template=True).content
  return JsonResponse(popup, safe=False)
Example #2
0
def sync_coord_workflow(request, job_id):
  """Sync the Workflow definition of a running coordinator job.

  GET: render a popup with a formset of the workflow's parameters,
  pre-filled with the values currently stored on the coordinator.
  POST: re-generate and re-deploy the Workflow and Coordinator XML with the
  submitted parameter values, then run the Oozie 'update' action so the
  server picks up the changes.

  Returns a JsonResponse containing the rendered popup HTML, or redirects
  to the coordinator dashboard on a successful POST.
  """
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  job = check_job_access_permission(request, job_id)
  check_job_edition_permission(job, request.user)

  hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
  hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
  # urlsplit()[2] extracts the path component of the HDFS URI.
  wf_application_path = job.conf_dict.get('wf_application_path') and Hdfs.urlsplit(job.conf_dict['wf_application_path'])[2] or ''
  coord_application_path = job.conf_dict.get('oozie.coord.application.path') and Hdfs.urlsplit(job.conf_dict['oozie.coord.application.path'])[2] or ''
  # Name -> value mapping of the coordinator's stored parameters, or None.
  properties = hue_coord and hue_coord.properties and dict([(param['name'], param['value']) for param in hue_coord.properties]) or None

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)
    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      # Update workflow params in coordinator
      hue_coord.clear_workflow_params()
      properties = dict([(param['name'], param['value']) for param in hue_coord.properties])

      # Deploy WF XML
      submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties)
      submission.deploy(deployment_dir=wf_application_path)
      submission._create_file(wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True)

      # Deploy Coordinator XML
      job.conf_dict.update(mapping)
      submission = Submission(user=request.user, job=hue_coord, fs=request.fs, jt=request.jt, properties=job.conf_dict, oozie_id=job.id)
      submission._create_file(coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True)
      # Server picks up deployed Coordinator XML changes after running 'update' action
      submission.update_coord()

      request.info(_('Successfully updated Workflow definition'))
      return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
    else:
      # Format AFTER translation: translating a pre-formatted string defeats
      # the gettext catalog lookup.
      request.error(_('Invalid submission form: %s') % params_form.errors)
  else:
    new_params = hue_wf and hue_wf.find_all_parameters() or []
    new_params = dict([(param['name'], param['value']) for param in new_params])

    # Set previous values: prefer the value already stored on the coordinator.
    if properties:
      new_params = dict([(key, properties.get(key, value)) for key, value in new_params.items()])

    initial_params = ParameterForm.get_initial_params(new_params)
    params_form = ParametersFormSet(initial=initial_params)

  popup = render('editor2/submit_job_popup.mako', request, {
             'params_form': params_form,
             'name': _('Job'),
             'header': _('Sync Workflow definition?'),
             'action': reverse('oozie:sync_coord_workflow', kwargs={'job_id': job_id})
           }, force_template=True).content
  return JsonResponse(popup, safe=False)
Example #3
0
def test_copy_files():
    """Integration test for Submission._copy_files() against a live pseudo-HDFS.

    Verifies that workflow jars referenced by absolute and workspace-relative
    paths are copied into the deployment 'lib' dirs (sources kept), that
    re-copying overwrites previously copied jars but not jars already living
    in the lib dir, and that _create_file() writes data into the deployment
    dir. When USE_LIBPATH_FOR_JARS is enabled, jars are referenced via
    'oozie.libpath' instead of being copied.
    """
    cluster = pseudo_hdfs4.shared_cluster()

    try:
        c = make_logged_in_client()
        user = User.objects.get(username='******')
        ensure_home_directory(cluster.fs, user)

        prefix = '/tmp/test_copy_files'

        # Start from a clean slate in case a previous run left files behind.
        if cluster.fs.exists(prefix):
            cluster.fs.rmtree(prefix)

        # Jars in various locations
        deployment_dir = '%s/workspace' % prefix
        external_deployment_dir = '%s/deployment' % prefix
        jar_1 = '%s/udf1.jar' % prefix
        jar_2 = '%s/lib/udf2.jar' % prefix
        jar_3 = '%s/udf3.jar' % deployment_dir
        jar_4 = '%s/lib/udf4.jar' % deployment_dir  # Doesn't move
        jar_5 = 'udf5.jar'
        jar_6 = 'lib/udf6.jar'  # Doesn't move

        cluster.fs.mkdir(prefix)
        cluster.fs.create(jar_1)
        cluster.fs.create(jar_2)
        cluster.fs.create(jar_3)
        cluster.fs.create(jar_4)
        cluster.fs.create(deployment_dir + '/' + jar_5)
        cluster.fs.create(deployment_dir + '/' + jar_6)

        class MockJob(object):
            """Minimal stand-in for a workflow job: just the attributes
            Submission._copy_files() reads (deployment_dir, nodes)."""
            XML_FILE_NAME = 'workflow.xml'

            def __init__(self):
                self.deployment_dir = deployment_dir
                self.nodes = [
                    Node({
                        'id': '1',
                        'type': 'mapreduce',
                        'properties': {
                            'jar_path': jar_1
                        }
                    }),
                    Node({
                        'id': '2',
                        'type': 'mapreduce',
                        'properties': {
                            'jar_path': jar_2
                        }
                    }),
                    Node({
                        'id': '3',
                        'type': 'java',
                        'properties': {
                            'jar_path': jar_3
                        }
                    }),
                    Node({
                        'id': '4',
                        'type': 'java',
                        'properties': {
                            'jar_path': jar_4
                        }
                    }),

                    # Workspace relative paths
                    Node({
                        'id': '5',
                        'type': 'java',
                        'properties': {
                            'jar_path': jar_5
                        }
                    }),
                    Node({
                        'id': '6',
                        'type': 'java',
                        'properties': {
                            'jar_path': jar_6
                        }
                    })
                ]

        submission = Submission(user,
                                job=MockJob(),
                                fs=cluster.fs,
                                jt=cluster.jt)

        submission._copy_files(deployment_dir, "<xml>My XML</xml>",
                               {'prop1': 'val1'})
        submission._copy_files(external_deployment_dir, "<xml>My XML</xml>",
                               {'prop1': 'val1'})

        assert_true(cluster.fs.exists(deployment_dir + '/workflow.xml'),
                    deployment_dir)
        assert_true(cluster.fs.exists(deployment_dir + '/job.properties'),
                    deployment_dir)

        # All sources still there
        assert_true(cluster.fs.exists(jar_1))
        assert_true(cluster.fs.exists(jar_2))
        assert_true(cluster.fs.exists(jar_3))
        assert_true(cluster.fs.exists(jar_4))
        assert_true(cluster.fs.exists(deployment_dir + '/' + jar_5))
        assert_true(cluster.fs.exists(deployment_dir + '/' + jar_6))

        # Lib
        deployment_dir = deployment_dir + '/lib'
        external_deployment_dir = external_deployment_dir + '/lib'

        if USE_LIBPATH_FOR_JARS.get():
            # Jars are referenced in place via oozie.libpath, not copied.
            assert_true(jar_1 in submission.properties['oozie.libpath'])
            assert_true(jar_2 in submission.properties['oozie.libpath'])
            assert_true(jar_3 in submission.properties['oozie.libpath'])
            assert_true(jar_4 in submission.properties['oozie.libpath'])
            print(deployment_dir + '/' + jar_5)
            assert_true((deployment_dir + '/' + jar_5)
                        in submission.properties['oozie.libpath'],
                        submission.properties['oozie.libpath'])
            assert_true((deployment_dir + '/' + jar_6)
                        in submission.properties['oozie.libpath'],
                        submission.properties['oozie.libpath'])
        else:
            list_dir_workspace = cluster.fs.listdir(deployment_dir)
            list_dir_deployement = cluster.fs.listdir(external_deployment_dir)

            # All destinations there
            assert_true(cluster.fs.exists(deployment_dir + '/udf1.jar'),
                        list_dir_workspace)
            assert_true(cluster.fs.exists(deployment_dir + '/udf2.jar'),
                        list_dir_workspace)
            assert_true(cluster.fs.exists(deployment_dir + '/udf3.jar'),
                        list_dir_workspace)
            assert_true(cluster.fs.exists(deployment_dir + '/udf4.jar'),
                        list_dir_workspace)
            assert_true(cluster.fs.exists(deployment_dir + '/udf5.jar'),
                        list_dir_workspace)
            assert_true(cluster.fs.exists(deployment_dir + '/udf6.jar'),
                        list_dir_workspace)

            assert_true(
                cluster.fs.exists(external_deployment_dir + '/udf1.jar'),
                list_dir_deployement)
            assert_true(
                cluster.fs.exists(external_deployment_dir + '/udf2.jar'),
                list_dir_deployement)
            assert_true(
                cluster.fs.exists(external_deployment_dir + '/udf3.jar'),
                list_dir_deployement)
            assert_true(
                cluster.fs.exists(external_deployment_dir + '/udf4.jar'),
                list_dir_deployement)
            assert_true(
                cluster.fs.exists(external_deployment_dir + '/udf5.jar'),
                list_dir_deployement)
            assert_true(
                cluster.fs.exists(external_deployment_dir + '/udf6.jar'),
                list_dir_deployement)

            stats_udf1 = cluster.fs.stats(deployment_dir + '/udf1.jar')
            stats_udf2 = cluster.fs.stats(deployment_dir + '/udf2.jar')
            stats_udf3 = cluster.fs.stats(deployment_dir + '/udf3.jar')
            stats_udf4 = cluster.fs.stats(deployment_dir + '/udf4.jar')
            stats_udf5 = cluster.fs.stats(deployment_dir + '/udf5.jar')
            stats_udf6 = cluster.fs.stats(deployment_dir + '/udf6.jar')

            submission._copy_files('%s/workspace' % prefix,
                                   "<xml>My XML</xml>", {'prop1': 'val1'})

            # Copied jars are rewritten (new fileId); jars that already lived
            # in the lib dir (udf4, udf6) are left untouched.
            assert_not_equal(
                stats_udf1['fileId'],
                cluster.fs.stats(deployment_dir + '/udf1.jar')['fileId'])
            assert_not_equal(
                stats_udf2['fileId'],
                cluster.fs.stats(deployment_dir + '/udf2.jar')['fileId'])
            assert_not_equal(
                stats_udf3['fileId'],
                cluster.fs.stats(deployment_dir + '/udf3.jar')['fileId'])
            assert_equal(
                stats_udf4['fileId'],
                cluster.fs.stats(deployment_dir + '/udf4.jar')['fileId'])
            assert_not_equal(
                stats_udf5['fileId'],
                cluster.fs.stats(deployment_dir + '/udf5.jar')['fileId'])
            assert_equal(
                stats_udf6['fileId'],
                cluster.fs.stats(deployment_dir + '/udf6.jar')['fileId'])

        # Test _create_file()
        # NOTE: use deployment_dir as the failure message here;
        # list_dir_workspace is only bound inside the else branch above and
        # referencing it would raise NameError when USE_LIBPATH_FOR_JARS is on.
        submission._create_file(deployment_dir, 'test.txt', data='Test data')
        assert_true(cluster.fs.exists(deployment_dir + '/test.txt'),
                    deployment_dir)

    finally:
        try:
            cluster.fs.rmtree(prefix)
        except Exception:
            # Best-effort cleanup: log but never mask the test outcome.
            LOG.exception('failed to remove %s' % prefix)
Example #4
0
def test_copy_files():
  """Integration test for Submission._copy_files() against a live pseudo-HDFS.

  Verifies that workflow jars referenced by absolute and workspace-relative
  paths are copied into the deployment 'lib' dirs (sources kept), that
  re-copying overwrites previously copied jars but not jars already living
  in the lib dir, and that _create_file() writes data into the deployment dir.
  """
  cluster = pseudo_hdfs4.shared_cluster()

  try:
    c = make_logged_in_client()
    user = User.objects.get(username='******')

    prefix = '/tmp/test_copy_files'

    # Start from a clean slate in case a previous run left files behind.
    if cluster.fs.exists(prefix):
      cluster.fs.rmtree(prefix)

    # Jars in various locations
    deployment_dir = '%s/workspace' % prefix
    external_deployment_dir = '%s/deployment' % prefix
    jar_1 = '%s/udf1.jar' % prefix
    jar_2 = '%s/lib/udf2.jar' % prefix
    jar_3 = '%s/udf3.jar' % deployment_dir
    jar_4 = '%s/lib/udf4.jar' % deployment_dir # Doesn't move
    jar_5 = 'udf5.jar'
    jar_6 = 'lib/udf6.jar' # Doesn't move

    cluster.fs.mkdir(prefix)
    cluster.fs.create(jar_1)
    cluster.fs.create(jar_2)
    cluster.fs.create(jar_3)
    cluster.fs.create(jar_4)
    cluster.fs.create(deployment_dir + '/' + jar_5)
    cluster.fs.create(deployment_dir + '/' + jar_6)

    class MockJob():
      # Minimal stand-in for a workflow job: just the attributes
      # Submission._copy_files() reads (deployment_dir, nodes).
      XML_FILE_NAME = 'workflow.xml'

      def __init__(self):
        self.deployment_dir = deployment_dir
        self.nodes = [
            Node({'id': '1', 'type': 'mapreduce', 'properties': {'jar_path': jar_1}}),
            Node({'id': '2', 'type': 'mapreduce', 'properties': {'jar_path': jar_2}}),
            Node({'id': '3', 'type': 'java', 'properties': {'jar_path': jar_3}}),
            Node({'id': '4', 'type': 'java', 'properties': {'jar_path': jar_4}}),

            # Workspace relative paths
            Node({'id': '5', 'type': 'java', 'properties': {'jar_path': jar_5}}),
            Node({'id': '6', 'type': 'java', 'properties': {'jar_path': jar_6}})
        ]

    submission = Submission(user, job=MockJob(), fs=cluster.fs, jt=cluster.jt)

    submission._copy_files(deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})
    submission._copy_files(external_deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})

    assert_true(cluster.fs.exists(deployment_dir + '/workflow.xml'), deployment_dir)
    assert_true(cluster.fs.exists(deployment_dir + '/job.properties'), deployment_dir)

    # All sources still there
    assert_true(cluster.fs.exists(jar_1))
    assert_true(cluster.fs.exists(jar_2))
    assert_true(cluster.fs.exists(jar_3))
    assert_true(cluster.fs.exists(jar_4))
    assert_true(cluster.fs.exists(deployment_dir + '/' + jar_5))
    assert_true(cluster.fs.exists(deployment_dir + '/' + jar_6))

    # Lib
    deployment_dir = deployment_dir + '/lib'
    external_deployment_dir = external_deployment_dir + '/lib'

    list_dir_workspace = cluster.fs.listdir(deployment_dir)
    list_dir_deployement = cluster.fs.listdir(external_deployment_dir)

    # All destinations there
    assert_true(cluster.fs.exists(deployment_dir + '/udf1.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf2.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf3.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf4.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf5.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf6.jar'), list_dir_workspace)

    assert_true(cluster.fs.exists(external_deployment_dir + '/udf1.jar'), list_dir_deployement)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf2.jar'), list_dir_deployement)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf3.jar'), list_dir_deployement)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf4.jar'), list_dir_deployement)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf5.jar'), list_dir_deployement)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf6.jar'), list_dir_deployement)

    stats_udf1 = cluster.fs.stats(deployment_dir + '/udf1.jar')
    stats_udf2 = cluster.fs.stats(deployment_dir + '/udf2.jar')
    stats_udf3 = cluster.fs.stats(deployment_dir + '/udf3.jar')
    stats_udf4 = cluster.fs.stats(deployment_dir + '/udf4.jar')
    stats_udf5 = cluster.fs.stats(deployment_dir + '/udf5.jar')
    stats_udf6 = cluster.fs.stats(deployment_dir + '/udf6.jar')

    submission._copy_files('%s/workspace' % prefix, "<xml>My XML</xml>", {'prop1': 'val1'})

    # Copied jars are rewritten (new fileId); jars that already lived in the
    # lib dir (udf4, udf6) are left untouched.
    assert_not_equal(stats_udf1['fileId'], cluster.fs.stats(deployment_dir + '/udf1.jar')['fileId'])
    assert_not_equal(stats_udf2['fileId'], cluster.fs.stats(deployment_dir + '/udf2.jar')['fileId'])
    assert_not_equal(stats_udf3['fileId'], cluster.fs.stats(deployment_dir + '/udf3.jar')['fileId'])
    assert_equal(stats_udf4['fileId'], cluster.fs.stats(deployment_dir + '/udf4.jar')['fileId'])
    assert_not_equal(stats_udf5['fileId'], cluster.fs.stats(deployment_dir + '/udf5.jar')['fileId'])
    assert_equal(stats_udf6['fileId'], cluster.fs.stats(deployment_dir + '/udf6.jar')['fileId'])

    # Test _create_file()
    submission._create_file(deployment_dir, 'test.txt', data='Test data')
    assert_true(cluster.fs.exists(deployment_dir + '/test.txt'), list_dir_workspace)

  finally:
    try:
      cluster.fs.rmtree(prefix)
    except Exception:
      # Best-effort cleanup: log but never mask the test outcome.
      LOG.exception('failed to remove %s' % prefix)
Example #5
0
def sync_coord_workflow(request, job_id):
    """Sync the Workflow definition of a running coordinator job.

    GET: render a popup with a formset of the workflow's parameters,
    pre-filled with the values currently stored on the coordinator.
    POST: re-generate and re-deploy the Workflow and Coordinator XML with
    the submitted parameter values, then run the Oozie 'update' action so
    the server picks up the changes.

    Returns a JsonResponse containing the rendered popup HTML, or redirects
    to the coordinator dashboard on a successful POST.
    """
    ParametersFormSet = formset_factory(ParameterForm, extra=0)
    job = check_job_access_permission(request, job_id)
    check_job_edition_permission(job, request.user)

    hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
    hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
    # urlsplit()[2] extracts the path component of the HDFS URI.
    wf_application_path = (
        job.conf_dict.get("wf_application_path") and Hdfs.urlsplit(job.conf_dict["wf_application_path"])[2] or ""
    )
    coord_application_path = (
        job.conf_dict.get("oozie.coord.application.path")
        and Hdfs.urlsplit(job.conf_dict["oozie.coord.application.path"])[2]
        or ""
    )
    # Name -> value mapping of the coordinator's stored parameters, or None.
    properties = (
        hue_coord
        and hue_coord.properties
        and dict([(param["name"], param["value"]) for param in hue_coord.properties])
        or None
    )

    if request.method == "POST":
        params_form = ParametersFormSet(request.POST)
        if params_form.is_valid():
            mapping = dict([(param["name"], param["value"]) for param in params_form.cleaned_data])

            # Update workflow params in coordinator
            hue_coord.clear_workflow_params()
            properties = dict([(param["name"], param["value"]) for param in hue_coord.properties])

            # Deploy WF XML
            submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties)
            submission._create_file(
                wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True
            )

            # Deploy Coordinator XML
            job.conf_dict.update(mapping)
            submission = Submission(
                user=request.user,
                job=hue_coord,
                fs=request.fs,
                jt=request.jt,
                properties=job.conf_dict,
                oozie_id=job.id,
            )
            submission._create_file(
                coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True
            )
            # Server picks up deployed Coordinator XML changes after running 'update' action
            submission.update_coord()

            request.info(_("Successfully updated Workflow definition"))
            return redirect(reverse("oozie:list_oozie_coordinator", kwargs={"job_id": job_id}))
        else:
            # Format AFTER translation: translating a pre-formatted string
            # defeats the gettext catalog lookup.
            request.error(_("Invalid submission form: %s") % params_form.errors)
    else:
        new_params = hue_wf and hue_wf.find_all_parameters() or []
        new_params = dict([(param["name"], param["value"]) for param in new_params])

        # Set previous values: prefer the value already stored on the coordinator.
        if properties:
            new_params = dict([(key, properties.get(key, value)) for key, value in new_params.items()])

        initial_params = ParameterForm.get_initial_params(new_params)
        params_form = ParametersFormSet(initial=initial_params)

    popup = render(
        "editor2/submit_job_popup.mako",
        request,
        {
            "params_form": params_form,
            "name": _("Job"),
            "header": _("Sync Workflow definition?"),
            "action": reverse("oozie:sync_coord_workflow", kwargs={"job_id": job_id}),
        },
        force_template=True,
    ).content
    return JsonResponse(popup, safe=False)