Example #1
def _submit_bundle(request, bundle, properties):
  try:
    deployment_mapping = {}
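    # Look up the coordinator documents referenced by the bundle, keyed by UUID.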
    coords = dict([(c.uuid, c) for c in Document2.objects.filter(type='oozie-coordinator2', uuid__in=[b['coordinator'] for b in bundle.data['coordinators']])])

    for i, bundled in enumerate(bundle.data['coordinators']):
      coord = coords[bundled['coordinator']]
      workflow = Workflow(document=coord.dependencies.all()[0])
      wf_dir = Submission(request.user, workflow, request.fs, request.jt, properties).deploy()
      deployment_mapping['wf_%s_dir' % i] = request.fs.get_hdfs_path(wf_dir)

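      # Deploy the coordinator itself and record its directory and document by index.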
      coordinator = Coordinator(document=coord)
      coord_dir = Submission(request.user, coordinator, request.fs, request.jt, properties).deploy()
      deployment_mapping['coord_%s_dir' % i] = coord_dir
      deployment_mapping['coord_%s' % i] = coord

    properties.update(deployment_mapping)

    submission = Submission(request.user, bundle, request.fs, request.jt, properties=properties)
    job_id = submission.run()

    return job_id
  except RestException as ex:
    LOG.exception('Error submitting bundle')
    raise PopupException(_("Error submitting bundle %s") % (bundle,), detail=ex._headers.get('oozie-error-message', ex))
Example #2
def submit_external_job(request, application_path):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)

    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])
      mapping['dryrun'] = request.POST.get('dryrun_checkbox') == 'on'
      application_name = os.path.basename(application_path)
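      # Infer the job type from the XML file name; anything else is treated as a workflow.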
      application_class = Bundle if application_name == 'bundle.xml' else Coordinator if application_name == 'coordinator.xml' else get_workflow()
      mapping[application_class.get_application_path_key()] = os.path.dirname(application_path)

      try:
        submission = Submission(request.user, fs=request.fs, jt=request.jt, properties=mapping)
        job_id = submission.run(application_path)
      except RestException as ex:
        detail = ex._headers.get('oozie-error-message', ex)
        if 'Max retries exceeded with url' in str(detail):
          detail = '%s: %s' % (_('The Oozie server is not running'), detail)
        LOG.exception(smart_str(detail))
        raise PopupException(_("Error submitting job %s") % (application_path,), detail=detail)

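      # Return JSON for API callers, otherwise redirect to the matching dashboard view.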
      jsonify = request.POST.get('format') == 'json'
      if jsonify:
        return JsonResponse({'status': 0, 'job_id': job_id, 'type': 'external_workflow'}, safe=False)
      else:
        request.info(_('Oozie job submitted'))
        view = 'list_oozie_bundle' if application_name == 'bundle.xml' else 'list_oozie_coordinator' if application_name == 'coordinator.xml' else 'list_oozie_workflow'
        return redirect(reverse('oozie:%s' % view, kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))
Example #3
def _submit_bundle(request, bundle, properties):
  try:
    deployment_mapping = {}
    coords = dict([(c.uuid, c) for c in Document2.objects.filter(type='oozie-coordinator2', uuid__in=[b['coordinator'] for b in bundle.data['coordinators']])])

    for i, bundled in enumerate(bundle.data['coordinators']):
      coord = coords[bundled['coordinator']]
      workflow = Workflow(document=coord.dependencies.filter(type='oozie-workflow2')[0])
      wf_dir = Submission(request.user, workflow, request.fs, request.jt, properties).deploy()
      deployment_mapping['wf_%s_dir' % i] = request.fs.get_hdfs_path(wf_dir)

      coordinator = Coordinator(document=coord)
      coord_dir = Submission(request.user, coordinator, request.fs, request.jt, properties).deploy()
      deployment_mapping['coord_%s_dir' % i] = request.fs.get_hdfs_path(coord_dir)
      deployment_mapping['coord_%s' % i] = coord

      # Convert start/end dates of coordinator to server timezone
      for prop in bundled['properties']:
        if prop['name'] in ('end_date', 'start_date'):
          prop['value'] = convert_to_server_timezone(prop['value'], local_tz=coordinator.data['properties']['timezone'])

    properties.update(deployment_mapping)

    submission = Submission(request.user, bundle, request.fs, request.jt, properties=properties)
    job_id = submission.run()

    return job_id
  except RestException as ex:
    LOG.exception('Error submitting bundle')
    raise PopupException(_("Error submitting bundle %s") % (bundle,), detail=ex._headers.get('oozie-error-message', ex))
Example #4
def submit_external_job(request, application_path):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)

    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])
      mapping['dryrun'] = request.POST.get('dryrun_checkbox') == 'on'
      application_name = os.path.basename(application_path)
      application_class = Bundle if application_name == 'bundle.xml' else Coordinator if application_name == 'coordinator.xml' else get_workflow()
      mapping[application_class.get_application_path_key()] = application_path

      try:
        submission = Submission(request.user, fs=request.fs, jt=request.jt, properties=mapping)
        job_id = submission.run(application_path)
      except RestException as ex:
        detail = ex._headers.get('oozie-error-message', ex)
        if 'Max retries exceeded with url' in str(detail):
          detail = '%s: %s' % (_('The Oozie server is not running'), detail)

        LOG.exception(smart_str(detail))

        raise PopupException(_("Error submitting job %s") % (application_path,), detail=detail)

      request.info(_('Oozie job submitted'))
      view = 'list_oozie_bundle' if application_name == 'bundle.xml' else 'list_oozie_coordinator' if application_name == 'coordinator.xml' else 'list_oozie_workflow'
      return redirect(reverse('oozie:%s' % view, kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))
Example #5
def _submit_bundle(request, bundle, properties):
  try:
    deployment_mapping = {}
    coords = dict([(c.uuid, c) for c in Document2.objects.filter(type='oozie-coordinator2', uuid__in=[b['coordinator'] for b in bundle.data['coordinators']])])

    for i, bundled in enumerate(bundle.data['coordinators']):
      coord = coords[bundled['coordinator']]
      workflow = Workflow(document=coord.dependencies.filter(type='oozie-workflow2')[0])
      wf_dir = Submission(request.user, workflow, request.fs, request.jt, properties).deploy()
      deployment_mapping['wf_%s_dir' % i] = request.fs.get_hdfs_path(wf_dir)

      coordinator = Coordinator(document=coord)
      coord_dir = Submission(request.user, coordinator, request.fs, request.jt, properties).deploy()
      deployment_mapping['coord_%s_dir' % i] = request.fs.get_hdfs_path(coord_dir)
      deployment_mapping['coord_%s' % i] = coord

      # Convert start/end dates of coordinator to server timezone
      for prop in bundled['properties']:
        if prop['name'] in ('end_date', 'start_date'):
          prop['value'] = convert_to_server_timezone(prop['value'], local_tz=coordinator.data['properties']['timezone'])

    properties.update(deployment_mapping)

    submission = Submission(request.user, bundle, request.fs, request.jt, properties=properties)
    job_id = submission.run()

    return job_id
  except RestException as ex:
    LOG.exception('Error submitting bundle')
    raise PopupException(_("Error submitting bundle %s") % (bundle,), detail=ex._headers.get('oozie-error-message', ex), error_code=200)
Example #6
def _submit_coordinator(request, coordinator, mapping):
    try:
        wf = coordinator.workflow
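        # Deploy the workflow first so its HDFS path can be handed to the coordinator.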
        wf_dir = Submission(
            request.user,
            wf,
            request.fs,
            request.jt,
            mapping,
            local_tz=coordinator.data['properties']['timezone']).deploy()

        properties = {'wf_application_path': request.fs.get_hdfs_path(wf_dir)}
        properties.update(mapping)

        submission = Submission(request.user,
                                coordinator,
                                request.fs,
                                request.jt,
                                properties=properties)
        job_id = submission.run()

        return job_id
    except RestException as ex:
        LOG.exception('Error submitting coordinator')
        raise PopupException(_("Error submitting coordinator %s") %
                             (coordinator, ),
                             detail=ex._headers.get('oozie-error-message', ex))
Example #7
def _submit_bundle(request, bundle, properties):
    try:
        deployment_mapping = {}
        coords = dict([(c.uuid, c) for c in Document2.objects.filter(
            type='oozie-coordinator2',
            uuid__in=[b['coordinator'] for b in bundle.data['coordinators']])])

        for i, bundled in enumerate(bundle.data['coordinators']):
            coord = coords[bundled['coordinator']]
            workflow = Workflow(document=coord.dependencies.all()[0])
            wf_dir = Submission(request.user, workflow, request.fs, request.jt,
                                properties).deploy()
            deployment_mapping['wf_%s_dir' %
                               i] = request.fs.get_hdfs_path(wf_dir)

            coordinator = Coordinator(document=coord)
            coord_dir = Submission(request.user, coordinator, request.fs,
                                   request.jt, properties).deploy()
            deployment_mapping['coord_%s_dir' % i] = coord_dir
            deployment_mapping['coord_%s' % i] = coord

        properties.update(deployment_mapping)

        submission = Submission(request.user,
                                bundle,
                                request.fs,
                                request.jt,
                                properties=properties)
        job_id = submission.run()

        return job_id
    except RestException as ex:
        raise PopupException(_("Error submitting bundle %s") % (bundle, ),
                             detail=ex._headers.get('oozie-error-message', ex))
Example #8
def _rerun_workflow(request, oozie_id, run_args, mapping):
  try:
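    # Rerun the existing workflow in place, reusing its Oozie job id.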
    submission = Submission(user=request.user, fs=request.fs, jt=request.jt, properties=mapping, oozie_id=oozie_id)
    job_id = submission.rerun(**run_args)
    return job_id
  except RestException as ex:
    msg = _("Error re-running workflow %s.") % (oozie_id,)
    LOG.exception(msg)

    raise PopupException(msg, detail=ex._headers.get('oozie-error-message', ex))
Example #9
def _rerun_bundle(request, oozie_id, args, params, properties):
  try:
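    # Rerun the bundle against the existing Oozie id with the given parameters.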
    submission = Submission(user=request.user, fs=request.fs, jt=request.jt, oozie_id=oozie_id, properties=properties)
    job_id = submission.rerun_bundle(params=params, **args)
    return job_id
  except RestException as ex:
    msg = _("Error re-running bundle %s.") % (oozie_id,)
    LOG.exception(msg)

    raise PopupException(msg, detail=ex._headers.get('oozie-error-message', ex))
Example #10
def _rerun_workflow(request, oozie_id, run_args, mapping):
  try:
    submission = Submission(user=request.user, fs=request.fs, jt=request.jt, properties=mapping, oozie_id=oozie_id)
    job_id = submission.rerun(**run_args)
    return job_id
  except RestException as ex:
    msg = _("Error re-running workflow %s.") % (oozie_id,)
    LOG.exception(msg)

    raise PopupException(msg, detail=ex._headers.get('oozie-error-message', ex))
Example #11
def _rerun_bundle(request, oozie_id, args, params, properties):
  try:
    submission = Submission(user=request.user, fs=request.fs, jt=request.jt, oozie_id=oozie_id, properties=properties)
    job_id = submission.rerun_bundle(params=params, **args)
    return job_id
  except RestException as ex:
    msg = _("Error re-running bundle %s.") % (oozie_id,)
    LOG.exception(msg)

    raise PopupException(msg, detail=ex._headers.get('oozie-error-message', ex))
Example #12
def submit_external_job(request, application_path):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)

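  # Restore the double slash in abfs:// and s3a:// schemes; other paths are made absolute.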
  if application_path.startswith('abfs:/') and not application_path.startswith('abfs://'):
    application_path = application_path.replace("abfs:/", "abfs://")
  elif application_path.startswith('s3a:/') and not application_path.startswith('s3a://'):
    application_path = application_path.replace('s3a:/', 's3a://')
  else:
    application_path = "/" + application_path

  if application_path.startswith("abfs://"):
    application_path = abfspath(application_path)

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)

    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])
      mapping['dryrun'] = request.POST.get('dryrun_checkbox') == 'on'
      application_name = os.path.basename(application_path)
      application_class = Bundle if application_name == 'bundle.xml' else Coordinator if application_name == 'coordinator.xml' else get_workflow()
      mapping[application_class.get_application_path_key()] = os.path.dirname(application_path)

      try:
        submission = Submission(request.user, fs=request.fs, jt=request.jt, properties=mapping)
        job_id = submission.run(application_path)
      except RestException as ex:
        detail = ex._headers.get('oozie-error-message', ex)
        if 'Max retries exceeded with url' in str(detail):
          detail = '%s: %s' % (_('The Oozie server is not running'), detail)
        LOG.exception(smart_str(detail))
        raise PopupException(_("Error submitting job %s") % (application_path,), detail=detail)

      jsonify = request.POST.get('format') == 'json'
      if jsonify:
        return JsonResponse({'status': 0, 'job_id': job_id, 'type': 'external_workflow'}, safe=False)
      else:
        request.info(_('Oozie job submitted'))
        view = 'list_oozie_bundle' if application_name == 'bundle.xml' else 'list_oozie_coordinator' if application_name == 'coordinator.xml' else 'list_oozie_workflow'
        return redirect(reverse('oozie:%s' % view, kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))
  else:
    parameters = Submission(request.user, fs=request.fs, jt=request.jt).get_external_parameters(application_path)
    initial_params = ParameterForm.get_initial_params(parameters)
    params_form = ParametersFormSet(initial=initial_params)

  popup = render('editor/submit_job_popup.mako', request, {
                   'params_form': params_form,
                   'name': _('Job'),
                   'action': reverse('oozie:submit_external_job', kwargs={'application_path': application_path}),
                   'show_dryrun': os.path.basename(application_path) != 'bundle.xml',
                   'return_json': request.GET.get('format') == 'json'
                 }, force_template=True).content
  return JsonResponse(popup, safe=False)
Example #13
def sync_coord_workflow(request, job_id):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  job = check_job_access_permission(request, job_id)
  check_job_edition_permission(job, request.user)

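  # Resolve the Hue coordinator and workflow objects and their deployment paths from the job configuration.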
  hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
  hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
  wf_application_path = job.conf_dict.get('wf_application_path') and Hdfs.urlsplit(job.conf_dict['wf_application_path'])[2] or ''
  coord_application_path = job.conf_dict.get('oozie.coord.application.path') and Hdfs.urlsplit(job.conf_dict['oozie.coord.application.path'])[2] or ''
  properties = hue_coord and hue_coord.properties and dict([(param['name'], param['value']) for param in hue_coord.properties]) or None

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)
    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      # Update workflow params in coordinator
      hue_coord.clear_workflow_params()
      properties = dict([(param['name'], param['value']) for param in hue_coord.properties])

      # Deploy WF XML
      submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties)
      submission.deploy(deployment_dir=wf_application_path)
      submission._create_file(wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True)

      # Deploy Coordinator XML
      job.conf_dict.update(mapping)
      submission = Submission(user=request.user, job=hue_coord, fs=request.fs, jt=request.jt, properties=job.conf_dict, oozie_id=job.id)
      submission._create_file(coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True)
      # Server picks up deployed Coordinator XML changes after running 'update' action
      submission.update_coord()

      request.info(_('Successfully updated Workflow definition'))
      return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))
  else:
    new_params = hue_wf and hue_wf.find_all_parameters() or []
    new_params = dict([(param['name'], param['value']) for param in new_params])

    # Set previous values
    if properties:
      new_params = dict([(key, properties[key]) if key in properties.keys() else (key, new_params[key]) for key, value in new_params.items()])

    initial_params = ParameterForm.get_initial_params(new_params)
    params_form = ParametersFormSet(initial=initial_params)

  popup = render('editor2/submit_job_popup.mako', request, {
             'params_form': params_form,
             'name': _('Job'),
             'header': _('Sync Workflow definition?'),
             'action': reverse('oozie:sync_coord_workflow', kwargs={'job_id': job_id})
           }, force_template=True).content
  return JsonResponse(popup, safe=False)
Example #14
    def test_generate_altus_action_start_cluster(self):
        class TestJob(object):
            XML_FILE_NAME = 'workflow.xml'

            def __init__(self):
                self.deployment_dir = '/tmp/test'
                self.nodes = [
                    Node({
                        'id': '1',
                        'type': 'hive-document',
                        'properties': {
                            'jdbc_url':
                            u"${wf:actionData('shell-31b5')['hiveserver']}",
                            'password': u'test'
                        }
                    })
                ]

        user = User.objects.get(username='******')
        submission = Submission(user,
                                job=TestJob(),
                                fs=MockFs(logical_name='fsname'),
                                jt=MockJt(logical_name='jtname'))

        command = submission._generate_altus_action_script(
            service='dataeng',
            command='listClusters',
            arguments={},
            auth_key_id='altus_auth_key_id',
            auth_key_secret='altus_auth_key_secret')

        assert_true(
            '''#!/usr/bin/env python

from navoptapi.api_lib import ApiLib

hostname = 'dataengapi.us-west-1.altus.cloudera.com'
auth_key_id = 'altus_auth_key_id'
auth_key_secret = \'\'\'altus_auth_key_secret\'\'\'

def _exec(service, command, parameters=None):
  if parameters is None:
    parameters = {}

  try:
    api = ApiLib(service, hostname, auth_key_id, auth_key_secret)
    resp = api.call_api(command, parameters)
    return resp.json()
  except Exception, e:
    print e
    raise e

print _exec('dataeng', 'listClusters', {})
''' in command, command)
Example #15
def _submit_workflow(user, fs, jt, workflow, mapping):
  try:
    submission = Submission(user, workflow, fs, jt, mapping)
    job_id = submission.run()
    return job_id
  except RestException as ex:
    detail = ex._headers.get('oozie-error-message', ex)
    if 'Max retries exceeded with url' in str(detail):
      detail = '%s: %s' % (_('The Oozie server is not running'), detail)
    LOG.error(smart_str(detail))
    raise PopupException(_("Error submitting workflow %s") % (workflow,), detail=detail)
Example #16
  def test_get_logical_properties(self):
    submission = Submission(self.user, fs=MockFs(logical_name='fsname'), jt=MockJt(logical_name='jtname'))

    assert_equal({}, submission.properties)

    submission._update_properties('curacao:8032', '/deployment_dir')

    assert_equal({
        'jobTracker': 'jtname',
        'nameNode': 'fsname'
      }, submission.properties)
Example #17
  def test_get_properties(self):
    submission = Submission(self.user, fs=MockFs())

    assert_equal({}, submission.properties)

    submission._update_properties('curacao:8032', '/deployment_dir')

    assert_equal({
        'jobTracker': 'curacao:8032',
        'nameNode': 'hdfs://curacao:8020'
      }, submission.properties)
Example #18
def sync_coord_workflow(request, job_id):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  job = check_job_access_permission(request, job_id)
  check_job_edition_permission(job, request.user)

  hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
  hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
  wf_application_path = job.conf_dict.get('wf_application_path') and Hdfs.urlsplit(job.conf_dict['wf_application_path'])[2] or ''
  coord_application_path = job.conf_dict.get('oozie.coord.application.path') and Hdfs.urlsplit(job.conf_dict['oozie.coord.application.path'])[2] or ''
  properties = hue_coord and hue_coord.properties and dict([(param['name'], param['value']) for param in hue_coord.properties]) or None

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)
    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      # Update workflow params in coordinator
      hue_coord.clear_workflow_params()
      properties = dict([(param['name'], param['value']) for param in hue_coord.properties])

      # Deploy WF XML
      submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties)
      submission._create_file(wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True)

      # Deploy Coordinator XML
      job.conf_dict.update(mapping)
      submission = Submission(user=request.user, job=hue_coord, fs=request.fs, jt=request.jt, properties=job.conf_dict, oozie_id=job.id)
      submission._create_file(coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True)
      # Server picks up deployed Coordinator XML changes after running 'update' action
      submission.update_coord()

      request.info(_('Successfully updated Workflow definition'))
      return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))
  else:
    new_params = hue_wf and hue_wf.find_all_parameters() or []
    new_params = dict([(param['name'], param['value']) for param in new_params])

    # Set previous values
    if properties:
      new_params = dict([(key, properties[key]) if key in properties.keys() else (key, new_params[key]) for key, value in new_params.items()])

    initial_params = ParameterForm.get_initial_params(new_params)
    params_form = ParametersFormSet(initial=initial_params)

  popup = render('editor2/submit_job_popup.mako', request, {
             'params_form': params_form,
             'name': _('Job'),
             'header': _('Sync Workflow definition?'),
             'action': reverse('oozie:sync_coord_workflow', kwargs={'job_id': job_id})
           }, force_template=True).content
  return JsonResponse(popup, safe=False)
Example #19
    def test_update_credentials_from_hive_action_when_jdbc_url_is_variable(
            self):
        class TestJob(object):
            XML_FILE_NAME = 'workflow.xml'

            def __init__(self):
                self.deployment_dir = '/tmp/test'
                self.nodes = [
                    Node({
                        'id': '1',
                        'type': 'hive-document',
                        'properties': {
                            'jdbc_url':
                            u"${wf:actionData('shell-31b5')['hiveserver']}",
                            'password': u'test'
                        }
                    })
                ]

        user = User.objects.get(username='******')
        submission = Submission(user,
                                job=TestJob(),
                                fs=MockFs(logical_name='fsname'),
                                jt=MockJt(logical_name='jtname'))

        finish = (
            beeswax.conf.HIVE_SERVER_HOST.set_for_testing('hue-koh-chang'),
            beeswax.conf.HIVE_SERVER_PORT.set_for_testing(12345),
        )

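        # With the JDBC URL left as a workflow variable, it should be rebuilt from the configured HiveServer2.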
        try:
            creds = Credentials(credentials=TestCredentials.CREDENTIALS.copy())
            hive_properties = {
                'thrift_uri': 'thrift://*****:*****@test-realm.com',
            }

            submission.properties['credentials'] = creds.get_properties(
                hive_properties)
            submission._update_credentials_from_hive_action(creds)

            assert_equal(
                submission.properties['credentials'][
                    creds.hiveserver2_name]['properties'],
                [('hive2.jdbc.url',
                  u'jdbc:hive2://hue-koh-chang:12345/default'),
                 ('hive2.server.principal', u'hive/[email protected]')
                 ])

        finally:
            for f in finish:
                f()
Example #20
def _submit_workflow(user, fs, jt, workflow, mapping):
  try:
    submission = Submission(user, workflow, fs, jt, mapping)
    job_id = submission.run()

    workflow.document.add_to_history(submission.user, {'properties': submission.properties, 'oozie_id': submission.oozie_id})

    return job_id
  except RestException as ex:
    detail = ex._headers.get('oozie-error-message', ex)
    if 'Max retries exceeded with url' in str(detail):
      detail = '%s: %s' % (_('The Oozie server is not running'), detail)
    LOG.exception('Error submitting workflow: %s' % smart_str(detail))
    raise PopupException(_("Error submitting workflow %s: %s") % (workflow, detail))
Example #21
def _submit_workflow(user, fs, jt, workflow, mapping):
  try:
    submission = Submission(user, workflow, fs, jt, mapping)
    job_id = submission.run()

    workflow.document.add_to_history(submission.user, {'properties': submission.properties, 'oozie_id': submission.oozie_id})

    return job_id
  except RestException as ex:
    detail = ex._headers.get('oozie-error-message', ex)
    if 'Max retries exceeded with url' in str(detail):
      detail = '%s: %s' % (_('The Oozie server is not running'), detail)
    LOG.exception('Error submitting workflow: %s' % smart_str(detail))
    raise PopupException(_("Error submitting workflow %s: %s") % (workflow, detail))
Example #22
def _submit_coordinator(request, coordinator, mapping):
  try:
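    # Deploy the coordinator's workflow, then reference it via wf_application_path.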
    wf_doc = Document2.objects.get(uuid=coordinator.data['properties']['workflow'])
    wf_dir = Submission(request.user, Workflow(document=wf_doc), request.fs, request.jt, mapping).deploy()

    properties = {'wf_application_path': request.fs.get_hdfs_path(wf_dir)}
    properties.update(mapping)

    submission = Submission(request.user, coordinator, request.fs, request.jt, properties=properties)
    job_id = submission.run()

    return job_id
  except RestException as ex:
    raise PopupException(_("Error submitting coordinator %s") % (coordinator,),
                         detail=ex._headers.get('oozie-error-message', ex))
Example #23
def _submit_coordinator(request, coordinator, mapping):
  try:
    wf_doc = Document2.objects.get_by_uuid(user=request.user, uuid=coordinator.data['properties']['workflow'])
    wf_dir = Submission(request.user, Workflow(document=wf_doc), request.fs, request.jt, mapping, local_tz=coordinator.data['properties']['timezone']).deploy()

    properties = {'wf_application_path': request.fs.get_hdfs_path(wf_dir)}
    properties.update(mapping)

    submission = Submission(request.user, coordinator, request.fs, request.jt, properties=properties)
    job_id = submission.run()

    return job_id
  except RestException as ex:
    LOG.exception('Error submitting coordinator')
    raise PopupException(_("Error submitting coordinator %s") % (coordinator,), detail=ex._headers.get('oozie-error-message', ex))
Example #24
  def test_update_properties(self):
    finish = []
    finish.append(MR_CLUSTERS.set_for_testing({'default': {}}))
    finish.append(MR_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
    finish.append(YARN_CLUSTERS.set_for_testing({'default': {}}))
    finish.append(YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
    try:
      properties = {
        'user.name': 'hue',
        'test.1': 'http://localhost/test?test1=test&test2=test',
        'nameNode': 'hdfs://curacao:8020',
        'jobTracker': 'jtaddress',
        'security_enabled': False
      }

      final_properties = properties.copy()
      submission = Submission(None, properties=properties, oozie_id='test', fs=MockFs())
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)

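      # With MR/YARN clusters configured, the nameNode is resolved from the cluster's default FS.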
      cluster.clear_caches()
      fs = cluster.get_hdfs()
      jt = cluster.get_next_ha_mrcluster()[1]
      final_properties = properties.copy()
      final_properties.update({
        'jobTracker': 'jtaddress',
        'nameNode': fs.fs_defaultfs
      })
      submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)

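      # Configured logical names take precedence over concrete addresses.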
      finish.append(HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('namenode'))
      finish.append(MR_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('jobtracker'))
      cluster.clear_caches()
      fs = cluster.get_hdfs()
      jt = cluster.get_next_ha_mrcluster()[1]
      final_properties = properties.copy()
      final_properties.update({
        'jobTracker': 'jobtracker',
        'nameNode': 'namenode'
      })
      submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
      assert_equal(properties, submission.properties)
      submission._update_properties('jtaddress', 'deployment-directory')
      assert_equal(final_properties, submission.properties)
    finally:
      cluster.clear_caches()
      for reset in finish:
        reset()
Example #25
  def test_get_external_parameters(self):
    xml = """
<workflow-app name="Pig" xmlns="uri:oozie:workflow:0.4">
    <start to="Pig"/>
    <action name="Pig">
        <pig>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                  <delete path="${output}"/>
            </prepare>
            <script>aggregate.pig</script>
              <argument>-param</argument>
              <argument>INPUT=${input}</argument>
              <argument>-param</argument>
              <argument>OUTPUT=${output}</argument>
            <configuration>
              <property>
                <name>mapred.input.format.class</name>
                <value>org.apache.hadoop.examples.SleepJob$SleepInputFormat</value>
              </property>
            </configuration>
        </pig>
        <ok to="end"/>
        <error to="kill"/>
    </action>
    <kill name="kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>
    """

    properties = """
#
# Licensed to the Hue
#

nameNode=hdfs://localhost:8020
jobTracker=localhost:8021
queueName=default
examplesRoot=examples

oozie.use.system.libpath=true

oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/apps/pig
    """
    parameters = Submission(self.user)._get_external_parameters(xml, properties)

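    # Both ${variables} extracted from the XML and keys from job.properties appear in the result.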
    assert_equal({'oozie.use.system.libpath': 'true',
                   'input': '',
                   'jobTracker': 'localhost:8021',
                   'oozie.wf.application.path': '${nameNode}/user/${user.name}/${examplesRoot}/apps/pig',
                   'examplesRoot': 'examples',
                   'output': '',
                   'nameNode': 'hdfs://localhost:8020',
                   'queueName': 'default'
                  },
                 parameters)
Example #26
def sync_coord_workflow(request, job_id):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  job = check_job_access_permission(request, job_id)
  check_job_edition_permission(job, request.user)

  hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
  hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
  wf_application_path = job.conf_dict.get('wf_application_path') and Hdfs.urlsplit(job.conf_dict['wf_application_path'])[2] or ''

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)
    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

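      # Redeploy the workflow definition with the updated parameter mapping.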
      submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=mapping)
      submission._sync_definition(wf_application_path, mapping)

      request.info(_('Successfully updated Workflow definition'))
      return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))
  else:
    parameters = hue_wf and hue_wf.find_all_parameters() or []
    params_dict = dict([(param['name'], param['value']) for param in parameters])

    submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=None)
    prev_properties = hue_wf and hue_wf.deployment_dir and \
                      submission.get_external_parameters(request.fs.join(wf_application_path, hue_wf.XML_FILE_NAME)) or {}

    for key, value in params_dict.items():
      params_dict[key] = prev_properties[key] if key in prev_properties.keys() else params_dict[key]

    initial_params = ParameterForm.get_initial_params(params_dict)
    params_form = ParametersFormSet(initial=initial_params)

  popup = render('editor2/submit_job_popup.mako', request, {
             'params_form': params_form,
             'name': _('Job'),
             'header': _('Sync Workflow definition?'),
             'action': reverse('oozie:sync_coord_workflow', kwargs={'job_id': job_id})
           }, force_template=True).content
  return JsonResponse(popup, safe=False)
Example #27
def submit_external_job(request, application_path):
    ParametersFormSet = formset_factory(ParameterForm, extra=0)

    if request.method == "POST":
        params_form = ParametersFormSet(request.POST)

        if params_form.is_valid():
            mapping = dict([(param["name"], param["value"]) for param in params_form.cleaned_data])
            mapping["dryrun"] = request.POST.get("dryrun_checkbox") == "on"
            application_name = os.path.basename(application_path)
            application_class = (
                Bundle
                if application_name == "bundle.xml"
                else Coordinator
                if application_name == "coordinator.xml"
                else get_workflow()
            )
            mapping[application_class.get_application_path_key()] = application_path

            try:
                submission = Submission(request.user, fs=request.fs, jt=request.jt, properties=mapping)
                job_id = submission.run(application_path)
            except RestException as ex:
                detail = ex._headers.get("oozie-error-message", ex)
                if "Max retries exceeded with url" in str(detail):
                    detail = "%s: %s" % (_("The Oozie server is not running"), detail)

                LOG.exception(smart_str(detail))

                raise PopupException(_("Error submitting job %s") % (application_path,), detail=detail)

            request.info(_("Oozie job submitted"))
            view = (
                "list_oozie_bundle"
                if application_name == "bundle.xml"
                else "list_oozie_coordinator"
                if application_name == "coordinator.xml"
                else "list_oozie_workflow"
            )
            return redirect(reverse("oozie:%s" % view, kwargs={"job_id": job_id}))
        else:
            request.error(_("Invalid submission form: %s" % params_form.errors))
Example #28
  def _schedule_oozie_job(self, workspace_path, collection_name, input_path):
    oozie = get_oozie(self.username)

    properties = {
      "dryrun": "False",
      "zkHost":  zkensemble(),
      # these libs can be installed from here:
      # https://drive.google.com/a/cloudera.com/folderview?id=0B1gZoK8Ae1xXc0sxSkpENWJ3WUU&usp=sharing
      "oozie.libpath": CONFIG_INDEXER_LIBS_PATH.get(),
      "security_enabled": "False",
      "collectionName": collection_name,
      "filePath": input_path,
      "outputDir": "/user/%s/indexer" % self.username,
      "workspacePath": workspace_path,
      'oozie.wf.application.path': "${nameNode}%s" % workspace_path,
      'user.name': self.username
    }

    submission = Submission(self.username, fs=self.fs, properties=properties)
    job_id = submission.run(workspace_path)

    return job_id
Example #29
    def _schedule_oozie_job(self, workspace_path, collection_name, input_path):
        oozie = get_oozie(self.username)

        properties = {
            "dryrun": "False",
            "zkHost": zkensemble(),
            # these libs can be installed from here:
            # https://drive.google.com/a/cloudera.com/folderview?id=0B1gZoK8Ae1xXc0sxSkpENWJ3WUU&usp=sharing
            "oozie.libpath": CONFIG_INDEXER_LIBS_PATH.get(),
            "security_enabled": "False",
            "collectionName": collection_name,
            "filePath": input_path,
            "outputDir": "/user/%s/indexer" % self.username,
            "workspacePath": workspace_path,
            'oozie.wf.application.path': "${nameNode}%s" % workspace_path,
            'user.name': self.username
        }

        submission = Submission(self.username,
                                fs=self.fs,
                                properties=properties)
        job_id = submission.run(workspace_path)

        return job_id
Example #30
def _submit_coordinator(request, coordinator, mapping):
    try:
        wf = coordinator.workflow
        if IS_MULTICLUSTER_ONLY.get() and has_multi_cluster():
            mapping['auto-cluster'] = {
                u'additionalClusterResourceTags': [],
                u'automaticTerminationCondition':
                u'EMPTY_JOB_QUEUE',  # or u'NONE'
                u'cdhVersion':
                u'CDH514',
                u'clouderaManagerPassword':
                u'guest',
                u'clouderaManagerUsername':
                u'guest',
                u'clusterName':
                u'analytics4',  # Add time variable
                u'computeWorkersConfiguration': {
                    u'bidUSDPerHr': 0,
                    u'groupSize': 0,
                    u'useSpot': False
                },
                u'environmentName':
                u'crn:altus:environments:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:environment:analytics/236ebdda-18bd-428a-9d2b-cd6973d42946',
                u'instanceBootstrapScript':
                u'',
                u'instanceType':
                u'm4.xlarge',
                u'jobSubmissionGroupName':
                u'',
                u'jobs': [
                    {
                        u'failureAction': u'INTERRUPT_JOB_QUEUE',
                        u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
                        u'sparkJob': {
                            u'applicationArguments': ['5'],
                            u'jars': [
                                u's3a://datawarehouse-customer360/ETL/spark-examples.jar'
                            ],
                            u'mainClass':
                            u'org.apache.spark.examples.SparkPi'
                        }
                    },
                    #         {
                    #           u'failureAction': u'INTERRUPT_JOB_QUEUE',
                    #           u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
                    #           u'sparkJob': {
                    #             u'applicationArguments': ['10'],
                    #             u'jars': [u's3a://datawarehouse-customer360/ETL/spark-examples.jar'],
                    #             u'mainClass': u'org.apache.spark.examples.SparkPi'
                    #           }
                    #         },
                    #         {
                    #           u'failureAction': u'INTERRUPT_JOB_QUEUE',
                    #           u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
                    #           u'sparkJob': {
                    #             u'applicationArguments': [u'filesystems3.conf'],
                    #             u'jars': [u's3a://datawarehouse-customer360/ETL/envelope-0.6.0-SNAPSHOT-c6.jar'],
                    #             u'mainClass': u'com.cloudera.labs.envelope.EnvelopeMain',
                    #             u'sparkArguments': u'--archives=s3a://datawarehouse-customer360/ETL/filesystems3.conf'
                    #           }
                    #         }
                ],
                u'namespaceName':
                u'crn:altus:sdx:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:namespace:analytics/7ea35fe5-dbc9-4b17-92b1-97a1ab32e410',
                u'publicKey':
                DEFAULT_PUBLIC_KEY.get(),
                u'serviceType':
                u'SPARK',
                u'workersConfiguration': {},
                u'workersGroupSize':
                u'3'
            }
        wf_dir = Submission(
            request.user,
            wf,
            request.fs,
            request.jt,
            mapping,
            local_tz=coordinator.data['properties']['timezone']).deploy()

        properties = {'wf_application_path': request.fs.get_hdfs_path(wf_dir)}
        properties.update(mapping)

        submission = Submission(request.user,
                                coordinator,
                                request.fs,
                                request.jt,
                                properties=properties)
        job_id = submission.run()

        return job_id
    except RestException as ex:
        LOG.exception('Error submitting coordinator')
        raise PopupException(_("Error submitting coordinator %s") %
                             (coordinator, ),
                             detail=ex._headers.get('oozie-error-message', ex),
                             error_code=200)
Example #31
def test_copy_files():
  cluster = pseudo_hdfs4.shared_cluster()

  try:
    c = make_logged_in_client()
    user = User.objects.get(username='******')

    prefix = '/tmp/test_copy_files'

    if cluster.fs.exists(prefix):
      cluster.fs.rmtree(prefix)

    # Jars in various locations
    deployment_dir = '%s/workspace' % prefix
    external_deployment_dir = '%s/deployment' % prefix
    jar_1 = '%s/udf1.jar' % prefix
    jar_2 = '%s/lib/udf2.jar' % prefix
    jar_3 = '%s/udf3.jar' % deployment_dir
    jar_4 = '%s/lib/udf4.jar' % deployment_dir # Never move

    cluster.fs.mkdir(prefix)
    cluster.fs.create(jar_1)
    cluster.fs.create(jar_2)
    cluster.fs.create(jar_3)
    cluster.fs.create(jar_4)

    class MockNode():
      def __init__(self, jar_path):
        self.jar_path = jar_path

    class MockJob():
      XML_FILE_NAME = 'workflow.xml'

      def __init__(self):
        self.node_list = [
            MockNode(jar_1),
            MockNode(jar_2),
            MockNode(jar_3),
            MockNode(jar_4),
        ]

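    # _copy_files should place workflow.xml, job.properties and each node jar into both deployment dirs.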
    submission = Submission(user, job=MockJob(), fs=cluster.fs, jt=cluster.jt)

    submission._copy_files(deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})
    submission._copy_files(external_deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})

    assert_true(cluster.fs.exists(deployment_dir + '/workflow.xml'), deployment_dir)
    assert_true(cluster.fs.exists(deployment_dir + '/job.properties'), deployment_dir)

    # All sources still there
    assert_true(cluster.fs.exists(jar_1))
    assert_true(cluster.fs.exists(jar_2))
    assert_true(cluster.fs.exists(jar_3))
    assert_true(cluster.fs.exists(jar_4))

    deployment_dir = deployment_dir + '/lib'
    external_deployment_dir = external_deployment_dir + '/lib'

    list_dir_workspace = cluster.fs.listdir(deployment_dir)
    list_dir_deployement = cluster.fs.listdir(external_deployment_dir)

    # All destinations there
    assert_true(cluster.fs.exists(deployment_dir + '/udf1.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf2.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf3.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf4.jar'), list_dir_workspace)

    assert_true(cluster.fs.exists(external_deployment_dir + '/udf1.jar'), list_dir_deployement)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf2.jar'), list_dir_deployement)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf3.jar'), list_dir_deployement)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf4.jar'), list_dir_deployement)

    stats_udf1 = cluster.fs.stats(deployment_dir + '/udf1.jar')
    stats_udf2 = cluster.fs.stats(deployment_dir + '/udf2.jar')
    stats_udf3 = cluster.fs.stats(deployment_dir + '/udf3.jar')
    stats_udf4 = cluster.fs.stats(deployment_dir + '/udf4.jar')

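    # A second copy overwrites udf1-3 (new fileIds) but leaves the in-place lib jar udf4 untouched.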
    submission._copy_files('%s/workspace' % prefix, "<xml>My XML</xml>", {'prop1': 'val1'})

    assert_not_equal(stats_udf1['fileId'], cluster.fs.stats(deployment_dir + '/udf1.jar')['fileId'])
    assert_not_equal(stats_udf2['fileId'], cluster.fs.stats(deployment_dir + '/udf2.jar')['fileId'])
    assert_not_equal(stats_udf3['fileId'], cluster.fs.stats(deployment_dir + '/udf3.jar')['fileId'])
    assert_equal(stats_udf4['fileId'], cluster.fs.stats(deployment_dir + '/udf4.jar')['fileId'])

  finally:
    try:
      cluster.fs.rmtree(prefix)
    except:
      pass
Example #32
def sync_coord_workflow(request, job_id):
    ParametersFormSet = formset_factory(ParameterForm, extra=0)
    job = check_job_access_permission(request, job_id)
    check_job_edition_permission(job, request.user)

    hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
    hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
    wf_application_path = (
        job.conf_dict.get("wf_application_path") and Hdfs.urlsplit(job.conf_dict["wf_application_path"])[2] or ""
    )
    coord_application_path = (
        job.conf_dict.get("oozie.coord.application.path")
        and Hdfs.urlsplit(job.conf_dict["oozie.coord.application.path"])[2]
        or ""
    )
    properties = (
        hue_coord
        and hue_coord.properties
        and dict([(param["name"], param["value"]) for param in hue_coord.properties])
        or None
    )

    if request.method == "POST":
        params_form = ParametersFormSet(request.POST)
        if params_form.is_valid():
            mapping = dict([(param["name"], param["value"]) for param in params_form.cleaned_data])

            # Update workflow params in coordinator
            hue_coord.clear_workflow_params()
            properties = dict([(param["name"], param["value"]) for param in hue_coord.properties])

            # Deploy WF XML
            submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties)
            submission._create_file(
                wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True
            )

            # Deploy Coordinator XML
            job.conf_dict.update(mapping)
            submission = Submission(
                user=request.user,
                job=hue_coord,
                fs=request.fs,
                jt=request.jt,
                properties=job.conf_dict,
                oozie_id=job.id,
            )
            submission._create_file(
                coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True
            )
            # Server picks up deployed Coordinator XML changes after running 'update' action
            submission.update_coord()

            request.info(_("Successfully updated Workflow definition"))
            return redirect(reverse("oozie:list_oozie_coordinator", kwargs={"job_id": job_id}))
        else:
            request.error(_("Invalid submission form: %s" % params_form.errors))
    else:
        new_params = hue_wf and hue_wf.find_all_parameters() or []
        new_params = dict([(param["name"], param["value"]) for param in new_params])

        # Set previous values
        if properties:
            new_params = dict(
                [
                    (key, properties[key]) if key in properties.keys() else (key, new_params[key])
                    for key, value in new_params.items()
                ]
            )

        initial_params = ParameterForm.get_initial_params(new_params)
        params_form = ParametersFormSet(initial=initial_params)

    popup = render(
        "editor2/submit_job_popup.mako",
        request,
        {
            "params_form": params_form,
            "name": _("Job"),
            "header": _("Sync Workflow definition?"),
            "action": reverse("oozie:sync_coord_workflow", kwargs={"job_id": job_id}),
        },
        force_template=True,
    ).content
    return JsonResponse(popup, safe=False)
Example #33
def _submit_coordinator(request, coordinator, mapping):
  try:
    wf = coordinator.workflow
    if IS_MULTICLUSTER_ONLY.get() and has_multi_cluster():
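      # Hardcoded demo configuration for provisioning a transient Altus cluster with the coordinator.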
      mapping['auto-cluster'] = {
        u'additionalClusterResourceTags': [],
        u'automaticTerminationCondition': u'EMPTY_JOB_QUEUE',  # or u'NONE'
        u'cdhVersion': u'CDH514',
        u'clouderaManagerPassword': u'guest',
        u'clouderaManagerUsername': u'guest',
        u'clusterName': u'analytics4', # Add time variable
        u'computeWorkersConfiguration': {
          u'bidUSDPerHr': 0,
          u'groupSize': 0,
          u'useSpot': False
        },
        u'environmentName': u'crn:altus:environments:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:environment:analytics/236ebdda-18bd-428a-9d2b-cd6973d42946',
        u'instanceBootstrapScript': u'',
        u'instanceType': u'm4.xlarge',
        u'jobSubmissionGroupName': u'',
        u'jobs': [{
            u'failureAction': u'INTERRUPT_JOB_QUEUE',
            u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
            u'sparkJob': {
              u'applicationArguments': ['5'],
              u'jars': [u's3a://datawarehouse-customer360/ETL/spark-examples.jar'],
              u'mainClass': u'org.apache.spark.examples.SparkPi'
            }
          },
  #         {
  #           u'failureAction': u'INTERRUPT_JOB_QUEUE',
  #           u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
  #           u'sparkJob': {
  #             u'applicationArguments': ['10'],
  #             u'jars': [u's3a://datawarehouse-customer360/ETL/spark-examples.jar'],
  #             u'mainClass': u'org.apache.spark.examples.SparkPi'
  #           }
  #         },
  #         {
  #           u'failureAction': u'INTERRUPT_JOB_QUEUE',
  #           u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
  #           u'sparkJob': {
  #             u'applicationArguments': [u'filesystems3.conf'],
  #             u'jars': [u's3a://datawarehouse-customer360/ETL/envelope-0.6.0-SNAPSHOT-c6.jar'],
  #             u'mainClass': u'com.cloudera.labs.envelope.EnvelopeMain',
  #             u'sparkArguments': u'--archives=s3a://datawarehouse-customer360/ETL/filesystems3.conf'
  #           }
  #         }
        ],
        u'namespaceName': u'crn:altus:sdx:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:namespace:analytics/7ea35fe5-dbc9-4b17-92b1-97a1ab32e410',
        u'publicKey': DEFAULT_PUBLIC_KEY.get(),
        u'serviceType': u'SPARK',
        u'workersConfiguration': {},
        u'workersGroupSize': u'3'
      }
    wf_dir = Submission(request.user, wf, request.fs, request.jt, mapping, local_tz=coordinator.data['properties']['timezone']).deploy()

    properties = {'wf_application_path': request.fs.get_hdfs_path(wf_dir)}
    properties.update(mapping)

    submission = Submission(request.user, coordinator, request.fs, request.jt, properties=properties)
    job_id = submission.run()

    return job_id
  except RestException as ex:
    LOG.exception('Error submitting coordinator')
    raise PopupException(_("Error submitting coordinator %s") % (coordinator,), detail=ex._headers.get('oozie-error-message', ex), error_code=200)
Example #34
def test_copy_files():
    cluster = pseudo_hdfs4.shared_cluster()

    try:
        c = make_logged_in_client()
        user = User.objects.get(username='******')
        ensure_home_directory(cluster.fs, user)

        prefix = '/tmp/test_copy_files'

        if cluster.fs.exists(prefix):
            cluster.fs.rmtree(prefix)

        # Jars in various locations
        deployment_dir = '%s/workspace' % prefix
        external_deployment_dir = '%s/deployment' % prefix
        jar_1 = '%s/udf1.jar' % prefix
        jar_2 = '%s/lib/udf2.jar' % prefix
        jar_3 = '%s/udf3.jar' % deployment_dir
        jar_4 = '%s/lib/udf4.jar' % deployment_dir  # Doesn't move
        jar_5 = 'udf5.jar'
        jar_6 = 'lib/udf6.jar'  # Doesn't move

        cluster.fs.mkdir(prefix)
        cluster.fs.create(jar_1)
        cluster.fs.create(jar_2)
        cluster.fs.create(jar_3)
        cluster.fs.create(jar_4)
        cluster.fs.create(deployment_dir + '/' + jar_5)
        cluster.fs.create(deployment_dir + '/' + jar_6)

        class MockJob(object):
            XML_FILE_NAME = 'workflow.xml'

            def __init__(self):
                self.deployment_dir = deployment_dir
                self.nodes = [
                    Node({
                        'id': '1',
                        'type': 'mapreduce',
                        'properties': {
                            'jar_path': jar_1
                        }
                    }),
                    Node({
                        'id': '2',
                        'type': 'mapreduce',
                        'properties': {
                            'jar_path': jar_2
                        }
                    }),
                    Node({
                        'id': '3',
                        'type': 'java',
                        'properties': {
                            'jar_path': jar_3
                        }
                    }),
                    Node({
                        'id': '4',
                        'type': 'java',
                        'properties': {
                            'jar_path': jar_4
                        }
                    }),

                    # Workspace relative paths
                    Node({
                        'id': '5',
                        'type': 'java',
                        'properties': {
                            'jar_path': jar_5
                        }
                    }),
                    Node({
                        'id': '6',
                        'type': 'java',
                        'properties': {
                            'jar_path': jar_6
                        }
                    })
                ]

        submission = Submission(user,
                                job=MockJob(),
                                fs=cluster.fs,
                                jt=cluster.jt)

        submission._copy_files(deployment_dir, "<xml>My XML</xml>",
                               {'prop1': 'val1'})
        submission._copy_files(external_deployment_dir, "<xml>My XML</xml>",
                               {'prop1': 'val1'})

        assert_true(cluster.fs.exists(deployment_dir + '/workflow.xml'),
                    deployment_dir)
        assert_true(cluster.fs.exists(deployment_dir + '/job.properties'),
                    deployment_dir)

        # All sources still there
        assert_true(cluster.fs.exists(jar_1))
        assert_true(cluster.fs.exists(jar_2))
        assert_true(cluster.fs.exists(jar_3))
        assert_true(cluster.fs.exists(jar_4))
        assert_true(cluster.fs.exists(deployment_dir + '/' + jar_5))
        assert_true(cluster.fs.exists(deployment_dir + '/' + jar_6))

        # Lib
        deployment_dir = deployment_dir + '/lib'
        external_deployment_dir = external_deployment_dir + '/lib'

        if USE_LIBPATH_FOR_JARS.get():
            assert_true(jar_1 in submission.properties['oozie.libpath'])
            assert_true(jar_2 in submission.properties['oozie.libpath'])
            assert_true(jar_3 in submission.properties['oozie.libpath'])
            assert_true(jar_4 in submission.properties['oozie.libpath'])
            assert_true((deployment_dir + '/' + jar_5)
                        in submission.properties['oozie.libpath'],
                        submission.properties['oozie.libpath'])
            assert_true((deployment_dir + '/' + jar_6)
                        in submission.properties['oozie.libpath'],
                        submission.properties['oozie.libpath'])
        else:
            list_dir_workspace = cluster.fs.listdir(deployment_dir)
            list_dir_deployment = cluster.fs.listdir(external_deployment_dir)

            # All destinations there
            assert_true(cluster.fs.exists(deployment_dir + '/udf1.jar'),
                        list_dir_workspace)
            assert_true(cluster.fs.exists(deployment_dir + '/udf2.jar'),
                        list_dir_workspace)
            assert_true(cluster.fs.exists(deployment_dir + '/udf3.jar'),
                        list_dir_workspace)
            assert_true(cluster.fs.exists(deployment_dir + '/udf4.jar'),
                        list_dir_workspace)
            assert_true(cluster.fs.exists(deployment_dir + '/udf5.jar'),
                        list_dir_workspace)
            assert_true(cluster.fs.exists(deployment_dir + '/udf6.jar'),
                        list_dir_workspace)

            assert_true(
                cluster.fs.exists(external_deployment_dir + '/udf1.jar'),
                list_dir_deployment)
            assert_true(
                cluster.fs.exists(external_deployment_dir + '/udf2.jar'),
                list_dir_deployment)
            assert_true(
                cluster.fs.exists(external_deployment_dir + '/udf3.jar'),
                list_dir_deployment)
            assert_true(
                cluster.fs.exists(external_deployment_dir + '/udf4.jar'),
                list_dir_deployment)
            assert_true(
                cluster.fs.exists(external_deployment_dir + '/udf5.jar'),
                list_dir_deployment)
            assert_true(
                cluster.fs.exists(external_deployment_dir + '/udf6.jar'),
                list_dir_deployment)

            stats_udf1 = cluster.fs.stats(deployment_dir + '/udf1.jar')
            stats_udf2 = cluster.fs.stats(deployment_dir + '/udf2.jar')
            stats_udf3 = cluster.fs.stats(deployment_dir + '/udf3.jar')
            stats_udf4 = cluster.fs.stats(deployment_dir + '/udf4.jar')
            stats_udf5 = cluster.fs.stats(deployment_dir + '/udf5.jar')
            stats_udf6 = cluster.fs.stats(deployment_dir + '/udf6.jar')

            submission._copy_files('%s/workspace' % prefix,
                                   "<xml>My XML</xml>", {'prop1': 'val1'})

            assert_not_equal(
                stats_udf1['fileId'],
                cluster.fs.stats(deployment_dir + '/udf1.jar')['fileId'])
            assert_not_equal(
                stats_udf2['fileId'],
                cluster.fs.stats(deployment_dir + '/udf2.jar')['fileId'])
            assert_not_equal(
                stats_udf3['fileId'],
                cluster.fs.stats(deployment_dir + '/udf3.jar')['fileId'])
            assert_equal(
                stats_udf4['fileId'],
                cluster.fs.stats(deployment_dir + '/udf4.jar')['fileId'])
            assert_not_equal(
                stats_udf5['fileId'],
                cluster.fs.stats(deployment_dir + '/udf5.jar')['fileId'])
            assert_equal(
                stats_udf6['fileId'],
                cluster.fs.stats(deployment_dir + '/udf6.jar')['fileId'])

        # Test _create_file()
        submission._create_file(deployment_dir, 'test.txt', data='Test data')
        assert_true(cluster.fs.exists(deployment_dir + '/test.txt'),
                    cluster.fs.listdir(deployment_dir))

    finally:
        try:
            cluster.fs.rmtree(prefix)
        except Exception:
            LOG.exception('failed to remove %s' % prefix)
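Taken together, the assertions pin down the copy contract: jars referenced outside the deployment directory are copied into its lib/ subdirectory, jars already under a lib/ path of the workspace (jar_4, jar_6) stay where they are, and a second _copy_files call replaces the copied jars (new fileId) while leaving the in-place ones untouched. A condensed sketch of that contract, reconstructed from the assertions rather than from the _copy_files implementation:

import posixpath

def expected_lib_destination(jar_path, deployment_dir):
    # Assumption for illustration: mirrors what the test asserts, not the
    # actual _copy_files code.
    lib_dir = deployment_dir + '/lib'
    if jar_path.startswith(lib_dir + '/') or jar_path.startswith('lib/'):
        return None  # already under the deployment lib/: left in place
    return '%s/%s' % (lib_dir, posixpath.basename(jar_path))

Under this reading, jar_4 and jar_6 map to None (their fileId is unchanged after the second copy), while the other four land in lib/ and are overwritten on re-deploy.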
Example #35
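        # Truncated excerpt: resumes inside the RestException handler of an
        # external-job submission view, after submission.run() has failed;
        # 'detail' holds the error message extracted from the exception.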
        if 'Max retries exceeded with url' in str(detail):
          detail = '%s: %s' % (_('The Oozie server is not running'), detail)
        LOG.exception(smart_str(detail))
        raise PopupException(_("Error submitting job %s") % (application_path,), detail=detail)

      jsonify = request.POST.get('format') == 'json'
      if jsonify:
        return JsonResponse({'status': 0, 'job_id': job_id, 'type': 'external_workflow'}, safe=False)
      else:
        request.info(_('Oozie job submitted'))
        view = 'list_oozie_bundle' if application_name == 'bundle.xml' else 'list_oozie_coordinator' if application_name == 'coordinator.xml' else 'list_oozie_workflow'
        return redirect(reverse('oozie:%s' % view, kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))
  else:
    parameters = Submission(request.user, fs=request.fs, jt=request.jt).get_external_parameters(application_path)
    initial_params = ParameterForm.get_initial_params(parameters)
    params_form = ParametersFormSet(initial=initial_params)

  popup = render('editor/submit_job_popup.mako', request, {
                   'params_form': params_form,
                   'name': _('Job'),
                   'action': reverse('oozie:submit_external_job', kwargs={'application_path': application_path}),
                   'show_dryrun': os.path.basename(application_path) != 'bundle.xml',
                   'return_json': request.GET.get('format') == 'json'
                 }, force_template=True).content
  return JsonResponse(popup, safe=False)


def massaged_workflow_actions_for_json(workflow_actions, oozie_coordinator, oozie_bundle):
  actions = []
Example #36
    def test_update_properties(self):
        finish = []
        finish.append(MR_CLUSTERS.set_for_testing({'default': {}}))
        finish.append(MR_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
        finish.append(YARN_CLUSTERS.set_for_testing({'default': {}}))
        finish.append(YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
        try:
            properties = {
                'user.name': 'hue',
                'test.1': 'http://localhost/test?test1=test&test2=test',
                'nameNode': 'hdfs://curacao:8020',
                'jobTracker': 'jtaddress',
                'security_enabled': False
            }

            final_properties = properties.copy()
            submission = Submission(None,
                                    properties=properties,
                                    oozie_id='test',
                                    fs=MockFs())
            assert_equal(properties, submission.properties)
            submission._update_properties('jtaddress', 'deployment-directory')
            assert_equal(final_properties, submission.properties)

            clear_sys_caches()
            fs = cluster.get_hdfs()
            final_properties = properties.copy()
            final_properties.update({
                'jobTracker': 'jtaddress',
                'nameNode': fs.fs_defaultfs
            })
            submission = Submission(None,
                                    properties=properties,
                                    oozie_id='test',
                                    fs=fs,
                                    jt=None)
            assert_equal(properties, submission.properties)
            submission._update_properties('jtaddress', 'deployment-directory')
            assert_equal(final_properties, submission.properties)

            finish.append(
                HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing(
                    'namenode'))
            finish.append(MR_CLUSTERS['default'].LOGICAL_NAME.set_for_testing(
                'jobtracker'))
            clear_sys_caches()
            fs = cluster.get_hdfs()
            final_properties = properties.copy()
            final_properties.update({
                'jobTracker': 'jobtracker',
                'nameNode': 'namenode'
            })
            submission = Submission(None,
                                    properties=properties,
                                    oozie_id='test',
                                    fs=fs,
                                    jt=None)
            assert_equal(properties, submission.properties)
            submission._update_properties('jtaddress', 'deployment-directory')
            assert_equal(final_properties, submission.properties)
        finally:
            clear_sys_caches()
            for reset in finish:
                reset()
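The second and third phases differ only in configuration: with a concrete HDFS handle the expected nameNode is fs.fs_defaultfs, and once LOGICAL_NAME is set for the HDFS and MR clusters the logical aliases are expected instead. A one-line sketch of that precedence rule, assumed from the expected dictionaries rather than read out of _update_properties:

def resolve_endpoint(physical_address, logical_name=None):
    # Assumption for illustration: a configured logical name (an HA alias)
    # wins over the physical address when filling nameNode or jobTracker.
    return logical_name if logical_name else physical_address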
Example #37
def test_copy_files():
  cluster = pseudo_hdfs4.shared_cluster()

  try:
    c = make_logged_in_client()
    user = User.objects.get(username='******')

    prefix = '/tmp/test_copy_files'

    if cluster.fs.exists(prefix):
      cluster.fs.rmtree(prefix)

    # Jars in various locations
    deployment_dir = '%s/workspace' % prefix
    external_deployment_dir = '%s/deployment' % prefix
    jar_1 = '%s/udf1.jar' % prefix
    jar_2 = '%s/lib/udf2.jar' % prefix
    jar_3 = '%s/udf3.jar' % deployment_dir
    jar_4 = '%s/lib/udf4.jar' % deployment_dir # Doesn't move
    jar_5 = 'udf5.jar'
    jar_6 = 'lib/udf6.jar' # Doesn't move

    cluster.fs.mkdir(prefix)
    cluster.fs.create(jar_1)
    cluster.fs.create(jar_2)
    cluster.fs.create(jar_3)
    cluster.fs.create(jar_4)
    cluster.fs.create(deployment_dir + '/' + jar_5)
    cluster.fs.create(deployment_dir + '/' + jar_6)

    class MockJob(object):
      XML_FILE_NAME = 'workflow.xml'

      def __init__(self):
        self.deployment_dir = deployment_dir
        self.nodes = [
            Node({'id': '1', 'type': 'mapreduce', 'properties': {'jar_path': jar_1}}),
            Node({'id': '2', 'type': 'mapreduce', 'properties': {'jar_path': jar_2}}),
            Node({'id': '3', 'type': 'java', 'properties': {'jar_path': jar_3}}),
            Node({'id': '4', 'type': 'java', 'properties': {'jar_path': jar_4}}),

            # Workspace relative paths
            Node({'id': '5', 'type': 'java', 'properties': {'jar_path': jar_5}}),
            Node({'id': '6', 'type': 'java', 'properties': {'jar_path': jar_6}})
        ]

    submission = Submission(user, job=MockJob(), fs=cluster.fs, jt=cluster.jt)

    submission._copy_files(deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})
    submission._copy_files(external_deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})

    assert_true(cluster.fs.exists(deployment_dir + '/workflow.xml'), deployment_dir)
    assert_true(cluster.fs.exists(deployment_dir + '/job.properties'), deployment_dir)

    # All sources still there
    assert_true(cluster.fs.exists(jar_1))
    assert_true(cluster.fs.exists(jar_2))
    assert_true(cluster.fs.exists(jar_3))
    assert_true(cluster.fs.exists(jar_4))
    assert_true(cluster.fs.exists(deployment_dir + '/' + jar_5))
    assert_true(cluster.fs.exists(deployment_dir + '/' + jar_6))

    # Lib
    deployment_dir = deployment_dir + '/lib'
    external_deployment_dir = external_deployment_dir + '/lib'

    list_dir_workspace = cluster.fs.listdir(deployment_dir)
    list_dir_deployment = cluster.fs.listdir(external_deployment_dir)

    # All destinations there
    assert_true(cluster.fs.exists(deployment_dir + '/udf1.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf2.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf3.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf4.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf5.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf6.jar'), list_dir_workspace)

    assert_true(cluster.fs.exists(external_deployment_dir + '/udf1.jar'), list_dir_deployment)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf2.jar'), list_dir_deployment)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf3.jar'), list_dir_deployment)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf4.jar'), list_dir_deployment)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf5.jar'), list_dir_deployment)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf6.jar'), list_dir_deployment)

    stats_udf1 = cluster.fs.stats(deployment_dir + '/udf1.jar')
    stats_udf2 = cluster.fs.stats(deployment_dir + '/udf2.jar')
    stats_udf3 = cluster.fs.stats(deployment_dir + '/udf3.jar')
    stats_udf4 = cluster.fs.stats(deployment_dir + '/udf4.jar')
    stats_udf5 = cluster.fs.stats(deployment_dir + '/udf5.jar')
    stats_udf6 = cluster.fs.stats(deployment_dir + '/udf6.jar')

    submission._copy_files('%s/workspace' % prefix, "<xml>My XML</xml>", {'prop1': 'val1'})

    assert_not_equal(stats_udf1['fileId'], cluster.fs.stats(deployment_dir + '/udf1.jar')['fileId'])
    assert_not_equal(stats_udf2['fileId'], cluster.fs.stats(deployment_dir + '/udf2.jar')['fileId'])
    assert_not_equal(stats_udf3['fileId'], cluster.fs.stats(deployment_dir + '/udf3.jar')['fileId'])
    assert_equal(stats_udf4['fileId'], cluster.fs.stats(deployment_dir + '/udf4.jar')['fileId'])
    assert_not_equal(stats_udf5['fileId'], cluster.fs.stats(deployment_dir + '/udf5.jar')['fileId'])
    assert_equal(stats_udf6['fileId'], cluster.fs.stats(deployment_dir + '/udf6.jar')['fileId'])

    # Test _create_file()
    submission._create_file(deployment_dir, 'test.txt', data='Test data')
    assert_true(cluster.fs.exists(deployment_dir + '/test.txt'), list_dir_workspace)

  finally:
    try:
      cluster.fs.rmtree(prefix)
    except Exception:
      LOG.exception('failed to remove %s' % prefix)