Example #1
def sync_coord_workflow(request, job_id):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  job = check_job_access_permission(request, job_id)
  check_job_edition_permission(job, request.user)

  hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
  hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
  wf_application_path = job.conf_dict.get('wf_application_path') and Hdfs.urlsplit(job.conf_dict['wf_application_path'])[2] or ''
  coord_application_path = job.conf_dict.get('oozie.coord.application.path') and Hdfs.urlsplit(job.conf_dict['oozie.coord.application.path'])[2] or ''
  properties = hue_coord and hue_coord.properties and dict([(param['name'], param['value']) for param in hue_coord.properties]) or None

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)
    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      # Update workflow params in coordinator
      hue_coord.clear_workflow_params()
      properties = dict([(param['name'], param['value']) for param in hue_coord.properties])

      # Deploy WF XML
      submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties)
      submission.deploy(deployment_dir=wf_application_path)
      submission._create_file(wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True)

      # Deploy Coordinator XML
      job.conf_dict.update(mapping)
      submission = Submission(user=request.user, job=hue_coord, fs=request.fs, jt=request.jt, properties=job.conf_dict, oozie_id=job.id)
      submission._create_file(coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True)
      # Server picks up deployed Coordinator XML changes after running 'update' action
      submission.update_coord()

      request.info(_('Successfully updated Workflow definition'))
      return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s') % params_form.errors)
  else:
    new_params = hue_wf and hue_wf.find_all_parameters() or []
    new_params = dict([(param['name'], param['value']) for param in new_params])

    # Set previous values
    if properties:
      new_params = dict([(key, properties[key]) if key in properties.keys() else (key, new_params[key]) for key, value in new_params.iteritems()])

    initial_params = ParameterForm.get_initial_params(new_params)
    params_form = ParametersFormSet(initial=initial_params)

  popup = render('editor2/submit_job_popup.mako', request, {
             'params_form': params_form,
             'name': _('Job'),
             'header': _('Sync Workflow definition?'),
             'action': reverse('oozie:sync_coord_workflow', kwargs={'job_id': job_id})
           }, force_template=True).content
  return JsonResponse(popup, safe=False)
Example #2
 def rename_star(self, old_dir, new_dir):
   """Equivalent to `mv old_dir/* new"""
   if not self.isdir(old_dir):
     raise IOError(errno.ENOTDIR, _("'%s' is not a directory") % old_dir)
   if not self.exists(new_dir):
     self.mkdir(new_dir)
   elif not self.isdir(new_dir):
     raise IOError(errno.ENOTDIR, _("'%s' is not a directory") % new_dir)
   ls = self.listdir(old_dir)
   for dirent in ls:
     self.rename(Hdfs.join(old_dir, dirent), Hdfs.join(new_dir, dirent))
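A minimal local-filesystem analogue of the same logic, runnable without a cluster; os.rename and os.listdir stand in for the HDFS client calls, and the paths are hypothetical:

import errno
import os
import posixpath

def rename_star_local(old_dir, new_dir):
  """Local analogue of rename_star: `mv old_dir/* new_dir`."""
  if not os.path.isdir(old_dir):
    raise IOError(errno.ENOTDIR, "'%s' is not a directory" % old_dir)
  if not os.path.exists(new_dir):
    os.mkdir(new_dir)
  elif not os.path.isdir(new_dir):
    raise IOError(errno.ENOTDIR, "'%s' is not a directory" % new_dir)
  for dirent in os.listdir(old_dir):
    os.rename(posixpath.join(old_dir, dirent), posixpath.join(new_dir, dirent))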
Example #3
 def rename(self, old, new):
     """rename(old, new)"""
     old = Hdfs.normpath(old)
     if not new.startswith("/"):
         new = Hdfs.join(Hdfs.dirname(old), new)
     new = Hdfs.normpath(new)
     params = self._getparams()
     params["op"] = "RENAME"
     # Encode `new' because it's in the params
     params["destination"] = smart_str(new)
     result = self._root.put(old, params)
     if not result["boolean"]:
         raise IOError("Rename failed: %s -> %s" % (smart_str(old), smart_str(new)))
Example #4
 def rename(self, old, new):
   """rename(old, new)"""
   old = Hdfs.normpath(old)
   if not new.startswith('/'):
     new = Hdfs.join(Hdfs.dirname(old), new)
   new = Hdfs.normpath(new)
   params = self._getparams()
   params['op'] = 'RENAME'
   # Encode `new' because it's in the params
   params['destination'] = smart_str(new)
   result = self._root.put(old, params)
   if not result['boolean']:
     raise IOError(_("Rename failed: %s -> %s") %
                   (str(smart_str(old)), str(smart_str(new))))
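For reference, RENAME in the WebHDFS REST API is a PUT with a destination query parameter, answered by a JSON boolean, which is what result['boolean'] above inspects. A hedged sketch of the raw call with requests (host, port, and user.name are illustrative):

import requests

resp = requests.put(
  'http://namenode.example.com:50070/webhdfs/v1/user/demo/old.txt',
  params={'op': 'RENAME', 'destination': '/user/demo/new.txt', 'user.name': 'demo'})
print(resp.json())  # {'boolean': True} on success, {'boolean': False} otherwise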
Example #5
  def listdir(self, path, glob=None):
    """
    listdir(path, glob=None) -> [ entry names ]

    Get directory entry names without stats.
    """
    dirents = self.listdir_stats(path, glob)
    return [Hdfs.basename(x.path) for x in dirents]
Example #6
 def _create_deployment_dir(self):
   """
   Return the job deployment directory in HDFS, creating it if necessary.
   The actual deployment dir should be 0711 owned by the user
   """
   path = Hdfs.join(REMOTE_DEPLOYMENT_DIR.get(), '_%s_-oozie-%s-%s' % (self.user.username, self.job.id, time.time()))
   self._create_dir(path)
   return path
Example #7
 def get_content_summary(self, path):
     """
 get_content_summary(path) -> WebHdfsContentSummary
 """
     path = Hdfs.normpath(path)
     params = self._getparams()
     params["op"] = "GETCONTENTSUMMARY"
     json = self._root.get(path, params)
     return WebHdfsContentSummary(json["ContentSummary"])
Example #8
def hdfs_link_js(url):
  link = 'javascript:void(0)'

  if url:
    path = Hdfs.urlsplit(url)[2]
    if path:
      link = ('/filebrowser/view=%s' if path.startswith(posixpath.sep) else '/filebrowser/home_relative_view=/%s') % path

  return link
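hdfs_link_js is pure string logic, so it can be exercised standalone; urllib's urlsplit stands in for Hdfs.urlsplit here (an assumption, both return a 5-tuple whose index 2 is the path):

import posixpath
from urllib.parse import urlsplit

def hdfs_link_js(url):
  link = 'javascript:void(0)'
  if url:
    path = urlsplit(url)[2]
    if path:
      link = ('/filebrowser/view=%s' if path.startswith(posixpath.sep) else '/filebrowser/home_relative_view=/%s') % path
  return link

print(hdfs_link_js('hdfs://nn:8020/user/demo/f.txt'))  # /filebrowser/view=/user/demo/f.txt
print(hdfs_link_js('data/f.txt'))                      # /filebrowser/home_relative_view=/data/f.txt
print(hdfs_link_js(''))                                # javascript:void(0)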
Example #9
 def get_content_summary(self, path):
   """
   get_content_summary(path) -> WebHdfsContentSummary
   """
   path = Hdfs.normpath(path)
   params = self._getparams()
   params['op'] = 'GETCONTENTSUMMARY'
   json = self._root.get(path, params)
   return WebHdfsContentSummary(json['ContentSummary'])
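The GETCONTENTSUMMARY response that json['ContentSummary'] unwraps has this documented shape (values illustrative):

json = {
  'ContentSummary': {
    'directoryCount': 2,
    'fileCount': 1,
    'length': 24930,
    'quota': -1,
    'spaceConsumed': 24930,
    'spaceQuota': -1,
  }
}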
Example #10
File: views.py Project: atupal/hue
def parse_breadcrumbs(path):
    breadcrumbs_parts = Hdfs.normpath(path).split('/')
    i = 1
    breadcrumbs = [{'url': '', 'label': '/'}]
    while (i < len(breadcrumbs_parts)):
        breadcrumb_url = breadcrumbs[i - 1]['url'] + '/' + breadcrumbs_parts[i]
        if breadcrumb_url != '/':
            breadcrumbs.append({'url': breadcrumb_url, 'label': breadcrumbs_parts[i]})
        i = i + 1
    return breadcrumbs
Example #11
def parse_breadcrumbs(path):
    breadcrumbs_parts = Hdfs.normpath(path).split("/")
    i = 1
    breadcrumbs = [{"url": "", "label": "/"}]
    while i < len(breadcrumbs_parts):
        breadcrumb_url = breadcrumbs[i - 1]["url"] + "/" + breadcrumbs_parts[i]
        if breadcrumb_url != "/":
            breadcrumbs.append({"url": breadcrumb_url, "label": breadcrumbs_parts[i]})
        i = i + 1
    return breadcrumbs
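A worked call, since the function is pure string manipulation (posixpath.normpath behaves like Hdfs.normpath for these inputs, an assumption):

print(parse_breadcrumbs('/user/hue/data'))
# [{'url': '', 'label': '/'}, {'url': '/user', 'label': 'user'},
#  {'url': '/user/hue', 'label': 'hue'}, {'url': '/user/hue/data', 'label': 'data'}]

Each crumb's url is the previous crumb's url plus one more component, so a template can render the trail as links.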
Example #12
def _get_service_url(hdfs_config):
  override = hdfs_config.WEBHDFS_URL.get()
  if override:
    return override

  fs_defaultfs = hdfs_config.FS_DEFAULTFS.get()
  netloc = Hdfs.urlsplit(fs_defaultfs)[1]
  host = netloc.split(':')[0]
  port = hadoop.conf.DEFAULT_NN_HTTP_PORT
  return "http://%s:%s/webhdfs/v1" % (host, port)
Example #13
  def append(self, path, data):
    """
    append(path, data)

    Append data to a given file.
    """
    path = Hdfs.normpath(path)
    params = self._getparams()
    params['op'] = 'APPEND'
    self._invoke_with_redirect('POST', path, params, data)
Example #14
 def chown(self, path, user=None, group=None):
   """chown(path, user=None, group=None)"""
   path = Hdfs.normpath(path)
   params = self._getparams()
   params['op'] = 'SETOWNER'
   if user is not None:
     params['owner'] = user
   if group is not None:
     params['group'] = group
   self._root.put(path, params)
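These wrappers all share one WebHDFS shape: normalize the path, build a params dict with an op plus op-specific keys, then issue the HTTP verb against /webhdfs/v1/<path>. A hedged sketch of the raw SETOWNER request (endpoint and names are illustrative; per the WebHDFS spec the success body is empty):

import requests

requests.put(
  'http://namenode.example.com:50070/webhdfs/v1/user/demo/data',
  params={'op': 'SETOWNER', 'owner': 'demo', 'group': 'hadoop', 'user.name': 'hdfs'})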
Example #15
    def append(self, path, data):
        """
    append(path, data)

    Append data to a given file.
    """
        path = Hdfs.normpath(path)
        params = self._getparams()
        params["op"] = "APPEND"
        self._invoke_with_redirect("POST", path, params, data)
Example #16
  def chmod(self, path, mode):
    """
    chmod(path, mode)

    `mode' should be an octal integer or string.
    """
    path = Hdfs.normpath(path)
    params = self._getparams()
    params['op'] = 'SETPERMISSION'
    params['permission'] = safe_octal(mode)
    self._root.put(path, params)
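Hypothetical calls against a configured client fs; safe_octal is the project's helper for rendering the mode, and WebHDFS expects the permission parameter as an octal string such as '755' or '1777':

fs.chmod('/user/demo/data', 0o755)   # octal integer
fs.chmod('/user/demo/data', '1777')  # string form, here with the sticky bit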
Example #17
def create_directory(request):
    parent_path = json.loads(request.POST.get("parent_path"))
    name = json.loads(request.POST.get("name"))

    parent_dir = Directory.objects.get(owner=request.user, name=parent_path)

    path = Hdfs.normpath(parent_path + "/" + name)
    file_doc = Directory.objects.create(name=path, type="directory", owner=request.user)
    parent_dir.dependencies.add(file_doc)

    return JsonResponse({"status": 0, "file": file_doc.to_dict()})
Example #18
 def _create_deployment_dir(self):
   """
   Return the job deployment directory in HDFS, creating it if necessary.
   The actual deployment dir should be 0711 owned by the user
   """
   if self.user != self.job.owner:
     path = Hdfs.join(REMOTE_DEPLOYMENT_DIR.get(), '_%s_-oozie-%s-%s' % (self.user.username, self.job.id, time.time()))
     self.fs.copy_remote_dir(self.job.deployment_dir, path, owner=self.user, dir_mode=0o711)
   else:
     path = self.job.deployment_dir
     self._create_dir(path)
   return path
Example #19
File: views.py Project: atupal/hue
def listdir(request, path, chooser):
    """
    Implements directory listing (or index).

    Intended to be called via view().

    TODO: Remove?
    """
    if not request.fs.isdir(path):
        raise PopupException(_("Not a directory: %(path)s") % {'path': path})

    file_filter = request.REQUEST.get('file_filter', 'any')

    assert file_filter in ['any', 'file', 'dir']

    home_dir_path = request.user.get_home_directory()

    breadcrumbs = parse_breadcrumbs(path)

    data = {
        'path': path,
        'file_filter': file_filter,
        'breadcrumbs': breadcrumbs,
        'current_dir_path': path,
        # These could also be put in automatically via
        # http://docs.djangoproject.com/en/dev/ref/templates/api/#django-core-context-processors-request,
        # but manually seems cleaner, since we only need it here.
        'current_request_path': request.path,
        'home_directory': request.fs.isdir(home_dir_path) and home_dir_path or None,
        'cwd_set': True,
        'is_superuser': request.user.username == request.fs.superuser,
        'groups': request.user.username == request.fs.superuser and [str(x) for x in Group.objects.values_list('name', flat=True)] or [],
        'users': request.user.username == request.fs.superuser and [str(x) for x in User.objects.values_list('username', flat=True)] or [],
        'superuser': request.fs.superuser,
        'show_upload': (request.REQUEST.get('show_upload') == 'false' and (False,) or (True,))[0]
    }

    stats = request.fs.listdir_stats(path)

    # Include parent dir, unless at filesystem root.
    if Hdfs.normpath(path) != posixpath.sep:
        parent_path = request.fs.join(path, "..")
        parent_stat = request.fs.stats(parent_path)
        # The 'path' field would be absolute, but we want its basename to be
        # actually '..' for display purposes. Encode it since _massage_stats expects byte strings.
        parent_stat['path'] = parent_path
        stats.insert(0, parent_stat)

    data['files'] = [_massage_stats(request, stat) for stat in stats]
    if chooser:
        return render('chooser.mako', request, data)
    else:
        return render('listdir.mako', request, data)
Example #20
def hdfs_link(url):
  if url:
    path = Hdfs.urlsplit(url)[2]
    if path:
      if path.startswith(posixpath.sep):
        return "/filebrowser/view=" + path
      else:
        return "/filebrowser/home_relative_view=/" + path
    else:
      return url
  else:
    return url
Example #21
 def _stats(self, path):
   """This version of stats returns None if the entry is not found"""
   path = Hdfs.normpath(path)
   params = self._getparams()
   params['op'] = 'GETFILESTATUS'
   try:
     json = self._root.get(path, params)
     return WebHdfsStat(json['FileStatus'], path)
   except WebHdfsException as ex:
     if ex.server_exc == 'FileNotFoundException' or ex.code == 404:
       return None
     raise ex
Example #22
def create_directories(fs, directory_list=[]):
  # If needed, create the remote home, deployment and data directories
  directories = [REMOTE_DEPLOYMENT_DIR.get()] + directory_list

  for directory in directories:
    if not fs.do_as_user(fs.DEFAULT_USER, fs.exists, directory):
      remote_home_dir = Hdfs.join('/user', fs.DEFAULT_USER)
      if directory.startswith(remote_home_dir):
        # Home is 755
        fs.do_as_user(fs.DEFAULT_USER, fs.create_home_dir, remote_home_dir)
      # Shared by all the users
      fs.do_as_user(fs.DEFAULT_USER, fs.mkdir, directory, 0o1777)
      fs.do_as_user(fs.DEFAULT_USER, fs.chmod, directory, 0o1777) # To remove after https://issues.apache.org/jira/browse/HDFS-3491
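Why 0o1777: it is world-writable plus the sticky bit, so every user can create entries but only delete their own, the classic mode for shared directories. The stat module renders it:

import stat

print(stat.filemode(stat.S_IFDIR | 0o1777))  # drwxrwxrwt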
Example #23
  def check_access(self, path, aclspec='rw-'):
    path = Hdfs.normpath(path)
    params = self._getparams()
    params['op'] = 'CHECKACCESS'
    params['fsaction'] = aclspec

    try:
      return self._root.get(path, params)
    except WebHdfsException as ex:
      if ex.code == 500 or ex.code == 400:
        LOG.warn('Failed to check access to path %s, CHECKACCESS operation may not be supported.' % path)
        return None
      else:
        raise ex
Example #24
  def mkdir(self, path, mode=None):
    """
    mkdir(path, mode=None)

    Creates a directory and any parent directory if necessary.
    """
    path = Hdfs.normpath(path)
    params = self._getparams()
    params['op'] = 'MKDIRS'
    if mode is not None:
      params['permission'] = safe_octal(mode)
    success = self._root.put(path, params)
    if not success:
      raise IOError(_("Mkdir failed: %s") % path)
Example #25
  def listdir_stats(self, path, glob=None):
    """
    listdir_stats(path, glob=None) -> [ WebHdfsStat ]

    Get directory listing with stats.
    """
    path = Hdfs.normpath(path)
    params = self._getparams()
    if glob is not None:
      params['filter'] = glob
    params['op'] = 'LISTSTATUS'
    json = self._root.get(path, params)
    filestatus_list = json['FileStatuses']['FileStatus']
    return [ WebHdfsStat(st, path) for st in filestatus_list ]
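The LISTSTATUS payload that json['FileStatuses']['FileStatus'] unwraps is a list of status objects with this documented shape (values illustrative):

json = {
  'FileStatuses': {
    'FileStatus': [
      {'pathSuffix': 'a.txt', 'type': 'FILE', 'length': 24930,
       'owner': 'demo', 'group': 'supergroup', 'permission': '644',
       'blockSize': 134217728, 'replication': 3,
       'accessTime': 1320171722771, 'modificationTime': 1320171722771},
    ]
  }
}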
Example #26
    def mkdir(self, path, mode=None):
        """
    mkdir(path, mode=None)

    Creates a directory and any parent directory if necessary.
    """
        path = Hdfs.normpath(path)
        params = self._getparams()
        params["op"] = "MKDIRS"
        if mode is not None:
            params["permission"] = safe_octal(mode)
        success = self._root.put(path, params)
        if not success:
            raise IOError("Mkdir failed: %s" % (smart_str(path),))
Example #27
 def chown(self, path, user=None, group=None, recursive=False):
   """chown(path, user=None, group=None, recursive=False)"""
   path = Hdfs.normpath(path)
   params = self._getparams()
   params['op'] = 'SETOWNER'
   if user is not None:
     params['owner'] = user
   if group is not None:
     params['group'] = group
   if recursive:
     for xpath in self.listdir_recursive(path):
       self._root.put(xpath, params)
   else:
     self._root.put(path, params)
Example #28
    def listdir_stats(self, path, glob=None):
        """
    listdir_stats(path, glob=None) -> [ WebHdfsStat ]

    Get directory listing with stats.
    """
        path = Hdfs.normpath(path)
        params = self._getparams()
        if glob is not None:
            params["filter"] = glob
        params["op"] = "LISTSTATUS"
        json = self._root.get(path, params)
        filestatus_list = json["FileStatuses"]["FileStatus"]
        return [WebHdfsStat(st, path) for st in filestatus_list]
Example #29
 def chown(self, path, user=None, group=None, recursive=False):
     """chown(path, user=None, group=None, recursive=False)"""
     path = Hdfs.normpath(path)
     params = self._getparams()
     params["op"] = "SETOWNER"
     if user is not None:
         params["owner"] = user
     if group is not None:
         params["group"] = group
     if recursive:
         for xpath in self._listdir_r(path):
             self._root.put(xpath, params)
     else:
         self._root.put(path, params)
Example #30
File: api2.py Project: antbell/hue
def create_directory(request):
  parent_path = json.loads(request.POST.get('parent_path'))
  name = json.loads(request.POST.get('name'))

  parent_dir = Directory.objects.get(owner=request.user, name=parent_path)

  path = Hdfs.normpath(parent_path + '/' + name)
  file_doc = Directory.objects.create(name=path, owner=request.user)
  parent_dir.dependencies.add(file_doc)

  return JsonResponse({
      'status': 0,
      'file': file_doc.to_dict()
  })
Example #31
 def strip_normpath(self, path):
   split = urlparse(path)
   path = split._replace(scheme="", netloc="").geturl()
   return Hdfs.normpath(path)
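A runnable stand-in showing the effect; urlparse is the stdlib call the snippet uses, and posixpath.normpath approximates Hdfs.normpath (an assumption):

import posixpath
from urllib.parse import urlparse

def strip_normpath(path):
  split = urlparse(path)
  path = split._replace(scheme='', netloc='').geturl()
  return posixpath.normpath(path)

print(strip_normpath('hdfs://namenode:8020/user//demo/./data'))  # /user/demo/data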
Example #33
class Submission(object):
    """
  Represents one unique Oozie submission.

  Actions are:
  - submit
  - rerun
  """
    def __init__(self,
                 user,
                 job=None,
                 fs=None,
                 jt=None,
                 properties=None,
                 oozie_id=None,
                 local_tz=None):
        self.job = job
        self.user = user
        self.fs = fs
        self.jt = jt  # Deprecated with YARN, we now use logical names only for RM
        self.oozie_id = oozie_id
        self.api = get_oozie(self.user)

        if properties is not None:
            self.properties = properties
        else:
            self.properties = {}

        if local_tz and isinstance(self.job.data, dict):
            local_tz = self.job.data.get('properties')['timezone']

        # Modify start_date & end_date only when it's a coordinator
        from oozie.models2 import Coordinator
        if type(self.job) is Coordinator:
            if 'start_date' in self.properties:
                properties['start_date'] = convert_to_server_timezone(
                    self.properties['start_date'], local_tz)
            if 'end_date' in self.properties:
                properties['end_date'] = convert_to_server_timezone(
                    self.properties['end_date'], local_tz)

        if 'nominal_time' in self.properties:
            properties['nominal_time'] = convert_to_server_timezone(
                self.properties['nominal_time'], local_tz)

        self.properties['security_enabled'] = self.api.security_enabled

    def __str__(self):
        if self.oozie_id:
            res = "Submission for job '%s'." % (self.oozie_id, )
        else:
            res = "Submission for job '%s' (id %s, owner %s)." % (
                self.job.name, self.job.id, self.user)
        if self.oozie_id:
            res += " -- " + self.oozie_id
        return res

    @submit_dryrun
    def run(self, deployment_dir=None):
        """
    Take care of all the actions of submitting a Oozie workflow.
    Returns the oozie job id if all goes well.
    """

        if self.properties and 'oozie.use.system.libpath' not in self.properties:
            self.properties['oozie.use.system.libpath'] = 'true'

        self.oozie_id = self.api.submit_job(self.properties)
        LOG.info("Submitted: %s" % (self, ))

        if self._is_workflow():
            self.api.job_control(self.oozie_id, 'start')
            LOG.info("Started: %s" % (self, ))

        return self.oozie_id

    def rerun(self, deployment_dir, fail_nodes=None, skip_nodes=None):
        jt_address = cluster.get_cluster_addr_for_job_submission()

        self._update_properties(jt_address, deployment_dir)
        self.properties.update({'oozie.wf.application.path': deployment_dir})

        if 'oozie.coord.application.path' in self.properties:
            self.properties.pop('oozie.coord.application.path')

        if 'oozie.bundle.application.path' in self.properties:
            self.properties.pop('oozie.bundle.application.path')

        if fail_nodes:
            self.properties.update({'oozie.wf.rerun.failnodes': fail_nodes})
        elif not skip_nodes:
            self.properties.update({'oozie.wf.rerun.failnodes': 'false'})  # Case empty 'skip_nodes' list
        else:
            self.properties.update({'oozie.wf.rerun.skip.nodes': skip_nodes})

        self.api.rerun(self.oozie_id, properties=self.properties)

        LOG.info("Rerun: %s" % (self, ))

        return self.oozie_id

    def rerun_coord(self, deployment_dir, params):
        jt_address = cluster.get_cluster_addr_for_job_submission()

        self._update_properties(jt_address, deployment_dir)
        self.properties.update(
            {'oozie.coord.application.path': deployment_dir})

        self.api.job_control(self.oozie_id,
                             action='coord-rerun',
                             properties=self.properties,
                             parameters=params)
        LOG.info("Rerun: %s" % (self, ))

        return self.oozie_id

    def update_coord(self):
        self.api = get_oozie(self.user, api_version="v2")
        self.api.job_control(self.oozie_id,
                             action='update',
                             properties=self.properties,
                             parameters=None)
        LOG.info("Update: %s" % (self, ))

        return self.oozie_id

    def rerun_bundle(self, deployment_dir, params):
        jt_address = cluster.get_cluster_addr_for_job_submission()

        self._update_properties(jt_address, deployment_dir)
        self.properties.update(
            {'oozie.bundle.application.path': deployment_dir})
        self.api.job_control(self.oozie_id,
                             action='bundle-rerun',
                             properties=self.properties,
                             parameters=params)
        LOG.info("Rerun: %s" % (self, ))

        return self.oozie_id

    def deploy(self, deployment_dir=None):
        try:
            if not deployment_dir:
                deployment_dir = self._create_deployment_dir()
        except Exception as ex:
            msg = _("Failed to create deployment directory: %s") % ex
            LOG.exception(msg)
            raise PopupException(message=msg, detail=str(ex))

        if self.api.security_enabled:
            jt_address = cluster.get_cluster_addr_for_job_submission()
            self._update_properties(jt_address)  # Needed for coordinator deploying workflows with credentials

        if hasattr(self.job, 'nodes'):
            for action in self.job.nodes:
                # Make sure XML is there
                # Don't support more than one level sub-workflow
                if action.data['type'] == 'subworkflow':
                    from oozie.models2 import Workflow
                    workflow = Workflow(document=Document2.objects.get_by_uuid(
                        user=self.user,
                        uuid=action.data['properties']['workflow']))
                    sub_deploy = Submission(self.user, workflow, self.fs,
                                            self.jt, self.properties)
                    workspace = sub_deploy.deploy()

                    self.job.override_subworkflow_id(action, workflow.id)  # For displaying the correct graph
                    self.properties['workspace_%s' % workflow.uuid] = workspace  # For pointing to the correct workspace

                elif action.data['type'] == 'altus':
                    service = 'dataeng'  # action.data['properties'].get('script_path')
                    auth_key_id = ALTUS.AUTH_KEY_ID.get()
                    auth_key_secret = ALTUS.AUTH_KEY_SECRET.get().replace(
                        '\\n', '\n')
                    shell_script = self._generate_altus_action_script(
                        service=service,
                        auth_key_id=auth_key_id,
                        auth_key_secret=auth_key_secret)
                    self._create_file(deployment_dir,
                                      action.data['name'] + '.py',
                                      shell_script)
                    self.fs.do_as_user(
                        self.user, self.fs.copyFromLocal,
                        os.path.join(get_desktop_root(), 'core', 'ext-py',
                                     'navoptapi-0.1.0'),
                        self.job.deployment_dir)

                elif action.data['type'] in ('impala', 'impala-document'):
                    from oozie.models2 import _get_impala_url
                    from impala.impala_flags import get_ssl_server_certificate

                    if action.data['type'] == 'impala-document':
                        from notebook.models import Notebook
                        if action.data['properties'].get('uuid'):
                            notebook = Notebook(
                                document=Document2.objects.get_by_uuid(
                                    user=self.user,
                                    uuid=action.data['properties']['uuid']))
                            statements = notebook.get_str()
                            statements = Template(statements).safe_substitute(
                                **self.properties)
                            script_name = action.data['name'] + '.sql'
                            self._create_file(deployment_dir, script_name,
                                              statements)
                    else:
                        script_name = os.path.basename(
                            action.data['properties'].get('script_path'))

                    if self.api.security_enabled:
                        kinit = 'kinit -k -t *.keytab %(user_principal)s' % {
                            'user_principal': self.properties.get(
                                'user_principal', action.data['properties'].get('user_principal'))
                        }
                    else:
                        kinit = ''

                    shell_script = """#!/bin/bash

# Needed to launch impala shell in oozie
export PYTHON_EGG_CACHE=./myeggs

%(kinit)s

impala-shell %(kerberos_option)s %(ssl_option)s -i %(impalad_host)s -f %(query_file)s""" % {
                        'impalad_host': action.data['properties'].get('impalad_host') or _get_impala_url(),
                        'kerberos_option': '-k' if self.api.security_enabled else '',
                        'ssl_option': '--ssl' if get_ssl_server_certificate() else '',
                        'query_file': script_name,
                        'kinit': kinit
                    }

                    self._create_file(deployment_dir,
                                      action.data['name'] + '.sh',
                                      shell_script)

                elif action.data['type'] == 'hive-document':
                    from notebook.models import Notebook
                    if action.data['properties'].get('uuid'):
                        notebook = Notebook(
                            document=Document2.objects.get_by_uuid(
                                user=self.user,
                                uuid=action.data['properties']['uuid']))
                        statements = notebook.get_str()
                    else:
                        statements = action.data['properties'].get(
                            'statements')

                    if self.properties.get('send_result_path'):
                        statements = """
INSERT OVERWRITE DIRECTORY '%s'
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
   "separatorChar" = "\t",
   "quoteChar"     = "'",
   "escapeChar"    = "\\"
)
STORED AS TEXTFILE %s""" % (self.properties.get('send_result_path'),
                            '\n\n\n'.join([
                                snippet['statement_raw']
                                for snippet in notebook.get_data()['snippets']
                            ]))

                    if statements is not None:
                        self._create_file(deployment_dir,
                                          action.data['name'] + '.sql',
                                          statements)

                elif action.data['type'] in ('java-document', 'java', 'mapreduce-document'):
                    if action.data['type'] in ('java-document', 'mapreduce-document'):
                        from notebook.models import Notebook
                        notebook = Notebook(document=Document2.objects.get_by_uuid(
                            user=self.user, uuid=action.data['properties']['uuid']))
                        properties = notebook.get_data()['snippets'][0]['properties']
                    else:
                        properties = action.data['properties']

                    if properties.get('app_jar'):
                        LOG.debug("Adding to oozie.libpath %s" %
                                  properties['app_jar'])
                        paths = [properties['app_jar']]
                        if self.properties.get('oozie.libpath'):
                            paths.append(self.properties['oozie.libpath'])
                        self.properties['oozie.libpath'] = ','.join(paths)

                elif action.data['type'] == 'pig-document':
                    from notebook.models import Notebook
                    notebook = Notebook(document=Document2.objects.get_by_uuid(
                        user=self.user, uuid=action.data['properties']['uuid']))
                    statements = notebook.get_data()['snippets'][0]['statement_raw']

                    self._create_file(deployment_dir, action.data['name'] + '.pig', statements)
                elif action.data['type'] in ('spark', 'spark-document'):
                    if not [f for f in action.data.get('properties').get('files', [])
                            if f.get('value').endswith('hive-site.xml')]:
                        hive_site_lib = Hdfs.join(deployment_dir + '/lib/', 'hive-site.xml')
                        hive_site_content = get_hive_site_content()
                        if not self.fs.do_as_user(self.user, self.fs.exists, hive_site_lib) and hive_site_content:
                            self.fs.do_as_user(self.user, self.fs.create, hive_site_lib,
                                               overwrite=True, permission=0o700,
                                               data=smart_str(hive_site_content))

        oozie_xml = self.job.to_xml(self.properties)
        self._do_as(self.user.username, self._copy_files, deployment_dir,
                    oozie_xml, self.properties)

        return deployment_dir
Example #34
 def get_acl_status(self, path):
     path = Hdfs.normpath(path)
     params = self._getparams()
     params['op'] = 'GETACLSTATUS'
     return self._root.get(path, params)
Example #35
def listdir_paged(request, path):
    """
    A paginated version of listdir.

    Query parameters:
      pagenum           - The page number to show. Defaults to 1.
      pagesize          - How many to show on a page. Defaults to 15.
      sortby=?          - Specify attribute to sort by. Accepts:
                            (type, name, atime, mtime, size, user, group)
                          Defaults to name.
      descending        - Specify a descending sort order.
                          Default to false.
      filter=?          - Specify a substring filter to search for in
                          the filename field.
    """
    if not request.fs.isdir(path):
        raise PopupException("Not a directory: %s" % (path, ))

    pagenum = int(request.GET.get('pagenum', 1))
    pagesize = int(request.GET.get('pagesize', 30))

    home_dir_path = request.user.get_home_directory()
    breadcrumbs = parse_breadcrumbs(path)

    all_stats = request.fs.listdir_stats(path)

    # Filter first
    filter_str = request.GET.get('filter', None)
    if filter_str:
        filtered_stats = filter(lambda sb: filter_str in sb['name'], all_stats)
        all_stats = filtered_stats

    # Sort next
    sortby = request.GET.get('sortby', None)
    descending_param = request.GET.get('descending', None)
    if sortby is not None:
        if sortby not in ('type', 'name', 'atime', 'mtime', 'user', 'group',
                          'size'):
            logger.info("Invalid sort attribute '%s' for listdir." %
                        (sortby, ))
        else:
            all_stats = sorted(all_stats,
                               key=operator.attrgetter(sortby),
                               reverse=coerce_bool(descending_param))

    # Do pagination
    page = paginator.Paginator(all_stats, pagesize).page(pagenum)
    shown_stats = page.object_list
    # Include parent dir always as first option, unless at filesystem root.
    if Hdfs.normpath(path) != posixpath.sep:
        parent_path = request.fs.join(path, "..")
        parent_stat = request.fs.stats(parent_path)
        # The 'path' field would be absolute, but we want its basename to be
        # actually '..' for display purposes. Encode it since _massage_stats expects byte strings.
        parent_stat['path'] = parent_path
        parent_stat['name'] = ".."
        shown_stats.insert(0, parent_stat)
    page.object_list = [_massage_stats(request, s) for s in shown_stats]

    data = {
        'path': path,
        'breadcrumbs': breadcrumbs,
        'current_request_path': request.path,
        'files': page.object_list,
        'page': _massage_page(page),
        'pagesize': pagesize,
        'home_directory': request.fs.isdir(home_dir_path) and home_dir_path or None,
        'filter_str': filter_str,
        'sortby': sortby,
        'descending': descending_param,
        # The following should probably be deprecated
        'cwd_set': True,
        'file_filter': 'any',
        'current_dir_path': path,
        'is_fs_superuser': request.user.username == request.fs.superuser,
        'is_superuser': request.user.username == request.fs.superuser,
        'groups': request.user.username == request.fs.superuser and [str(x) for x in Group.objects.values_list('name', flat=True)] or [],
        'users': request.user.username == request.fs.superuser and [str(x) for x in User.objects.values_list('username', flat=True)] or [],
        'superuser': request.fs.superuser
    }
    return render('listdir.mako', request, data)
Example #36
File: conf.py Project: zlcken/hue
def config_validator(user):
    """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
    from hadoop.cluster import get_all_hdfs
    from hadoop.fs.hadoopfs import Hdfs
    from liboozie.oozie_api import get_oozie

    res = []

    if OOZIE_URL.get():
        status = get_oozie_status(user)
        if 'NORMAL' not in status:
            res.append((status, _('The Oozie server is not available')))

        api = get_oozie(user, api_version="v2")

        configuration = api.get_configuration()
        if 'org.apache.oozie.service.MetricsInstrumentationService' in [
                c.strip()
                for c in configuration.get('oozie.services.ext', '').split(',')
        ]:
            metrics = api.get_metrics()
            sharelib_url = ('gauges' in metrics and
                            'libs.sharelib.system.libpath' in metrics['gauges'] and
                            [metrics['gauges']['libs.sharelib.system.libpath']['value']] or [])
        else:
            instrumentation = api.get_instrumentation()
            sharelib_url = [
                param['value'] for group in instrumentation['variables']
                for param in group['data']
                if param['name'] == 'sharelib.system.libpath'
            ]

        if sharelib_url:
            sharelib_url = Hdfs.urlsplit(sharelib_url[0])[2]

        if not sharelib_url:
            res.append((status, _('Oozie Share Lib path is not available')))

        class ConfigMock:
            def __init__(self, value):
                self.value = value

            def get(self):
                return self.value

            def get_fully_qualifying_key(self):
                return self.value

        for cluster in get_all_hdfs().values():
            res.extend(
                validate_path(
                    ConfigMock(sharelib_url),
                    is_dir=True,
                    fs=cluster,
                    message=_(
                        'Oozie Share Lib not installed in default location.')))

    return res
Example #37
 def remove_acl_entries(self, path, aclspec):
     path = Hdfs.normpath(path)
     params = self._getparams()
     params['op'] = 'REMOVEACLENTRIES'
     params['aclspec'] = aclspec
     return self._root.put(path, params)
Example #38
 def modify_acl_entries(self, path, aclspec):
   path = Hdfs.normpath(path)
   params = self._getparams()
   params['op'] = 'MODIFYACLENTRIES'
   params['aclspec'] = aclspec
   return self._root.put(path, params)
Example #39
def config_validator(user):
    """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
    from desktop.lib.fsmanager import get_filesystem
    from hadoop.cluster import get_all_hdfs
    from hadoop.fs.hadoopfs import Hdfs
    from liboozie.oozie_api import get_oozie

    res = []

    try:
        from oozie.conf import REMOTE_SAMPLE_DIR
    except Exception as e:
        LOG.warn('Config check failed because Oozie app not installed: %s' % e)
        return res

    if OOZIE_URL.get():
        status = get_oozie_status(user)
        if 'NORMAL' not in status:
            res.append((status, _('The Oozie server is not available')))
        fs = get_filesystem()
        NICE_NAME = 'Oozie'
        if fs.do_as_superuser(fs.exists, REMOTE_SAMPLE_DIR.get()):
            stats = fs.do_as_superuser(fs.stats, REMOTE_SAMPLE_DIR.get())
            mode = oct(stats.mode)
            # if neither group nor others have write permission
            group_has_write = int(mode[-2]) & 2
            others_has_write = int(mode[-1]) & 2

            if not group_has_write and not others_has_write:
                res.append(
                    (NICE_NAME,
                     "The permissions of workspace '%s' are too restrictive" %
                     REMOTE_SAMPLE_DIR.get()))

        api = get_oozie(user, api_version="v2")

        configuration = api.get_configuration()
        if 'org.apache.oozie.service.MetricsInstrumentationService' in [
                c.strip()
                for c in configuration.get('oozie.services.ext', '').split(',')
        ]:
            metrics = api.get_metrics()
            sharelib_url = ('gauges' in metrics and
                            'libs.sharelib.system.libpath' in metrics['gauges'] and
                            [metrics['gauges']['libs.sharelib.system.libpath']['value']] or [])
        else:
            instrumentation = api.get_instrumentation()
            sharelib_url = [
                param['value'] for group in instrumentation['variables']
                for param in group['data']
                if param['name'] == 'sharelib.system.libpath'
            ]

        if sharelib_url:
            sharelib_url = Hdfs.urlsplit(sharelib_url[0])[2]

        if not sharelib_url:
            res.append((status, _('Oozie Share Lib path is not available')))

        class ConfigMock(object):
            def __init__(self, value):
                self.value = value

            def get(self):
                return self.value

            def get_fully_qualifying_key(self):
                return self.value

        for cluster in list(get_all_hdfs().values()):
            res.extend(
                validate_path(
                    ConfigMock(sharelib_url),
                    is_dir=True,
                    fs=cluster,
                    message=_(
                        'Oozie Share Lib not installed in default location.')))

    return res
Example #40
def listdir(request, path, chooser):
    """
    Implements directory listing (or index).

    Intended to be called via view().
    """
    if not request.fs.isdir(path):
        raise PopupException(_("Not a directory: %(path)s") % {'path': path})

    file_filter = request.REQUEST.get('file_filter', 'any')

    assert file_filter in ['any', 'file', 'dir']

    home_dir_path = request.user.get_home_directory()

    breadcrumbs = parse_breadcrumbs(path)

    data = {
        'path': path,
        'file_filter': file_filter,
        'breadcrumbs': breadcrumbs,
        'current_dir_path': path,
        # These could also be put in automatically via
        # http://docs.djangoproject.com/en/dev/ref/templates/api/#django-core-context-processors-request,
        # but manually seems cleaner, since we only need it here.
        'current_request_path': request.path,
        'home_directory': request.fs.isdir(home_dir_path) and home_dir_path or None,
        'cwd_set': True,
        'is_superuser': request.user.username == request.fs.superuser,
        'groups': request.user.username == request.fs.superuser and [str(x) for x in Group.objects.values_list('name', flat=True)] or [],
        'users': request.user.username == request.fs.superuser and [str(x) for x in User.objects.values_list('username', flat=True)] or [],
        'superuser': request.fs.superuser,
        'show_upload': (request.REQUEST.get('show_upload') == 'false' and (False,) or (True,))[0]
    }

    stats = request.fs.listdir_stats(path)

    # Include parent dir, unless at filesystem root.
    if Hdfs.normpath(path) != posixpath.sep:
        parent_path = request.fs.join(path, "..")
        parent_stat = request.fs.stats(parent_path)
        # The 'path' field would be absolute, but we want its basename to be
        # actually '..' for display purposes. Encode it since _massage_stats expects byte strings.
        parent_stat['path'] = parent_path
        stats.insert(0, parent_stat)

    data['files'] = [_massage_stats(request, stat) for stat in stats]
    if chooser:
        return render('chooser.mako', request, data)
    else:
        return render('listdir.mako', request, data)
Example #41
def sync_coord_workflow(request, job_id):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  job = check_job_access_permission(request, job_id)
  check_job_edition_permission(job, request.user)

  hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
  hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
  wf_application_path = job.conf_dict.get('wf_application_path') and Hdfs.urlsplit(job.conf_dict['wf_application_path'])[2] or ''
  coord_application_path = job.conf_dict.get('oozie.coord.application.path') and Hdfs.urlsplit(job.conf_dict['oozie.coord.application.path'])[2] or ''
  properties = hue_coord and hue_coord.properties and dict([(param['name'], param['value']) for param in hue_coord.properties]) or None

  if request.method == 'POST':
    response = {'status': -1, 'message': ''}
    params_form = ParametersFormSet(request.POST)
    if params_form.is_valid():
      try:
        mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

        # Update workflow params in coordinator
        hue_coord.clear_workflow_params()
        properties = dict([(param['name'], param['value']) for param in hue_coord.properties])

        # Deploy WF XML
        submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties)
        submission.deploy(deployment_dir=wf_application_path)
        submission._create_file(wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True)

        # Deploy Coordinator XML
        job.conf_dict.update(mapping)
        submission = Submission(user=request.user, job=hue_coord, fs=request.fs, jt=request.jt, properties=job.conf_dict, oozie_id=job.id)
        submission._create_file(coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True)
        # Server picks up deployed Coordinator XML changes after running 'update' action
        submission.update_coord()

        response['status'] = 0
        response['message'] = _('Successfully updated Workflow definition')
      except Exception as e:
        response['message'] = str(e)

    else:
      response['message'] = _('Invalid submission form: %s') % params_form.errors

    return JsonResponse(response)
  else:
    new_params = hue_wf and hue_wf.find_all_parameters() or []
    new_params = dict([(param['name'], param['value']) for param in new_params])

    # Set previous values
    if properties:
      new_params = dict([(key, properties[key]) if key in list(properties.keys()) else (key, new_params[key]) for key, value in new_params.items()])

    initial_params = ParameterForm.get_initial_params(new_params)
    params_form = ParametersFormSet(initial=initial_params)

  popup = render('/scheduler/submit_job_popup.mako', request, {
             'params_form': params_form,
             'name': _('Job'),
             'header': _('Sync Workflow definition?'),
             'action': reverse('oozie:sync_coord_workflow', kwargs={'job_id': job_id})
           }, force_template=True).content
  if not isinstance(popup, str):
    popup = popup.decode('utf-8')
  return JsonResponse(popup, safe=False)
Example #42
 def urlsplit(url):
   return Hdfs.urlsplit(url)
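Hdfs.urlsplit mirrors urllib's urlsplit for hdfs:// URLs, which is why callers across these examples index [1] for host:port and [2] for the path. With the stdlib stand-in:

from urllib.parse import urlsplit

print(urlsplit('hdfs://namenode:8020/user/demo?op=x'))
# SplitResult(scheme='hdfs', netloc='namenode:8020', path='/user/demo', query='op=x', fragment='')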
Example #43
File: sql.py Project: ymping/hue
    def create_table_from_a_file(self,
                                 source,
                                 destination,
                                 start_time=-1,
                                 file_encoding=None):
        if '.' in destination['name']:
            database, table_name = destination['name'].split('.', 1)
        else:
            database = 'default'
            table_name = destination['name']
        final_table_name = table_name

        table_format = destination['tableFormat']
        source_type = source['sourceType']

        columns = destination['columns']
        partition_columns = destination['partitionColumns']
        kudu_partition_columns = destination['kuduPartitionColumns']
        comment = destination['description']

        source_path = urllib_unquote(source['path'])
        load_data = destination['importData']
        external = not destination['useDefaultLocation']
        external_path = urllib_unquote(destination['nonDefaultLocation'])

        editor_type = destination['sourceType']
        is_transactional = destination['isTransactional']
        default_transactional_type = 'insert_only' if destination['isInsertOnly'] else 'default'

        skip_header = destination['hasHeader']

        primary_keys = destination['primaryKeys']

        if destination['useCustomDelimiters']:
            field_delimiter = destination['customFieldDelimiter']
            collection_delimiter = destination['customCollectionDelimiter'] or None
            map_delimiter = destination['customMapDelimiter'] or None
        else:
            field_delimiter = ','
            collection_delimiter = r'\002'
            map_delimiter = r'\003'
        regexp_delimiter = destination['customRegexp']

        file_format = 'TextFile'
        row_format = 'Delimited'
        serde_name = ''
        serde_properties = ''
        extra_create_properties = ''
        sql = ''

        if source['inputFormat'] == 'manual':
            load_data = False
            source['format'] = {'quoteChar': '"', 'fieldSeparator': ','}

        if table_format == 'json':
            row_format = 'serde'
            serde_name = 'org.apache.hive.hcatalog.data.JsonSerDe'
        elif table_format == 'regexp':
            row_format = 'serde'
            serde_name = 'org.apache.hadoop.hive.serde2.RegexSerDe'
            serde_properties = '"input.regex" = "%s"' % regexp_delimiter
        elif table_format == 'csv':
            if source['format']['quoteChar'] == '"':
                source['format']['quoteChar'] = '\\"'
            row_format = 'serde'
            serde_name = 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
            serde_properties = '''"separatorChar" = "%(fieldSeparator)s",
    "quoteChar"     = "%(quoteChar)s",
    "escapeChar"    = "\\\\"
    ''' % source['format']

        use_temp_table = table_format in ('parquet', 'orc', 'kudu') or is_transactional
        if use_temp_table:  # We'll be using a temp table to load data
            if load_data:
                table_name, final_table_name = 'hue__tmp_%s' % table_name, table_name

                sql += '\n\nDROP TABLE IF EXISTS `%(database)s`.`%(table_name)s`;\n' % {
                    'database': database,
                    'table_name': table_name
                }
            else:  # Manual
                row_format = ''
                file_format = table_format
                skip_header = False
                if table_format == 'kudu':
                    columns = [
                        col for col in columns if col['name'] in primary_keys
                    ] + [
                        col
                        for col in columns if col['name'] not in primary_keys
                    ]

        if table_format == 'kudu':
            collection_delimiter = None
            map_delimiter = None

        if external or (load_data and table_format in ('parquet', 'orc', 'kudu')):  # We'll use location to load data
            if not self.fs.isdir(external_path):  # File selected
                external_path, external_file_name = Hdfs.split(external_path)

                if len(self.fs.listdir(external_path)) > 1:
                    # If dir not just the file, create data dir and move file there. Make sure it's unique.
                    external_path = external_path + '/%s%s_table' % (external_file_name, str(uuid.uuid4()))
                    self.fs.mkdir(external_path)
                    self.fs.rename(source_path, external_path)
        elif load_data:  # We'll use load data command
            parent_path = self.fs.parent_path(source_path)
            stats = self.fs.stats(parent_path)
            split = urlparse(source_path)
            # Only for HDFS, import data and non-external table
            if split.scheme in ('', 'hdfs') and oct(stats["mode"])[-1] != '7':
                # Make sure it's unique.
                user_scratch_dir = self.fs.get_home_dir() + '/.scratchdir/%s' % str(uuid.uuid4())
                self.fs.do_as_user(self.user, self.fs.mkdir, user_scratch_dir, 0o0777)
                self.fs.do_as_user(self.user, self.fs.rename, source['path'], user_scratch_dir)
                source_path = user_scratch_dir + '/' + source['path'].split('/')[-1]

        if external_path.lower().startswith("abfs"):  # Check whether it is an ABFS path
            external_path = abfspath(external_path)

        tbl_properties = OrderedDict()
        if skip_header:
            tbl_properties['skip.header.line.count'] = '1'
        # The temp table is not transactional, but final table can be if is_transactional.
        # tbl_properties that don't exist in previous versions can safely be added without error.
        tbl_properties['transactional'] = 'false'

        sql += django_mako.render_to_string(
            "gen/create_table_statement.mako", {
                'table': {
                    'name': table_name,
                    'comment': comment,
                    'row_format': row_format,
                    'field_terminator': field_delimiter,
                    'collection_terminator': collection_delimiter if source_type == 'hive' else None,
                    'map_key_terminator': map_delimiter if source_type == 'hive' else None,
                    'serde_name': serde_name,
                    'serde_properties': serde_properties,
                    'file_format': file_format,
                    'external': external or load_data and table_format in ('parquet', 'orc', 'kudu'),
                    'path': external_path,
                    'primary_keys': primary_keys if table_format == 'kudu' and not load_data else [],
                    'tbl_properties': tbl_properties
                },
                'columns': columns,
                'partition_columns': partition_columns,
                'kudu_partition_columns': kudu_partition_columns,
                'database': database
            })
        if file_encoding and file_encoding != 'ASCII' and file_encoding != 'utf-8' and not use_temp_table:
            sql += '\n\nALTER TABLE `%(database)s`.`%(final_table_name)s` ' \
                   'SET serdeproperties ("serialization.encoding"="%(file_encoding)s");' % {
                       'database': database,
                       'final_table_name': final_table_name,
                       'file_encoding': file_encoding
                   }

        if table_format in ('text', 'json', 'csv', 'regexp') and not external and load_data:
            form_data = {
                'path': source_path,
                'overwrite': False,
                'partition_columns': [(partition['name'], partition['partitionValue'])
                                      for partition in partition_columns],
            }
            query_server_config = dbms.get_query_server_config(name=source_type)
            db = dbms.get(self.user, query_server=query_server_config)
            sql += "\n\n%s;" % db.load_data(database, table_name, form_data, None, generate_ddl_only=True)

        if load_data and use_temp_table:
            file_format = 'TextFile' if table_format == 'text' else table_format
            if table_format == 'kudu':
                columns_list = [
                    '`%s`' % col for col in primary_keys + [
                        col['name'] for col in destination['columns']
                        if col['name'] not in primary_keys and col['keep']
                    ]
                ]
                extra_create_properties = """PRIMARY KEY (%(primary_keys)s)
        PARTITION BY HASH PARTITIONS 16
        STORED AS %(file_format)s
        TBLPROPERTIES(
        'kudu.num_tablet_replicas' = '1'
        )""" % {
                    'file_format': file_format,
                    'primary_keys': ', '.join(primary_keys)
                }
            else:
                columns_list = ['*']
                extra_create_properties = 'STORED AS %(file_format)s' % {
                    'file_format': file_format
                }
                if is_transactional:
                    extra_create_properties += '\nTBLPROPERTIES("transactional"="true", "transactional_properties"="%s")' % \
                        default_transactional_type

            sql += '''\n\nCREATE TABLE `%(database)s`.`%(final_table_name)s`%(comment)s
        %(extra_create_properties)s
        AS SELECT %(columns_list)s
        FROM `%(database)s`.`%(table_name)s`;''' % {
                'database': database,
                'final_table_name': final_table_name,
                'table_name': table_name,
                'extra_create_properties': extra_create_properties,
                'columns_list': ', '.join(columns_list),
                'comment': ' COMMENT "%s"' % comment if comment else ''
            }
            sql += '\n\nDROP TABLE IF EXISTS `%(database)s`.`%(table_name)s`;\n' % {
                'database': database,
                'table_name': table_name
            }
            if file_encoding and file_encoding != 'ASCII' and file_encoding != 'utf-8':
                sql += '\n\nALTER TABLE `%(database)s`.`%(final_table_name)s` ' \
                       'SET serdeproperties ("serialization.encoding"="%(file_encoding)s");' % {
                    'database': database,
                    'final_table_name': final_table_name,
                    'file_encoding': file_encoding
                }

        on_success_url = reverse('metastore:describe_table',
                                 kwargs={
                                     'database': database,
                                     'table': final_table_name
                                 }) + '?source_type=' + source_type

        return make_notebook(
            name=_('Creating table %(database)s.%(table)s') % {'database': database, 'table': final_table_name},
            editor_type=editor_type,
            statement=sql.strip(),
            status='ready',
            database=database,
            on_success_url=on_success_url,
            last_executed=start_time,
            is_task=True)
Example #44
 def remove_acl(self, path):
     path = Hdfs.normpath(path)
     params = self._getparams()
     params['op'] = 'REMOVEACL'
     return self._root.put(path, params)
Example #45
 def set_acl(self, path, aclspec):
     path = Hdfs.normpath(path)
     params = self._getparams()
     params['op'] = 'SETACL'
     params['aclspec'] = aclspec
     return self._root.put(path, params)
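The aclspec strings that set_acl, modify_acl_entries, and remove_acl_entries pass through follow the HDFS ACL syntax: comma-separated <type>:<name>:<perms> entries. A hypothetical call, with fs an assumed configured client:

aclspec = 'user::rwx,user:hue:rw-,group::r--,other::---'
fs.set_acl('/user/demo/data', aclspec)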
Example #46
    def run(self,
            request,
            collection_name,
            envelope,
            input_path,
            start_time=None,
            lib_path=None):
        workspace_path = self._upload_workspace(envelope)
        if lib_path is None:
            lib_path = CONFIG_JARS_LIBS_PATH.get()

        task = make_notebook(
            name=_('Indexing into %s') % collection_name,
            editor_type='notebook',
            #on_success_url=reverse('search:browse', kwargs={'name': collection_name}),
            #pub_sub_url='assist.collections.refresh',
            is_task=True,
            is_notebook=True,
            last_executed=start_time)

        if not DISABLE_HUE_3.config.default_value or True:  # CDH5
            shell_command_name = "pipeline.sh"
            shell_command = """#!/bin/bash

export SPARK_DIST_CLASSPATH=`hadoop classpath`
export SPARK_DIST_CLASSPATH=/etc/hive/conf:`hadoop classpath`
export JAVA_HOME=/usr/java/jdk1.8.0_162

SPARK_KAFKA_VERSION=0.10 spark2-submit envelope.jar envelope.conf"""
            hdfs_shell_cmd_path = os.path.join(workspace_path, shell_command_name)
            self.fs.do_as_user(self.username, self.fs.create, hdfs_shell_cmd_path, data=shell_command)
            task.add_shell_snippet(shell_command=shell_command_name,
                                   files=[{u'value': u'%s/envelope.conf' % workspace_path},
                                          {u'value': hdfs_shell_cmd_path},
                                          {u'value': lib_path}])
        else:
            task.add_spark_snippet(
                clazz='com.cloudera.labs.envelope.EnvelopeMain',
                jars=Hdfs.basename(lib_path),
                arguments=[u'envelope.conf'],
                files=[
                    {
                        u'path': u'%s/envelope.conf' % workspace_path,
                        u'type': u'file'
                    },
                    {
                        u'path': lib_path,
                        u'type': u'file'
                    },
                ])

        return task.execute(request, batch=True)