Example #1
def execute_and_watch(request):
  notebook_id = request.GET.get('editor', request.GET.get('notebook'))
  snippet_id = int(request.GET['snippet'])
  action = request.GET['action']
  destination = request.GET['destination']

  notebook = Notebook(document=Document2.objects.get(id=notebook_id)).get_data()
  snippet = notebook['snippets'][snippet_id]
  editor_type = snippet['type']

  api = get_api(request, snippet)

  if action == 'save_as_table':
    sql, success_url = api.export_data_as_table(notebook, snippet, destination)
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute')
  elif action == 'insert_as_query':
    sql, success_url = api.export_large_data_to_hdfs(notebook, snippet, destination)
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute')
  else:
    raise PopupException(_('Action %s is unknown') % action)

  return render('editor.mako', request, {
      'notebooks_json': json.dumps([editor.get_data()]),
      'options_json': json.dumps({
          'languages': [{"name": "%s SQL" % editor_type.title(), "type": editor_type}],
          'mode': 'editor',
          'success_url': success_url
      }),
      'editor_type': editor_type,
  })
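Example #1 above (like the browse views further down) does not run the statement server-side: it builds a notebook with status='ready-execute' and hands the serialized data to the editor page, which executes it client-side. A minimal sketch of that render path, assuming a Hue environment; the excerpts do not show their imports, so the import paths below are assumptions.

import json

from desktop.lib.django_util import render  # assumed import path
from notebook.models import make_notebook   # assumed import path


def open_in_editor(request, sql, dialect='hive'):
  # Pre-load the editor page with a statement that the page itself will execute,
  # mirroring the 'ready-execute' + render('editor.mako', ...) pattern above.
  editor = make_notebook(
      name='Browse',
      editor_type=dialect,
      statement=sql,
      status='ready-execute',
  )
  return render('editor.mako', request, {
      'notebooks_json': json.dumps([editor.get_data()]),
      'options_json': json.dumps({'mode': 'editor'}),
      'editor_type': dialect,
  })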
Example #2
def create_database(request, source, destination):
  database = destination['name']
  comment = destination['description']

  use_default_location = destination['useDefaultLocation']
  external_path = destination['nonDefaultLocation']

  sql = django_mako.render_to_string("gen/create_database_statement.mako", {
      'database': {
          'name': database,
          'comment': comment,
          'use_default_location': use_default_location,
          'external_location': external_path,
          'properties': [],
      }
    }
  )

  editor_type = 'hive'
  on_success_url = reverse('metastore:show_tables', kwargs={'database': database})

  try:
    notebook = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready', on_success_url=on_success_url)
    return notebook.execute(request, batch=False)
  except Exception, e:
    raise PopupException(_('The database could not be created.'), detail=e.message)
Example #3
File: views.py Project: mapr/hue
def drop_partition(request, database, table):
  source_type = request.POST.get('source_type', 'hive')
  cluster = json.loads(request.POST.get('cluster', '{}'))

  db = _get_db(user=request.user, source_type=source_type, cluster=cluster)

  if request.method == 'POST':
    partition_specs = request.POST.getlist('partition_selection')
    partition_specs = [spec for spec in partition_specs]
    try:
      if request.GET.get("format", "html") == "json":
        last_executed = json.loads(request.POST.get('start_time'), '-1')
        sql = db.drop_partitions(database, table, partition_specs, design=None, generate_ddl_only=True)
        job = make_notebook(
            name=_('Drop partition %s') % ', '.join(partition_specs)[:100],
            editor_type=source_type,
            statement=sql.strip(),
            status='ready',
            database=None,
            on_success_url='assist.db.refresh',
            is_task=True,
            last_executed=last_executed
        )
        return JsonResponse(job.execute(request))
      else:
        design = SavedQuery.create_empty(app_name='beeswax', owner=request.user, data=hql_query('').dumps())
        query_history = db.drop_partitions(database, table, partition_specs, design)
        url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id}) + '?on_success_url=' + \
              reverse('metastore:describe_partitions', kwargs={'database': database, 'table': table})
        return redirect(url)
    except Exception, ex:
      error_message, log = dbms.expand_exception(ex, db)
      error = _("Failed to remove %(partition)s.  Error: %(error)s") % {'partition': '\n'.join(partition_specs), 'error': error_message}
      raise PopupException(error, title=_("DB Error"), detail=log)
Example #4
File: views.py Project: 10sr/hue
def drop_table(request, database):
  db = dbms.get(request.user)

  if request.method == 'POST':
    try:
      tables = request.POST.getlist('table_selection')
      tables_objects = [db.get_table(database, table) for table in tables]
      skip_trash = request.POST.get('skip_trash') == 'on'

      if request.POST.get('is_embeddable'):
        sql = db.drop_tables(database, tables_objects, design=None, skip_trash=skip_trash, generate_ddl_only=True)
        job = make_notebook(
            name='Execute and watch',
            editor_type='hive',
            statement=sql.strip(),
            status='ready',
            database=database,
            on_success_url='assist.db.refresh',
            is_task=True
        )
        return JsonResponse(job.execute(request))
      else:
        # Can't be simpler without an important refactoring
        design = SavedQuery.create_empty(app_name='beeswax', owner=request.user, data=hql_query('').dumps())
        query_history = db.drop_tables(database, tables_objects, design, skip_trash=skip_trash)
        url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id}) + '?on_success_url=' + reverse('metastore:show_tables', kwargs={'database': database})
        return redirect(url)
    except Exception, ex:
      error_message, log = dbms.expand_exception(ex, db)
      error = _("Failed to remove %(tables)s.  Error: %(error)s") % {'tables': ','.join(tables), 'error': error_message}
      raise PopupException(error, title=_("Hive Error"), detail=log)
Example #5
File: views.py Project: 10sr/hue
def drop_database(request):
  db = dbms.get(request.user)

  if request.method == 'POST':
    databases = request.POST.getlist('database_selection')

    try:
      design = SavedQuery.create_empty(app_name='beeswax', owner=request.user, data=hql_query('').dumps())

      if request.POST.get('is_embeddable'):
        sql = db.drop_databases(databases, design, generate_ddl_only=True)
        job = make_notebook(
            name='Execute and watch',
            editor_type='hive',
            statement=sql.strip(),
            status='ready',
            database=None,
            on_success_url='assist.db.refresh',
            is_task=True
        )
        return JsonResponse(job.execute(request))
      else:
        query_history = db.drop_databases(databases, design)
        url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id}) + '?on_success_url=' + reverse('metastore:databases')
        return redirect(url)
    except Exception, ex:
      error_message, log = dbms.expand_exception(ex, db)
      error = _("Failed to remove %(databases)s.  Error: %(error)s") % {'databases': ','.join(databases), 'error': error_message}
      raise PopupException(error, title=_("Hive Error"), detail=log)
Example #6
  def install_pig_script(self, sample_user):
    doc2 = None
    name = _('UpperText')

    if Document2.objects.filter(owner=sample_user, name=name, type='query-pig', is_history=False).exists():
      LOG.info("Sample pig editor script already installed.")
      doc2 = Document2.objects.get(owner=sample_user, name=name, type='query-pig', is_history=False)
    else:
      statement = """REGISTER hdfs://{}/piggybank.jar;

data = LOAD '{}/data/midsummer.txt' as (text:CHARARRAY);

upper_case = FOREACH data GENERATE org.apache.pig.piggybank.evaluation.string.UPPER(text);

STORE upper_case INTO '$output';
""".format(REMOTE_SAMPLE_DIR.get(), REMOTE_SAMPLE_DIR.get())
      snippet_properties = {
        'hadoopProperties': [],
        'parameters': [],
        'resources': []
      }

      notebook = make_notebook(
        name=name,
        description=_('UpperText: Example Pig script'),
        editor_type='pig',
        statement=statement,
        status='ready',
        snippet_properties=snippet_properties,
        is_saved=True
      )

      # Remove files, functions, settings from snippet properties
      data = notebook.get_data()
      data['snippets'][0]['properties'].pop('files')
      data['snippets'][0]['properties'].pop('functions')
      data['snippets'][0]['properties'].pop('settings')

      try:
        with transaction.atomic():
          doc2 = Document2.objects.create(
            owner=sample_user,
            name=data['name'],
            type='query-pig',
            description=data['description'],
            data=json.dumps(data)
          )
      except Exception, e:
        LOG.exception("Failed to create sample pig script document: %s" % e)
        # Just to be sure we delete Doc2 object incase of exception.
        # Possible when there are mixed InnoDB and MyISAM tables
        if doc2 and Document2.objects.filter(id=doc2.id).exists():
          doc2.delete()
Example #7
    def kill_query(self, query_id, request):
        kill_sql = 'KILL QUERY "%s";' % query_id
        job = make_notebook(
            name=_('Kill query %s') % query_id,
            editor_type='hive',
            statement=kill_sql,
            status='ready',
            on_success_url='assist.db.refresh',
            is_task=False,
        )

        job.execute_and_wait(request)
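Most of the embeddable views above submit the generated statement asynchronously with job.execute(request) and wrap the result in a JsonResponse (Examples #3 to #5), while this kill-query helper and the _load_data_to_table sample further down block on job.execute_and_wait(request). A hedged sketch of the two call styles, assuming the same Hue environment and import path as noted earlier:

from django.http import JsonResponse

from notebook.models import make_notebook  # assumed import path


def _sql_job(statement, is_task):
  # Common keyword arguments recurring across the examples on this page.
  return make_notebook(
      name='Execute and watch',
      editor_type='hive',
      statement=statement.strip(),
      status='ready',
      on_success_url='assist.db.refresh',
      is_task=is_task,
  )


def submit_async(request, statement):
  # Fire-and-forget task submission, as in the drop_table/drop_partition views.
  return JsonResponse(_sql_job(statement, is_task=True).execute(request))


def submit_blocking(request, statement):
  # Block until the statement finishes, as kill_query does above.
  return _sql_job(statement, is_task=False).execute_and_wait(request)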
Example #8
def browse(request, database, table, partition_spec=None):
  snippet = {'type': request.POST.get('sourceType', 'hive')}

  statement = get_api(request, snippet).get_browse_query(snippet, database, table, partition_spec)
  editor_type = snippet['type']

  if request.method == 'POST':
    notebook = make_notebook(name='Execute and watch', editor_type=editor_type, statement=statement, status='ready-execute', is_task=True)
    return JsonResponse(notebook.execute(request, batch=False))
  else:
    editor = make_notebook(name='Browse', editor_type=editor_type, statement=statement, status='ready-execute')

    return render('editor.mako', request, {
        'notebooks_json': json.dumps([editor.get_data()]),
        'options_json': json.dumps({
            'languages': get_ordered_interpreters(request.user),
            'mode': 'editor',
            'editor_type': editor_type
        }),
        'editor_type': editor_type,
    })
Example #9
def drop_table(request, database):
    source_type = request.POST.get('source_type', 'hive')
    db = _get_db(user=request.user, source_type=source_type)

    if request.method == 'POST':
        try:
            tables = request.POST.getlist('table_selection')
            tables_objects = [
                db.get_table(database, table) for table in tables
            ]
            skip_trash = request.POST.get('skip_trash') == 'on'

            if request.POST.get('is_embeddable'):
                last_executed = json.loads(request.POST.get('start_time'),
                                           '-1')
                sql = db.drop_tables(database,
                                     tables_objects,
                                     design=None,
                                     skip_trash=skip_trash,
                                     generate_ddl_only=True)
                job = make_notebook(
                    name=_('Drop table %s') %
                    ', '.join([table.name for table in tables_objects])[:100],
                    editor_type=source_type,
                    statement=sql.strip(),
                    status='ready',
                    database=database,
                    on_success_url='assist.db.refresh',
                    is_task=True,
                    last_executed=last_executed)
                return JsonResponse(job.execute(request))
            else:
                # Can't be simpler without an important refactoring
                design = SavedQuery.create_empty(app_name='beeswax',
                                                 owner=request.user,
                                                 data=hql_query('').dumps())
                query_history = db.drop_tables(database,
                                               tables_objects,
                                               design,
                                               skip_trash=skip_trash)
                url = reverse('beeswax:watch_query_history',
                              kwargs={'query_history_id': query_history.id
                                      }) + '?on_success_url=' + reverse(
                                          'metastore:show_tables',
                                          kwargs={'database': database})
                return redirect(url)
        except Exception, ex:
            error_message, log = dbms.expand_exception(ex, db)
            error = _("Failed to remove %(tables)s.  Error: %(error)s") % {
                'tables': ','.join(tables),
                'error': error_message
            }
            raise PopupException(error, title=_("DB Error"), detail=log)
Example #10
    def create_table_from_kafka(self,
                                source,
                                destination,
                                start_time=-1,
                                dry_run=False):
        if '.' in destination['name']:
            database, table_name = destination['name'].split('.', 1)
        else:
            database = 'default'
            table_name = destination['name']
        final_table_name = table_name

        source_type = source['sourceType']

        interpreter = _get_interpreter_from_dialect('flink', self.user)
        editor_type = interpreter['type']  # destination['sourceType']

        columns = destination['columns']

        sql = '''CREATE TABLE %(table_name)s (
%(columns)s
) WITH (
  'connector' = 'kafka',
  'topic' = '%(topic)s',
  'scan.startup.mode' = 'earliest-offset',
  'properties.bootstrap.servers' = 'kafka:9094',
  'format' = 'json'
);''' % {
            'database': database,
            'table_name': table_name,
            'columns': ',\n'.join(
                ['  %(name)s %(type)s' % col for col in columns]),
            'topic': source.get('kafkaSelectedTopics')
        }

        if dry_run:
            return sql
        else:
            on_success_url = reverse('metastore:describe_table', kwargs={'database': database, 'table': final_table_name}) + \
                '?source_type=' + source_type

            return make_notebook(
                name=_('Creating table %(database)s.%(table)s') % {
                    'database': database,
                    'table': final_table_name
                },
                editor_type=editor_type,
                statement=sql.strip(),
                status='ready',
                database=database,
                on_success_url=on_success_url,
                last_executed=start_time,
                is_task=True)
Example #11
  def _install_mapreduce_example(self):
    doc2 = None
    name = _('MapReduce Sleep Job')

    if Document2.objects.filter(owner=self.user, name=name, type='query-mapreduce', is_history=False).exists():
      LOG.info("Sample mapreduce editor job already installed.")
      doc2 = Document2.objects.get(owner=self.user, name=name, type='query-mapreduce', is_history=False)
    else:
      snippet_properties = {
        'app_jar': '/user/hue/oozie/workspaces/lib/hadoop-examples.jar',
        'hadoopProperties': ['mapred.mapper.class=org.apache.hadoop.examples.SleepJob',
          'mapred.reducer.class=org.apache.hadoop.examples.SleepJob',
          'mapred.mapoutput.key.class=org.apache.hadoop.io.IntWritable',
          'mapred.mapoutput.value.class=org.apache.hadoop.io.NullWritable',
          'mapred.output.format.class=org.apache.hadoop.mapred.lib.NullOutputFormat',
          'mapred.input.format.class=org.apache.hadoop.examples.SleepJob$SleepInputFormat',
          'mapred.partitioner.class=org.apache.hadoop.examples.SleepJob',
          'sleep.job.map.sleep.time=5', 'sleep.job.reduce.sleep.time=10'],
        'archives': [],
        'jars': []
      }

      notebook = make_notebook(
        name=name,
        description=_('Sleep: Example MapReduce job'),
        editor_type='mapreduce',
        statement='',
        status='ready',
        snippet_properties=snippet_properties,
        is_saved=True
      )

      # Remove files, functions, settings from snippet properties
      data = notebook.get_data()
      data['snippets'][0]['properties'].pop('functions')
      data['snippets'][0]['properties'].pop('settings')

      try:
        with transaction.atomic():
          doc2 = Document2.objects.create(
            owner=self.user,
            name=data['name'],
            type='query-mapreduce',
            description=data['description'],
            data=json.dumps(data)
          )
      except Exception, e:
        LOG.exception("Failed to create sample mapreduce job document: %s" % e)
        # Just to be sure we delete Doc2 object incase of exception.
        # Possible when there are mixed InnoDB and MyISAM tables
        if doc2 and Document2.objects.filter(id=doc2.id).exists():
          doc2.delete()
Example #12
    def _install_pyspark_example(self):
        doc2 = None
        name = _('PySpark Pi Estimator Job')

        if Document2.objects.filter(owner=self.user,
                                    name=name,
                                    type='query-spark2',
                                    is_history=False).exists():
            LOG.info("Sample pyspark editor job already installed.")
            doc2 = Document2.objects.get(owner=self.user,
                                         name=name,
                                         type='query-spark2',
                                         is_history=False)
        else:
            snippet_properties = {
                'jars': ['/user/hue/oozie/workspaces/lib/pi.py'],
                'class': '',
                'app_name': '',
                'spark_opts': [],
                'spark_arguments': [],
                'files': []
            }

            notebook = make_notebook(
                name=name,
                description=_('Pi Estimator: Example PySpark job'),
                editor_type='spark2',
                statement='',
                status='ready',
                snippet_properties=snippet_properties,
                is_saved=True)

            # Remove files, functions, settings from snippet properties
            data = notebook.get_data()
            data['snippets'][0]['properties'].pop('functions')
            data['snippets'][0]['properties'].pop('settings')

            try:
                with transaction.atomic():
                    doc2 = Document2.objects.create(
                        owner=self.user,
                        name=data['name'],
                        type='query-spark2',
                        description=data['description'],
                        data=json.dumps(data))
            except Exception, e:
                LOG.exception(
                    "Failed to create sample PySpark job document: %s" % e)
                # Just to be sure we delete Doc2 object incase of exception.
                # Possible when there are mixed InnoDB and MyISAM tables
                if doc2 and Document2.objects.filter(id=doc2.id).exists():
                    doc2.delete()
Example #13
    def run_morphline(self, collection_name, morphline, input_path):
        workspace_path = self._upload_workspace(morphline)

        snippet_properties = {
            u'files': [{
                u'path': u'%s/log4j.properties' % workspace_path,
                u'type': u'file'
            }, {
                u'path': u'%s/morphline.conf' % workspace_path,
                u'type': u'file'
            }],
            u'class':
            u'org.apache.solr.hadoop.MapReduceIndexerTool',
            u'app_jar':
            CONFIG_INDEXER_LIBS_PATH.get(),
            u'arguments': [
                u'--morphline-file',
                u'morphline.conf',
                u'--output-dir',
                u'${nameNode}/user/%s/indexer' % self.username,
                u'--log4j',
                u'log4j.properties',
                u'--go-live',
                u'--zk-host',
                zkensemble(),
                u'--collection',
                collection_name,
                u'${nameNode}%s' % input_path,
            ],
            u'archives': [],
        }

        notebook = make_notebook(
            name='Indexer',
            editor_type='java',
            snippet_properties=snippet_properties).get_data()
        notebook_doc, created = _save_notebook(notebook, self.user)

        workflow_doc = WorkflowBuilder().create_workflow(
            document=notebook_doc,
            user=self.user,
            managed=True,
            name=_("Batch job for %s") % notebook_doc.name)
        workflow = Workflow(document=workflow_doc, user=self.user)

        job_id = _submit_workflow(user=self.user,
                                  fs=self.fs,
                                  jt=self.jt,
                                  workflow=workflow,
                                  mapping=None)

        return job_id
Example #15
def import_saved_beeswax_query(bquery):
  design = bquery.get_design()

  return make_notebook(
      name=bquery.name,
      description=bquery.desc,
      editor_type=_convert_type(bquery.type),
      statement=design.hql_query,
      status='ready',
      files=design.file_resources,
      functions=design.functions,
      settings=design.settings
  )
Example #16
def import_saved_beeswax_query(bquery):
  design = bquery.get_design()

  return make_notebook(
      name=bquery.name,
      description=bquery.desc,
      editor_type=_convert_type(bquery.type, bquery.data),
      statement=design.hql_query,
      status='ready',
      files=design.file_resources,
      functions=design.functions,
      settings=design.settings
  )
Example #17
  def _load_data_to_table(self, django_user, hql):
    LOG.info('Loading data into table "%s"' % (self.name,))

    job = make_notebook(
        name=_('Insert data in sample table %s') % self.name,
        editor_type=self.interpreter['type'] if self.interpreter else self.dialect,
        statement=hql,
        status='ready',
        database=self.db_name,
        on_success_url='assist.db.refresh',
        is_task=False,
    )
    job.execute_and_wait(self.request)
Example #18
  def install_pig_script(self, sample_user):
    doc2 = None
    name = _('UpperText')

    if Document2.objects.filter(owner=sample_user, name=name, type='query-pig').exists():
      LOG.info("Sample pig editor script already installed.")
      doc2 = Document2.objects.get(owner=sample_user, name=name, type='query-pig')
    else:
      statement = """data = LOAD '/user/hue/pig/examples/data/midsummer.txt' as (text:CHARARRAY);

upper_case = FOREACH data GENERATE org.apache.pig.piggybank.evaluation.string.UPPER(text);

STORE upper_case INTO '${output}';
"""
      snippet_properties = {
        'hadoopProperties': [],
        'parameters': [],
        'resources': []
      }

      notebook = make_notebook(
        name=name,
        description=_('UpperText: Example Pig script'),
        editor_type='pig',
        statement=statement,
        status='ready',
        snippet_properties=snippet_properties,
        is_saved=True
      )

      # Remove files, functions, settings from snippet properties
      data = notebook.get_data()
      data['snippets'][0]['properties'].pop('files')
      data['snippets'][0]['properties'].pop('functions')
      data['snippets'][0]['properties'].pop('settings')

      try:
        with transaction.atomic():
          doc2 = Document2.objects.create(
            owner=sample_user,
            name=data['name'],
            type='query-pig',
            description=data['description'],
            data=json.dumps(data)
          )
      except Exception, e:
        LOG.exception("Failed to create sample pig script document: %s" % e)
        # Just to be sure we delete Doc2 object incase of exception.
        # Possible when there are mixed InnoDB and MyISAM tables
        if doc2 and Document2.objects.filter(id=doc2.id).exists():
          doc2.delete()
Example #19
def drop_database(request):
    source_type = request.POST.get('source_type', 'hive')
    db = _get_db(user=request.user, source_type=source_type)

    if request.method == 'POST':
        databases = request.POST.getlist('database_selection')

        try:
            if request.POST.get('is_embeddable'):
                design = SavedQuery.create_empty(
                    app_name=source_type
                    if source_type != 'hive' else 'beeswax',
                    owner=request.user,
                    data=hql_query('').dumps())
                last_executed = json.loads(request.POST.get('start_time'),
                                           '-1')
                cluster = json.loads(request.POST.get('cluster', '{}'))
                namespace = request.POST.get('namespace')
                sql = db.drop_databases(databases,
                                        design,
                                        generate_ddl_only=True)
                job = make_notebook(name=_('Drop database %s') %
                                    ', '.join(databases)[:100],
                                    editor_type=source_type,
                                    statement=sql.strip(),
                                    status='ready',
                                    database=None,
                                    namespace=namespace,
                                    compute=cluster,
                                    on_success_url='assist.db.refresh',
                                    is_task=True,
                                    last_executed=last_executed)
                return JsonResponse(job.execute(request))
            else:
                design = SavedQuery.create_empty(app_name='beeswax',
                                                 owner=request.user,
                                                 data=hql_query('').dumps())
                query_history = db.drop_databases(databases, design)
                url = reverse(
                    'beeswax:watch_query_history',
                    kwargs={
                        'query_history_id': query_history.id
                    }) + '?on_success_url=' + reverse('metastore:databases')
                return redirect(url)
        except Exception, ex:
            error_message, log = dbms.expand_exception(ex, db)
            error = _("Failed to remove %(databases)s.  Error: %(error)s") % {
                'databases': ','.join(databases),
                'error': error_message
            }
            raise PopupException(error, title=_("DB Error"), detail=log)
Example #20
File: api.py Project: Nick-Xu/hue
def export_result(request):
  response = {'status': -1, 'message': _('Exporting result failed.')}

  # Passed by check_document_access_permission but unused by APIs
  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))
  data_format = json.loads(request.POST.get('format', 'hdfs-file'))
  destination = json.loads(request.POST.get('destination', ''))
  overwrite = json.loads(request.POST.get('overwrite', 'false'))
  is_embedded = json.loads(request.POST.get('is_embedded', 'false'))

  api = get_api(request, snippet)

  if data_format == 'hdfs-file': # Blocking operation, like downloading
    if request.fs.isdir(destination):
      if notebook.get('name'):
        destination += '/%(name)s.csv' % notebook
      else:
        destination += '/%(type)s-%(id)s.csv' % notebook
    if overwrite and request.fs.exists(destination):
      request.fs.do_as_user(request.user.username, request.fs.rmtree, destination)
    response['watch_url'] = api.export_data_as_hdfs_file(snippet, destination, overwrite)
    response['status'] = 0
  elif data_format == 'hive-table':
    notebook_id = notebook['id'] or request.GET.get('editor', request.GET.get('notebook'))
    response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=save_as_table&notebook=' + str(notebook_id) + '&snippet=0&destination=' + destination
    response['status'] = 0
  elif data_format == 'hdfs-directory':
    if is_embedded:
      sql, success_url = api.export_large_data_to_hdfs(notebook, snippet, destination)

      task = make_notebook(
        name='Execute and watch',
        editor_type=snippet['type'],
        statement=sql,
        status='ready-execute',
        database=snippet['database'],
        on_success_url=success_url,
        is_task=True
      )
      response = task.execute(request)
    else:
      notebook_id = notebook['id'] or request.GET.get('editor', request.GET.get('notebook'))
      response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=insert_as_query&notebook=' + str(notebook_id) + '&snippet=0&destination=' + destination
      response['status'] = 0
  elif data_format == 'search-index':
    notebook_id = notebook['id'] or request.GET.get('editor', request.GET.get('notebook'))
    response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=index_query&notebook=' + str(notebook_id) + '&snippet=0&destination=' + destination
    response['status'] = 0

  return JsonResponse(response)
Example #21
  def _install_spark_example(self):
    doc2 = None
    name = _('Spark File Copy Job')

    if Document2.objects.filter(owner=self.user, name=name, type='query-spark2', is_history=False).exists():
      LOG.info("Sample Spark editor job already installed.")
      doc2 = Document2.objects.get(owner=self.user, name=name, type='query-spark2', is_history=False)
    else:
      snippet_properties = {
        'jars': ['/user/hue/oozie/workspaces/workflows/spark-scala/lib/oozie-examples.jar'],
        'class': 'org.apache.oozie.example.SparkFileCopy',
        'app_name': '',
        'spark_opts': [],
        'spark_arguments': [
          "/user/hue/oozie/workspaces/data/sonnets.txt",
          "sonnets"
        ],
        'files': []
      }

      notebook = make_notebook(
        name=name,
        description=_('File Copy: Example Spark job'),
        editor_type='spark2',
        statement='',
        status='ready',
        snippet_properties=snippet_properties,
        is_saved=True
      )

      # Remove files, functions, settings from snippet properties
      data = notebook.get_data()
      data['snippets'][0]['properties'].pop('functions')
      data['snippets'][0]['properties'].pop('settings')

      try:
        with transaction.atomic():
          doc2 = Document2.objects.create(
            owner=self.user,
            name=data['name'],
            type='query-spark2',
            description=data['description'],
            data=json.dumps(data)
          )
      except Exception, e:
        LOG.exception("Failed to create sample Spark job document: %s" % e)
        # Just to be sure we delete Doc2 object incase of exception.
        # Possible when there are mixed InnoDB and MyISAM tables
        if doc2 and Document2.objects.filter(id=doc2.id).exists():
          doc2.delete()
Example #22
def drop_partition(request, database, table):
    source_type = request.POST.get('source_type', 'hive')
    cluster = json.loads(request.POST.get('cluster', '{}'))

    db = _get_db(user=request.user, source_type=source_type, cluster=cluster)

    if request.method == 'POST':
        partition_specs = request.POST.getlist('partition_selection')
        partition_specs = [spec for spec in partition_specs]
        try:
            if request.GET.get("format", "html") == "json":
                last_executed = json.loads(request.POST.get('start_time'),
                                           '-1')
                sql = db.drop_partitions(database,
                                         table,
                                         partition_specs,
                                         design=None,
                                         generate_ddl_only=True)
                job = make_notebook(name=_('Drop partition %s') %
                                    ', '.join(partition_specs)[:100],
                                    editor_type=source_type,
                                    statement=sql.strip(),
                                    status='ready',
                                    database=None,
                                    on_success_url='assist.db.refresh',
                                    is_task=True,
                                    last_executed=last_executed)
                return JsonResponse(job.execute(request))
            else:
                design = SavedQuery.create_empty(app_name='beeswax',
                                                 owner=request.user,
                                                 data=hql_query('').dumps())
                query_history = db.drop_partitions(database, table,
                                                   partition_specs, design)
                url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id}) + '?on_success_url=' + \
                      reverse('metastore:describe_partitions', kwargs={'database': database, 'table': table})
                return redirect(url)
        except Exception as ex:
            error_message, log = dbms.expand_exception(ex, db)
            error = _("Failed to remove %(partition)s.  Error: %(error)s") % {
                'partition': '\n'.join(partition_specs),
                'error': error_message
            }
            raise PopupException(error, title=_("DB Error"), detail=log)
    else:
        title = _("Do you really want to delete the partition(s)?")
        return render('confirm.mako', request, {
            'url': request.path,
            'title': title
        })
Example #23
  def _install_java_example(self):
    doc2 = None
    name = _('Java Terasort Job')

    if Document2.objects.filter(owner=self.user, name=name, type='query-java', is_history=False).exists():
      LOG.info("Sample Java editor job already installed.")
      doc2 = Document2.objects.get(owner=self.user, name=name, type='query-java', is_history=False)
    else:
      snippet_properties = {
        'app_jar': '/user/hue/oozie/workspaces/lib/hadoop-examples.jar',
        'class': 'org.apache.hadoop.examples.terasort.TeraSort',
        'java_opts': '',
        'hadoopProperties': [],
        'archives': [],
        'files': [],
        'arguments': ['output_dir/teragen', 'output_dir/terasort'],
        'capture_output': False
      }

      notebook = make_notebook(
        name=name,
        description=_('Terasort: Example Java job'),
        editor_type='java',
        statement='',
        status='ready',
        snippet_properties=snippet_properties,
        is_saved=True
      )

      # Remove files, functions, settings from snippet properties
      data = notebook.get_data()
      data['snippets'][0]['properties'].pop('functions')
      data['snippets'][0]['properties'].pop('settings')

      try:
        with transaction.atomic():
          doc2 = Document2.objects.create(
            owner=self.user,
            name=data['name'],
            type='query-java',
            description=data['description'],
            data=json.dumps(data)
          )
      except Exception, e:
        LOG.exception("Failed to create sample Java job document: %s" % e)
        # Just to be sure we delete Doc2 object incase of exception.
        # Possible when there are mixed InnoDB and MyISAM tables
        if doc2 and Document2.objects.filter(id=doc2.id).exists():
          doc2.delete()
Example #24
  def _install_java_example(self):
    doc2 = None
    name = _('Java TeraGen Job')

    if Document2.objects.filter(owner=self.user, name=name, type='query-java', is_history=False).exists():
      LOG.info("Sample Java editor job already installed.")
      doc2 = Document2.objects.get(owner=self.user, name=name, type='query-java', is_history=False)
    else:
      snippet_properties = {
        'app_jar': '/user/hue/oozie/workspaces/lib/hadoop-examples.jar',
        'class': 'org.apache.hadoop.examples.terasort.TeraGen',
        'java_opts': '',
        'hadoopProperties': [],
        'archives': [],
        'files': [],
        'arguments': ['10000', 'output_dir/teragen'],
        'capture_output': False
      }

      notebook = make_notebook(
        name=name,
        description=_('TeraGen: Generates N rows of random data to a directory.'),
        editor_type='java',
        statement='',
        status='ready',
        snippet_properties=snippet_properties,
        is_saved=True
      )

      # Remove files, functions, settings from snippet properties
      data = notebook.get_data()
      data['snippets'][0]['properties'].pop('functions')
      data['snippets'][0]['properties'].pop('settings')

      try:
        with transaction.atomic():
          doc2 = Document2.objects.create(
            owner=self.user,
            name=data['name'],
            type='query-java',
            description=data['description'],
            data=json.dumps(data)
          )
      except Exception, e:
        LOG.exception("Failed to create sample Java job document: %s" % e)
        # Just to be sure we delete Doc2 object incase of exception.
        # Possible when there are mixed InnoDB and MyISAM tables
        if doc2 and Document2.objects.filter(id=doc2.id).exists():
          doc2.delete()
Example #25
    def run(self,
            request,
            collection_name,
            envelope,
            input_path,
            start_time=None,
            lib_path=None):
        workspace_path = self._upload_workspace(envelope)

        task = make_notebook(
            name=_('Indexing into %s') % collection_name,
            editor_type='notebook',
            #on_success_url=reverse('search:browse', kwargs={'name': collection_name}),
            #pub_sub_url='assist.collections.refresh',
            is_task=True,
            is_notebook=True,
            last_executed=start_time)

        if not DISABLE_HUE_3.get():  # CDH5
            shell_command_name = "pipeline.sh"
            shell_command = """#!/bin/bash

SPARK_KAFKA_VERSION=0.10 spark2-submit envelope.jar envelope.conf"""
            hdfs_shell_cmd_path = os.path.join(workspace_path,
                                               shell_command_name)
            self.fs.do_as_user(self.username,
                               self.fs.create,
                               hdfs_shell_cmd_path,
                               data=shell_command)
            task.add_shell_snippet(shell_command=shell_command_name,
                                   files=[{
                                       u'value':
                                       u'%s/envelope.conf' % workspace_path
                                   }, {
                                       u'value': hdfs_shell_cmd_path
                                   }, {
                                       u'value': lib_path,
                                   }])
        else:
            task.add_spark_snippet(clazz=None,
                                   jars=lib_path,
                                   arguments=[u'envelope.conf'],
                                   files=[{
                                       u'path':
                                       u'%s/envelope.conf' % workspace_path,
                                       u'type': u'file'
                                   }])

        return task.execute(request, batch=True)
Example #26
    def _sync_execute(self, sql, database):
        editor = make_notebook(name='Execute and watch',
                               editor_type=self.engine,
                               statement=sql,
                               database=database,
                               status='ready-execute',
                               skip_historify=True
                               # async=False
                               )

        request = MockRequest(self.user, self.cluster)
        mock_notebook = {}
        snippet = {'type': self.engine}
        response = editor.execute(request)

        if 'handle' in response:
            snippet['result'] = response

            if response['handle'].get('sync'):
                result = response['result']
            else:
                timeout_sec = 20  # To move to Notebook API
                sleep_interval = 0.5
                curr = time.time()
                end = curr + timeout_sec

                api = get_api(request, snippet)

                while curr <= end:
                    status = api.check_status(mock_notebook, snippet)
                    if status['status'] == 'available':
                        result = api.fetch_result(mock_notebook,
                                                  snippet,
                                                  rows=10,
                                                  start_over=True)
                        api.close_statement(mock_notebook, snippet)
                        break
                    time.sleep(sleep_interval)
                    curr = time.time()

                if curr > end:
                    try:
                        api.cancel_operation(snippet)
                    except Exception as e:
                        LOG.warning("Failed to cancel query: %s" % e)
                        api.close_statement(mock_notebook, snippet)
                    raise OperationTimeout(e)

        return result
Example #27
def alanize_fix(request):
    response = {'status': -1}
    fix = json.loads(request.POST.get('fix'))
    start_time = json.loads(request.POST.get('start_time'), '-1')
    if fix['id'] == 0:
        notebook = make_notebook(name=_('compute stats %(data)s') % fix,
                                 editor_type='impala',
                                 statement='compute stats %(data)s' % fix,
                                 status='ready',
                                 last_executed=start_time,
                                 is_task=True)
        response['details'] = {'task': notebook.execute(request, batch=True)}
        response['status'] = 0

    return JsonResponse(response)
Example #28
File: views.py Project: mapr/hue
def load_table(request, database, table):
  response = {'status': -1, 'data': 'None'}

  source_type = request.POST.get('source_type', request.GET.get('source_type', 'hive'))
  cluster = json.loads(request.POST.get('cluster', '{}'))

  db = _get_db(user=request.user, source_type=source_type, cluster=cluster)

  table = db.get_table(database, table)

  if request.method == "POST":
    load_form = LoadDataForm(table, request.POST)

    if load_form.is_valid():
      on_success_url = reverse('metastore:describe_table', kwargs={'database': database, 'table': table.name})
      generate_ddl_only = request.POST.get('is_embeddable', 'false') == 'true'
      try:
        design = SavedQuery.create_empty(app_name=source_type if source_type != 'hive' else 'beeswax', owner=request.user, data=hql_query('').dumps())
        form_data = {
          'path': load_form.cleaned_data['path'],
          'overwrite': load_form.cleaned_data['overwrite'],
          'partition_columns': [(column_name, load_form.cleaned_data[key]) for key, column_name in load_form.partition_columns.iteritems()],
        }
        query_history = db.load_data(database, table.name, form_data, design, generate_ddl_only=generate_ddl_only)
        if generate_ddl_only:
          last_executed = json.loads(request.POST.get('start_time'), '-1')
          job = make_notebook(
            name=_('Load data in %s.%s') % (database, table.name),
            editor_type=source_type,
            statement=query_history.strip(),
            status='ready',
            database=database,
            on_success_url='assist.db.refresh',
            is_task=True,
            last_executed=last_executed
          )
          response = job.execute(request)
        else:
          url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id}) + '?on_success_url=' + on_success_url
          response['status'] = 0
          response['data'] = url
          response['query_history_id'] = query_history.id
      except QueryError, ex:
        response['status'] = 1
        response['data'] = _("Can't load the data: ") + ex.message
      except Exception, e:
        response['status'] = 1
        response['data'] = _("Can't load the data: ") + str(e)
Example #29
def load_table(request, database, table):
  response = {'status': -1, 'data': 'None'}

  source_type = request.POST.get('source_type', 'hive')
  cluster = json.loads(request.POST.get('cluster', '{}'))

  db = _get_db(user=request.user, source_type=source_type, cluster=cluster)

  table = db.get_table(database, table)

  if request.method == "POST":
    load_form = LoadDataForm(table, request.POST)

    if load_form.is_valid():
      on_success_url = reverse('metastore:describe_table', kwargs={'database': database, 'table': table.name})
      generate_ddl_only = request.POST.get('is_embeddable', 'false') == 'true'
      try:
        design = SavedQuery.create_empty(app_name=source_type if source_type != 'hive' else 'beeswax', owner=request.user, data=hql_query('').dumps())
        form_data = {
          'path': load_form.cleaned_data['path'],
          'overwrite': load_form.cleaned_data['overwrite'],
          'partition_columns': [(column_name, load_form.cleaned_data[key]) for key, column_name in load_form.partition_columns.iteritems()],
        }
        query_history = db.load_data(database, table.name, form_data, design, generate_ddl_only=generate_ddl_only)
        if generate_ddl_only:
          last_executed = json.loads(request.POST.get('start_time'), '-1')
          job = make_notebook(
            name=_('Load data in %s.%s') % (database, table.name),
            editor_type=source_type,
            statement=query_history.strip(),
            status='ready',
            database=database,
            on_success_url='assist.db.refresh',
            is_task=True,
            last_executed=last_executed
          )
          response = job.execute(request)
        else:
          url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id}) + '?on_success_url=' + on_success_url
          response['status'] = 0
          response['data'] = url
          response['query_history_id'] = query_history.id
      except QueryError, ex:
        response['status'] = 1
        response['data'] = _("Can't load the data: ") + ex.message
      except Exception, e:
        response['status'] = 1
        response['data'] = _("Can't load the data: ") + str(e)
Example #30
def browse(request, database, table):
  editor_type = request.GET.get('type', 'hive')

  snippet = {'type': editor_type}
  sql_select = get_api(request.user, snippet, request.fs, request.jt).get_select_star_query(snippet, database, table)

  editor = make_notebook(name='Browse', editor_type=editor_type, statement=sql_select, status='ready-execute')

  return render('editor.mako', request, {
      'notebooks_json': json.dumps([editor.get_data()]),
      'options_json': json.dumps({
          'languages': [{"name": "%s SQL" % editor_type.title(), "type": editor_type}],
          'mode': 'editor',
      }),
      'editor_type': editor_type,
  })
Example #31
    def list_tasks(self, user):
        sql_query = "SELECT * FROM information_schema.scheduled_queries"

        job = make_notebook(
            name='List Hive schedules',
            editor_type='hive',
            statement=sql_query,
            status='ready',
            database='default',
            is_task=False,
        )
        request = MockRequest(user)

        handle = job.execute_and_wait(request, include_results=True)

        return [self._get_task(row) for row in handle['result']['data']]
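Example #31 above retrieves rows directly by passing include_results=True to execute_and_wait, whereas the _sync_execute helpers in Examples #26 and #33 poll check_status and fetch_result themselves. A minimal sketch of the simpler include_results route, assuming the same Hue environment and that the returned handle exposes result.data exactly as in the snippet above:

from notebook.models import make_notebook  # assumed import path


def fetch_rows(request, sql, dialect='hive', database='default'):
  # Run a short query synchronously and return its rows, mirroring list_tasks;
  # the handle layout ['result']['data'] is taken from that example.
  job = make_notebook(
      name='Fetch rows',
      editor_type=dialect,
      statement=sql,
      status='ready',
      database=database,
      is_task=False,
  )
  handle = job.execute_and_wait(request, include_results=True)
  return handle['result']['data']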
Example #32
def browse(request, database, table):
  editor_type = request.GET.get('type', 'hive')

  snippet = {'type': editor_type}
  sql_select = get_api(request, snippet).get_select_star_query(snippet, database, table)

  editor = make_notebook(name='Browse', editor_type=editor_type, statement=sql_select, status='ready-execute')

  return render('editor.mako', request, {
      'notebooks_json': json.dumps([editor.get_data()]),
      'options_json': json.dumps({
          'languages': [{"name": "%s SQL" % editor_type.title(), "type": editor_type}],
          'mode': 'editor',
      }),
      'editor_type': editor_type,
  })
Example #33
  def _sync_execute(self, sql, database):
    editor = make_notebook(
        name='Execute and watch',
        editor_type=self.engine,
        statement=sql,
        database=database,
        status='ready-execute',
        skip_historify=True
        # async=False
    )

    request = MockRequest(self.user)
    mock_notebook = {}
    snippet = {'type': self.engine}
    response = editor.execute(request)


    if 'handle' in response:
      snippet['result'] = response

      if response['handle'].get('sync'):
        result = response['result']
      else:
        timeout_sec = 20 # To move to Notebook API
        sleep_interval = 0.5
        curr = time.time()
        end = curr + timeout_sec

        api = get_api(request, snippet)

        while curr <= end:
          status = api.check_status(mock_notebook, snippet)
          if status['status'] == 'available':
            result = api.fetch_result(mock_notebook, snippet, rows=10, start_over=True)
            api.close_statement(snippet)
            break
          time.sleep(sleep_interval)
          curr = time.time()

        if curr > end:
          try:
            api.cancel_operation(snippet)
          except Exception, e:
            LOG.warning("Failed to cancel query: %s" % e)
            api.close_statement(snippet)
          raise OperationTimeout(e)

    return result
Example #34
    def run(self,
            request,
            collection_name,
            configs,
            input_path,
            start_time=None,
            lib_path=None):
        workspace_path = self._upload_workspace(configs)

        if lib_path is None:
            lib_path = CONFIG_JARS_LIBS_PATH.get()

        task = make_notebook(
            name=_('Indexing into %s') % collection_name,
            editor_type='notebook',
            #on_success_url=reverse('search:browse', kwargs={'name': collection_name}),
            #pub_sub_url='assist.collections.refresh',
            is_task=True,
            is_notebook=True,
            last_executed=start_time)

        shell_command_name = "pipeline.sh"
        shell_command = """#!/bin/bash

export SPARK_DIST_CLASSPATH=`hadoop classpath`
export SPARK_DIST_CLASSPATH=/etc/hive/conf:`hadoop classpath`
export JAVA_HOME=/usr/java/jdk1.8.0_162

SPARK_KAFKA_VERSION=0.10 spark2-submit envelope.jar envelope.conf"""
        hdfs_shell_cmd_path = os.path.join(workspace_path, shell_command_name)
        self.fs.do_as_user(self.username,
                           self.fs.create,
                           hdfs_shell_cmd_path,
                           data=shell_command)
        task.add_shell_snippet(shell_command=shell_command_name,
                               files=[{
                                   u'value':
                                   u'%s/envelope.conf' % workspace_path
                               }, {
                                   u'value': hdfs_shell_cmd_path
                               }, {
                                   u'value': lib_path
                               }])

        return task.execute(request, batch=True)
Example #35
def browse(request, database, table):
    snippet = {"type": "hive"}
    sql_select = get_api(request, snippet).get_select_star_query(snippet, database, table)

    editor_type = snippet["type"]
    editor = make_notebook(name="Browse", editor_type=editor_type, statement=sql_select, status="ready-execute")

    return render(
        "editor.mako",
        request,
        {
            "notebooks_json": json.dumps([editor.get_data()]),
            "options_json": json.dumps(
                {"languages": get_ordered_interpreters(request.user), "mode": "editor", "editor_type": editor_type}
            ),
            "editor_type": editor_type,
        },
    )
Example #36
File: api.py Project: cloudera/hue
def alanize_fix(request):
  response = {'status': -1}
  cluster = json.loads(request.POST.get('cluster', '{}'))
  fix = json.loads(request.POST.get('fix'))
  start_time = json.loads(request.POST.get('start_time'), '-1')
  if fix['id'] == 0:
    notebook = make_notebook(
      name=_('compute stats %(data)s') % fix,
      editor_type='impala',
      statement='compute stats %(data)s' % fix,
      status='ready',
      last_executed=start_time,
      is_task=True,
      compute=cluster
    )
    response['details'] = { 'task': notebook.execute(request, batch=True) }
    response['status'] = 0

  return JsonResponse(response)
Example #37
File: api.py Project: JohnWey/hue
def _get_sample_data(db, database, table, column, is_async=False, cluster=None, operation=None):
  if operation == 'hello':
    table_obj = None
  else:
    table_obj = db.get_table(database, table)
    if table_obj.is_impala_only and db.client.query_server['server_name'] != 'impala':  # Kudu table, now Hive should support it though
      query_server = get_query_server_config('impala', connector=cluster)
      db = dbms.get(db.client.user, query_server, cluster=cluster)

  sample_data = db.get_sample(database, table_obj, column, generate_sql_only=is_async, operation=operation)
  response = {'status': -1}

  if sample_data:
    response['status'] = 0
    if is_async:
      notebook = make_notebook(
          name=_('Table sample for `%(database)s`.`%(table)s`.`%(column)s`') % {'database': database, 'table': table, 'column': column},
          editor_type=_get_servername(db),
          statement=sample_data,
          status='ready-execute',
          skip_historify=True,
          is_task=False,
          compute=cluster if cluster else None
      )
      response['result'] = notebook.execute(request=MockedDjangoRequest(user=db.client.user), batch=False)
      if table_obj.is_impala_only:
        response['result']['type'] = 'impala'
    else:
      sample = escape_rows(sample_data.rows(), nulls_only=True)
      if column:
        sample = set([row[0] for row in sample])
        sample = [[item] for item in sorted(list(sample))]

      response['headers'] = sample_data.cols()
      response['full_headers'] = sample_data.full_cols()
      response['rows'] = sample
  else:
    response['message'] = _('Failed to get sample data.')

  return response
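For reference, a minimal sketch of consuming the response shape returned above (assuming `db` is a dbms handle already resolved for the current user; the database and table names are illustrative):

response = _get_sample_data(db, 'default', 'web_logs', column=None, is_async=False)
if response['status'] == 0:
    headers = response['headers']    # column names from sample_data.cols()
    rows = response['rows']          # escaped sample rows
else:
    error = response.get('message')  # 'Failed to get sample data.'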
Example #38
0
    def submit_schedule(self, request, coordinator, mapping):
        """
    coordinator
      Document2.objects.get(uuid=coordinator.get_data_for_json()['properties']['document'])

    mapping
      {u'oozie.use.system.libpath': u'True', 'dryrun': False, u'start_date': u'2019-08-10T17:02', u'end_date': u'2019-08-17T17:02'}
    """

        document = Document2.objects.get(
            uuid=coordinator.get_data_for_json()['properties']['document']
        )  # Assumes Hive SQL queries

        # (schedule_name,cluster_namespace) is unique
        #_get_snippet_name(notebook) --> name

        properties = {
            'name': 'query-%(uuid)s' % {
                'uuid': document.uuid
            },
            'username': request.user.username
        }

        sql_query = """
    CREATE SCHEDULED QUERY %(name)s
    CRON '1 1 * * *' AS
    SELECT 1
    """ % properties

        job = make_notebook(
            name=properties['name'],
            editor_type='hive',
            statement=sql_query,
            status='ready',
            database='default',
            is_task=False,
        )
        handle = job.execute_and_wait(request)

        return handle['history_uuid']
Example #39
0
def create_notebook(request):
    response = {'status': -1}

    editor_type = request.POST.get('type', 'notebook')
    gist_id = request.POST.get('gist')
    directory_uuid = request.POST.get('directory_uuid')
    is_blank = request.POST.get('blank', 'false') == 'true'

    if gist_id:
        gist_doc = _get_gist_document(uuid=gist_id)
        statement = json.loads(gist_doc.data)['statement']

        editor = make_notebook(name='',
                               description='',
                               editor_type=editor_type,
                               statement=statement,
                               is_presentation_mode=True)
    else:
        editor = Notebook()

        if EXAMPLES.AUTO_OPEN.get() and not is_blank:
            document = _get_dialect_example(dialect=editor_type)
            if document:
                editor = Notebook(document=document)
                editor = upgrade_session_properties(request, editor)

    data = editor.get_data()

    if editor_type != 'notebook':
        data['name'] = ''
        data['type'] = 'query-%s' % editor_type  # TODO: Add handling for non-SQL types

    data['directoryUuid'] = directory_uuid
    editor.data = json.dumps(data)

    response['notebook'] = editor.get_data()
    response['status'] = 0

    return JsonResponse(response)
Example #40
0
File: views.py Project: mapr/hue
def drop_table(request, database):
  source_type = request.POST.get('source_type', request.GET.get('source_type', 'hive'))
  cluster = json.loads(request.POST.get('cluster', '{}'))

  db = _get_db(user=request.user, source_type=source_type, cluster=cluster)

  if request.method == 'POST':
    try:
      tables = request.POST.getlist('table_selection')
      tables_objects = [db.get_table(database, table) for table in tables]
      skip_trash = request.POST.get('skip_trash') == 'on'
      cluster = json.loads(request.POST.get('cluster', '{}'))
      namespace = request.POST.get('namespace')

      if request.POST.get('is_embeddable'):
        last_executed = json.loads(request.POST.get('start_time', '-1'))
        sql = db.drop_tables(database, tables_objects, design=None, skip_trash=skip_trash, generate_ddl_only=True)
        job = make_notebook(
            name=_('Drop table %s') % ', '.join([table.name for table in tables_objects])[:100],
            editor_type=source_type,
            statement=sql.strip(),
            status='ready',
            database=database,
            namespace=namespace,
            compute=cluster,
            on_success_url='assist.db.refresh',
            is_task=True,
            last_executed=last_executed
        )
        return JsonResponse(job.execute(request))
      else:
        # Can't be simpler without an important refactoring
        design = SavedQuery.create_empty(app_name='beeswax', owner=request.user, data=hql_query('').dumps())
        query_history = db.drop_tables(database, tables_objects, design, skip_trash=skip_trash)
        url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id}) + '?on_success_url=' + reverse('metastore:show_tables', kwargs={'database': database})
        return redirect(url)
    except Exception as ex:
      error_message, log = dbms.expand_exception(ex, db)
      error = _("Failed to remove %(tables)s.  Error: %(error)s") % {'tables': ','.join(tables), 'error': error_message}
      raise PopupException(error, title=_("DB Error"), detail=log)
Example #41
0
File: views.py Project: mapr/hue
def drop_database(request):
  source_type = request.POST.get('source_type', request.GET.get('source_type', 'hive'))
  cluster = json.loads(request.POST.get('cluster', '{}'))

  db = _get_db(user=request.user, source_type=source_type, cluster=cluster)

  if request.method == 'POST':
    databases = request.POST.getlist('database_selection')

    try:
      if request.POST.get('is_embeddable'):
        design = SavedQuery.create_empty(app_name=source_type if source_type != 'hive' else 'beeswax', owner=request.user, data=hql_query('').dumps())
        last_executed = json.loads(request.POST.get('start_time', '-1'))
        cluster = json.loads(request.POST.get('cluster', '{}'))
        namespace = request.POST.get('namespace')
        sql = db.drop_databases(databases, design, generate_ddl_only=True)
        job = make_notebook(
            name=_('Drop database %s') % ', '.join(databases)[:100],
            editor_type=source_type,
            statement=sql.strip(),
            status='ready',
            database=None,
            namespace=namespace,
            compute=cluster,
            on_success_url='assist.db.refresh',
            is_task=True,
            last_executed=last_executed
        )
        return JsonResponse(job.execute(request))
      else:
        design = SavedQuery.create_empty(app_name='beeswax', owner=request.user, data=hql_query('').dumps())
        query_history = db.drop_databases(databases, design)
        url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id}) + '?on_success_url=' + reverse('metastore:databases')
        return redirect(url)
    except Exception as ex:
      error_message, log = dbms.expand_exception(ex, db)
      error = _("Failed to remove %(databases)s.  Error: %(error)s") % {'databases': ','.join(databases), 'error': error_message}
      raise PopupException(error, title=_("DB Error"), detail=log)
Example #42
0
def drop_database(request):
    db = dbms.get(request.user)

    if request.method == 'POST':
        databases = request.POST.getlist('database_selection')

        try:
            design = SavedQuery.create_empty(app_name='beeswax',
                                             owner=request.user,
                                             data=hql_query('').dumps())

            if request.POST.get('is_embeddable'):
                sql = db.drop_databases(databases,
                                        design,
                                        generate_ddl_only=True)
                job = make_notebook(name='Execute and watch',
                                    editor_type='hive',
                                    statement=sql.strip(),
                                    status='ready',
                                    database=None,
                                    on_success_url='assist.db.refresh',
                                    is_task=True)
                return JsonResponse(job.execute(request))
            else:
                query_history = db.drop_databases(databases, design)
                url = reverse(
                    'beeswax:watch_query_history',
                    kwargs={
                        'query_history_id': query_history.id
                    }) + '?on_success_url=' + reverse('metastore:databases')
                return redirect(url)
        except Exception as ex:
            error_message, log = dbms.expand_exception(ex, db)
            error = _("Failed to remove %(databases)s.  Error: %(error)s") % {
                'databases': ','.join(databases),
                'error': error_message
            }
            raise PopupException(error, title=_("Hive Error"), detail=log)
Example #43
0
def create_database(request, source, destination):
  database = destination['name']
  comment = destination['description']

  use_default_location = destination['useDefaultLocation']
  external_path = destination['nonDefaultLocation']

  sql = django_mako.render_to_string("gen/create_database_statement.mako", {
      'database': {
          'name': database,
          'comment': comment,
          'use_default_location': use_default_location,
          'external_location': external_path,
          'properties': [],
      }
    }
  )

  editor_type = 'hive'
  on_success_url = reverse('metastore:show_tables', kwargs={'database': database})

  notebook = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready', on_success_url=on_success_url)
  return notebook.execute(request, batch=False)
Example #44
0
  def create_query_document(self, owner, query_type='hive', database='default',
                            name='Test Query', description='Test Query', statement='',
                            files=None, functions=None, settings=None):
      """
      Creates and returns a query Document2 object
      :param owner: owner of doc
      :param query_type: hive, impala or spark
      :param database: database name
      :param name: name of document
      :param description: description of document
      :param statement: SQL statement (can be multi-query statement)
      :param files: list of dicts representing files
      :param functions: list of dicts representing functions
      :param settings: list of dicts representing settings
      :return: Document2 object representing query
      """
      if query_type not in ('hive', 'impala', 'spark'):
          raise ValueError("Invalid query_type: %s" % query_type)

      notebook = make_notebook(name=name, description=description, editor_type=query_type, statement=statement,
                               status='ready', database=database, files=files, functions=functions, settings=settings)
      notebook_doc, save_as = _save_notebook(notebook.get_data(), owner)
      return notebook_doc
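A minimal usage sketch, assuming `self` is the test helper class that defines create_query_document and `self.user` is a saved Django user:

doc = self.create_query_document(owner=self.user, query_type='hive', statement='SELECT 1')
notebook = Notebook(document=doc)  # wrap the saved Document2 back into a Notebook, as in the other examples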
Example #45
0
    def list_task(self, task_id):
        task_id = task_id.replace('schedule-hive-', '')

        sql_query = """
    SELECT * FROM information_schema.scheduled_queries
    WHERE scheduled_query_id = %(scheduled_query_id)s
    """ % {
            'scheduled_query_id': task_id
        }

        job = make_notebook(
            name='List Hive schedule id',
            editor_type='hive',
            statement=sql_query,
            status='ready',
            database='default',
            is_task=False,
        )
        request = MockRequest(self.user)

        handle = job.execute_and_wait(request, include_results=True)

        return self._get_task(handle['result']['data'][0])
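Taken together with submit_schedule in Example #38, the scheduling helpers can be chained roughly as below; `scheduler` stands in for whatever object defines both methods, and the task id is illustrative:

history_uuid = scheduler.submit_schedule(request, coordinator, mapping)  # creates the Hive scheduled query
task = scheduler.list_task('schedule-hive-42')  # the 'schedule-hive-' prefix is stripped internally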
Example #46
0
def run_sync_query(doc_id, user):
    '''Independently run a query as a user and insert the result into another table.'''
    # get SQL
    # Add INSERT INTO table
    # Add variables?
    # execute query
    # return when done. send email notification. get taskid.
    # see in Flower API for listing runs?
    from django.contrib.auth.models import User
    from notebook.models import make_notebook, MockedDjangoRequest

    from desktop.auth.backend import rewrite_user

    editor_type = 'impala'
    sql = 'INSERT into customer_scheduled SELECT * FROM default.customers LIMIT 100;'
    request = MockedDjangoRequest(
        user=rewrite_user(User.objects.get(username='******')))

    notebook = make_notebook(
        name='Scheduler query N',
        editor_type=editor_type,
        statement=sql,
        status='ready',
        #on_success_url=on_success_url,
        last_executed=time.mktime(datetime.datetime.now().timetuple()) * 1000,
        is_task=True)

    task = notebook.execute(request, batch=True)

    task['uuid'] = task['history_uuid']
    status = check_status(task)

    while status['status'] in ('waiting', 'running'):
        status = check_status(task)
        time.sleep(3)

    return task
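The submit-and-poll pattern at the end of this function (and of Example #47 below) can be factored into a small helper; a sketch, assuming check_status behaves as it is used above:

def wait_for_task(task, poll_seconds=3):
    """Poll a batch task started by notebook.execute(request, batch=True) until it leaves waiting/running."""
    task['uuid'] = task['history_uuid']
    status = check_status(task)
    while status['status'] in ('waiting', 'running'):
        time.sleep(poll_seconds)
        status = check_status(task)
    return status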
Example #47
0
def run_sync_query(doc_id, user):
    '''Independently run a query as a user.'''
    # Add INSERT INTO table if persist result
    # Add variable substitution
    # Send notifications: done/on failure
    if type(user) is str:
        lookup = {orm_user_lookup(): user}
        user = User.objects.get(**lookup)
        user = rewrite_user(user)

    query_document = Document2.objects.get_by_uuid(user=user, uuid=doc_id)
    notebook = Notebook(document=query_document).get_data()
    snippet = notebook['snippets'][0]

    editor_type = snippet['type']
    sql = _get_statement(notebook)
    request = MockedDjangoRequest(user=user)
    last_executed = time.mktime(datetime.datetime.now().timetuple()) * 1000

    notebook = make_notebook(name='Scheduled query %s at %s' %
                             (query_document.name, last_executed),
                             editor_type=editor_type,
                             statement=sql,
                             status='ready',
                             last_executed=last_executed,
                             is_task=True)

    task = notebook.execute(request, batch=True)

    task['uuid'] = task['history_uuid']
    status = check_status(task)

    while status['status'] in ('waiting', 'running'):
        status = check_status(task)
        time.sleep(3)

    return task
Example #48
0
    def create_query_document(self,
                              owner,
                              query_type='hive',
                              database='default',
                              name='Test Query',
                              description='Test Query',
                              statement='',
                              files=None,
                              functions=None,
                              settings=None):
        """
      Creates and returns a query Document2 object
      :param owner: owner of doc
      :param query_type: hive, impala or spark
      :param database: database name
      :param name: name of document
      :param description: description of document
      :param statement: SQL statement (can be multi-query statement)
      :param files: list of dicts representing files
      :param functions: list of dicts representing functions
      :param settings: list of dicts representing settings
      :return: Document2 object representing query
      """
        if query_type not in ('hive', 'impala', 'spark'):
            raise ValueError("Invalid query_type: %s" % query_type)

        notebook = make_notebook(name=name,
                                 description=description,
                                 editor_type=query_type,
                                 statement=statement,
                                 status='ready',
                                 database=database,
                                 files=files,
                                 functions=functions,
                                 settings=settings)
        notebook_doc, save_as = _save_notebook(notebook.get_data(), owner)
        return notebook_doc
Example #49
0
  def run_morphline(self, collection_name, morphline, input_path):
    workspace_path = self._upload_workspace(morphline)

    snippet_properties =  {
      u'files': [
          {u'path': u'%s/log4j.properties' % workspace_path, u'type': u'file'},
          {u'path': u'%s/morphline.conf' % workspace_path, u'type': u'file'}
      ],
      u'class': u'org.apache.solr.hadoop.MapReduceIndexerTool',
      u'app_jar': CONFIG_INDEXER_LIBS_PATH.get(),
      u'arguments': [
          u'--morphline-file',
          u'morphline.conf',
          u'--output-dir',
          u'${nameNode}/user/%s/indexer' % self.username,
          u'--log4j',
          u'log4j.properties',
          u'--go-live',
          u'--zk-host',
          zkensemble(),
          u'--collection',
          collection_name,
          u'${nameNode}%s' % input_path,
      ],
      u'archives': [],
    }

    notebook = make_notebook(name='Indexer', editor_type='java', snippet_properties=snippet_properties).get_data()
    notebook_doc, created = _save_notebook(notebook, self.user)

    workflow_doc = WorkflowBuilder().create_workflow(document=notebook_doc, user=self.user, managed=True, name=_("Batch job for %s") % notebook_doc.name)
    workflow = Workflow(document=workflow_doc, user=self.user)

    job_id = _submit_workflow(user=self.user, fs=self.fs, jt=self.jt, workflow=workflow, mapping=None)

    return job_id
Example #50
0
  def run_morphline(self, request, collection_name, morphline, input_path):
    workspace_path = self._upload_workspace(morphline)

#     snippets = [
#       {
#         u'type': u'java',
#         u'files': [
#             {u'path': u'%s/log4j.properties' % workspace_path, u'type': u'file'},
#             {u'path': u'%s/morphline.conf' % workspace_path, u'type': u'file'}
#         ],
#         u'class': u'org.apache.solr.hadoop.MapReduceIndexerTool',
#         u'app_jar': CONFIG_INDEXER_LIBS_PATH.get(),
#         u'arguments': [
#             u'--morphline-file',
#             u'morphline.conf',
#             u'--output-dir',
#             u'${nameNode}/user/%s/indexer' % self.username,
#             u'--log4j',
#             u'log4j.properties',
#             u'--go-live',
#             u'--zk-host',
#             zkensemble(),
#             u'--collection',
#             collection_name,
#             input_path,
#         ],
#         u'archives': [],
#       }
#     ]
#
#     # managed notebook
#     notebook = make_notebook2(name='Indexer job for %s' % collection_name, snippets=snippets).get_data()
#     notebook_doc, created = _save_notebook(notebook, self.user)
#
#     snippet = {'wasBatchExecuted': True}

    snippet_properties =  {
       u'files': [
           {u'path': u'%s/log4j.properties' % workspace_path, u'type': u'file'},
           {u'path': u'%s/morphline.conf' % workspace_path, u'type': u'file'}
       ],
       u'class': u'org.apache.solr.hadoop.MapReduceIndexerTool',
       u'app_jar': CONFIG_INDEXER_LIBS_PATH.get(),
       u'arguments': [
           u'--morphline-file',
           u'morphline.conf',
           u'--output-dir',
           u'${nameNode}/user/%s/indexer' % self.username,
           u'--log4j',
           u'log4j.properties',
           u'--go-live',
           u'--zk-host',
           zkensemble(),
           u'--collection',
           collection_name,
           input_path,
       ],
       u'archives': [],
    }

    notebook = make_notebook(name='Indexer', editor_type='java', snippet_properties=snippet_properties, status='running').get_data()
    notebook_doc, created = _save_notebook(notebook, self.user)

    snippet = {'wasBatchExecuted': True, 'id': notebook['snippets'][0]['id'], 'statement': ''}

    job_handle = _execute_notebook(request, notebook, snippet)

    return job_handle
Example #51
0
File: api.py Project: cloudera/hue
  table_obj = db.get_table(database, table)
  if table_obj.is_impala_only and db.client.query_server['server_name'] != 'impala':
    query_server = get_query_server_config('impala', cluster=cluster)
    db = dbms.get(db.client.user, query_server, cluster=cluster)

  sample_data = db.get_sample(database, table_obj, column, generate_sql_only=is_async, operation=operation)
  response = {'status': -1}

  if sample_data:
    response['status'] = 0
    if is_async:
      notebook = make_notebook(
          name=_('Table sample for `%(database)s`.`%(table)s`.`%(column)s`') % {'database': database, 'table': table, 'column': column},
          editor_type=_get_servername(db),
          statement=sample_data,
          status='ready-execute',
          skip_historify=True,
          is_task=False,
          compute=cluster if cluster else None
      )
      response['result'] = notebook.execute(request=MockedDjangoRequest(user=db.client.user), batch=False)
      if table_obj.is_impala_only:
        response['result']['type'] = 'impala'
    else:
      sample = escape_rows(sample_data.rows(), nulls_only=True)
      if column:
        sample = set([row[0] for row in sample])
        sample = [[item] for item in sorted(list(sample))]

      response['headers'] = sample_data.cols()
      response['full_headers'] = sample_data.full_cols()
Example #52
0
  def query(self, dashboard, query, facet=None):
    database, table = self._get_database_table_names(dashboard['name'])

    if query['qs'] == [{'q': '_root_:*'}]:
      return {'response': {'numFound': 0}}

    filters = [q['q'] for q in query['qs'] if q['q']]
    filters.extend(self._get_fq(dashboard, query, facet))

    timeFilter = self._get_time_filter_query(dashboard, query)
    if timeFilter:
      filters.append(timeFilter)

    if facet:
      if facet['type'] == 'nested':
        fields_dimensions = [self._get_dimension_field(f)['name'] for f in self._get_dimension_fields(facet)]
        last_dimension_seen = False
        fields = []
        for f in reversed(facet['properties']['facets']):
          if f['aggregate']['function'] == 'count':
            if not last_dimension_seen:
              fields.insert(0, 'COUNT(*) AS Count')
              last_dimension_seen = True
            fields.insert(0, self._get_dimension_field(f)['select'])
          else:
            if not last_dimension_seen:
              fields.insert(0, self._get_aggregate_function(f))

        if not last_dimension_seen:
          fields.insert(0, 'COUNT(*) as Count')
        fields.insert(0, self._get_dimension_field(facet)['select'])

        sql = '''SELECT %(fields)s
        FROM %(database)s.%(table)s
        %(filters)s
        GROUP BY %(fields_dimensions)s
        ORDER BY %(order_by)s
        LIMIT %(limit)s''' % {
            'database': database,
            'table': table,
            'fields': ', '.join(fields),
            'fields_dimensions': ', '.join(fields_dimensions),
            'order_by': ', '.join([self._get_dimension_field(f)['order_by'] for f in self._get_dimension_fields(facet)]),
            'filters': self._convert_filters_to_where(filters),
            'limit': LIMIT
        }
      elif facet['type'] == 'function': # 1 dim only now
        sql = '''SELECT %(fields)s
        FROM %(database)s.%(table)s
        %(filters)s''' % {
            'database': database,
            'table': table,
            'fields': self._get_aggregate_function(facet),
            'filters': self._convert_filters_to_where(filters),
        }
    else:
      fields = Collection2.get_field_list(dashboard)
      sql = "SELECT %(fields)s FROM `%(database)s`.`%(table)s`" % {
          'database': database,
          'table': table,
          'fields': ', '.join(['`%s`' % f if f != '*' else '*' for f in fields])
      }
      if filters:
        sql += ' ' + self._convert_filters_to_where(filters)
      sql += ' LIMIT %s' % LIMIT

    editor = make_notebook(
        name='Execute and watch',
        editor_type=dashboard['engine'],
        statement=sql,
        database=database,
        status='ready-execute',
        skip_historify=True
    )

    response = editor.execute(MockRequest(self.user))

    if 'handle' in response and response['handle'].get('sync'):
      response['result'] = self._convert_result(response['result'], dashboard, facet, query)

    return response
Example #53
0
def execute_and_watch(request):
    notebook_id = request.GET.get("editor", request.GET.get("notebook"))
    snippet_id = int(request.GET["snippet"])
    action = request.GET["action"]
    destination = request.GET["destination"]

    notebook = Notebook(document=Document2.objects.get(id=notebook_id)).get_data()
    snippet = notebook["snippets"][snippet_id]
    editor_type = snippet["type"]

    api = get_api(request, snippet)

    if action == "save_as_table":
        sql, success_url = api.export_data_as_table(notebook, snippet, destination)
        editor = make_notebook(
            name="Execute and watch",
            editor_type=editor_type,
            statement=sql,
            status="ready-execute",
            database=snippet["database"],
        )
    elif action == "insert_as_query":
        sql, success_url = api.export_large_data_to_hdfs(notebook, snippet, destination)
        editor = make_notebook(
            name="Execute and watch",
            editor_type=editor_type,
            statement=sql,
            status="ready-execute",
            database=snippet["database"],
        )
    elif action == "index_query":
        sql, success_url = api.export_data_as_table(notebook, snippet, destination, is_temporary=True, location="")
        editor = make_notebook(name="Execute and watch", editor_type=editor_type, statement=sql, status="ready-execute")

        sample = get_api(request, snippet).fetch_result(notebook, snippet, 0, start_over=True)

        from indexer.api3 import _index  # Will be moved to the lib in next commit
        from indexer.file_format import HiveFormat
        from indexer.fields import Field

        file_format = {
            "name": "col",
            "inputFormat": "query",
            "format": {
                "quoteChar": '"',
                "recordSeparator": "\n",
                "type": "csv",
                "hasHeader": False,
                "fieldSeparator": "\u0001",
            },
            "sample": "",
            "columns": [
                Field(col["name"], HiveFormat.FIELD_TYPE_TRANSLATE.get(col["type"], "string")).to_dict()
                for col in sample["meta"]
            ],
        }

        job_handle = _index(request, file_format, destination, query=notebook["uuid"])
        return redirect(reverse("oozie:list_oozie_workflow", kwargs={"job_id": job_handle["handle"]["id"]}))
    else:
        raise PopupException(_("Action %s is unknown") % action)

    return render(
        "editor.mako",
        request,
        {
            "notebooks_json": json.dumps([editor.get_data()]),
            "options_json": json.dumps(
                {
                    "languages": [{"name": "%s SQL" % editor_type.title(), "type": editor_type}],
                    "mode": "editor",
                    "editor_type": editor_type,
                    "success_url": success_url,
                }
            ),
            "editor_type": editor_type,
        },
    )
Example #54
0
def _create_table_from_a_file(request, source, destination):
  if '.' in destination['name']:
    database, table_name = destination['name'].split('.', 1)
  else:
    database = 'default'
    table_name = destination['name']
  final_table_name = table_name

  table_format = destination['tableFormat']

  columns = destination['columns']
  partition_columns = destination['partitionColumns']
  kudu_partition_columns = destination['kuduPartitionColumns']
  print(kudu_partition_columns)
  comment = destination['description']

  source_path = source['path']
  external = not destination['useDefaultLocation']
  external_path = destination['nonDefaultLocation']

  load_data = destination['importData']
  skip_header = destination['hasHeader']

  primary_keys = destination['primaryKeys']

  if destination['useCustomDelimiters']:
    field_delimiter = destination['customFieldDelimiter']
    collection_delimiter = destination['customCollectionDelimiter']
    map_delimiter = destination['customMapDelimiter']
    regexp_delimiter = destination['customRegexp']
  else:
    field_delimiter = ','
    collection_delimiter = r'\\002'
    map_delimiter = r'\\003'
    regexp_delimiter = '.*'

  file_format = 'TextFile'
  row_format = 'Delimited'
  serde_name = ''
  serde_properties = ''
  extra_create_properties = ''
  sql = ''

  if source['inputFormat'] == 'manual':
    load_data = False

  if table_format == 'json':
    row_format = 'serde'
    serde_name = 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
    serde_properties = '''"separatorChar" = "\\t",
   "quoteChar"     = "'",
   "escapeChar"    = "\\\\"
   '''

  if table_format in ('parquet', 'kudu'):
    if load_data:
      table_name, final_table_name = 'hue__tmp_%s' % table_name, table_name

      sql += '\n\nDROP TABLE IF EXISTS `%(database)s`.`%(table_name)s`;\n' % {
          'database': database,
          'table_name': table_name
      }
    else:
      row_format = ''
      file_format = table_format
      skip_header = False
      if table_format == 'kudu':
        columns = [col for col in columns if col['name'] in primary_keys] + [col for col in columns if col['name'] not in primary_keys]

  if external or (load_data and table_format in ('parquet', 'kudu')):
    if not request.fs.isdir(external_path): # File selected
      external_path, external_file_name = request.fs.split(external_path)

      if len(request.fs.listdir(external_path)) > 1:
        external_path = external_path + '/%s_table' % external_file_name # If dir not just the file, create data dir and move file there.
        request.fs.mkdir(external_path)
        request.fs.rename(source_path, external_path)

  sql += django_mako.render_to_string("gen/create_table_statement.mako", {
      'table': {
          'name': table_name,
          'comment': comment,
          'row_format': row_format,
          'field_terminator': field_delimiter,
          'collection_terminator': collection_delimiter,
          'map_key_terminator': map_delimiter,
          'serde_name': serde_name,
          'serde_properties': serde_properties,
          'file_format': file_format,
          'external': external or load_data and table_format in ('parquet', 'kudu'),
          'path': external_path,
          'skip_header': skip_header,
          'primary_keys': primary_keys if table_format == 'kudu' and not load_data else [],
       },
      'columns': columns,
      'partition_columns': partition_columns,
      'kudu_partition_columns': kudu_partition_columns,
      'database': database
    }
  )

  if table_format == 'text' and not external and load_data:
    sql += "\n\nLOAD DATA INPATH '%s' INTO TABLE `%s`.`%s`;" % (source_path, database, table_name)

  if load_data and table_format in ('parquet', 'kudu'):
    file_format = table_format
    if table_format == 'kudu':
      columns_list = ['`%s`' % col for col in primary_keys] + [col['name'] for col in destination['columns'] if col['name'] not in primary_keys]
      extra_create_properties = """PRIMARY KEY (%(primary_keys)s)
      DISTRIBUTE BY HASH INTO 16 BUCKETS
      STORED AS %(file_format)s
      TBLPROPERTIES(
      'kudu.num_tablet_replicas' = '1'
      )""" % {
        'file_format': file_format,
        'primary_keys': ', '.join(primary_keys)
      }
    else:
      columns_list = ['*']
    sql += '''\n\nCREATE TABLE `%(database)s`.`%(final_table_name)s`
      %(extra_create_properties)s
      AS SELECT %(columns_list)s
      FROM `%(database)s`.`%(table_name)s`;''' % {
        'database': database,
        'final_table_name': final_table_name,
        'table_name': table_name,
        'extra_create_properties': extra_create_properties,
        'columns_list': ', '.join(columns_list),
    }
    sql += '\n\nDROP TABLE IF EXISTS `%(database)s`.`%(table_name)s`;\n' % {
        'database': database,
        'table_name': table_name
    }

  editor_type = 'impala' if table_format == 'kudu' else 'hive'
  on_success_url = reverse('metastore:describe_table', kwargs={'database': database, 'table': table_name})

  return make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready', database=database, on_success_url=on_success_url)
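Unlike most of the views above, this helper returns the notebook without executing it; a minimal sketch of how a caller might finish the job, mirroring create_database in Example #43 (the `source` and `destination` dicts are assumed to follow the shapes read above):

notebook = _create_table_from_a_file(request, source, destination)
result = notebook.execute(request, batch=False)  # run the generated DDL/DML and follow on_success_url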