Esempio n. 1
0
  def load_partition(self, django_user, partition_spec, filepath, columns):
    """
    Load the data found at `filepath` into the partition `partition_spec` of this table.

    When concurrency support is on and the table is transactional, the local file is read
    through `_get_sql_insert_values` (which also receives `columns`) and sent as a single
    INSERT ... PARTITION ... VALUES statement. Otherwise the file is pushed through
    `_upload_to_hdfs` and the partition is added with its LOCATION set to the upload
    directory.

    Either way the resulting statement is run with `_load_data_to_table`.
    """
    if not (has_concurrency_support() and self.is_transactional):
      # Upload the data found at filepath to the HDFS home of the user, then register the
      # upload directory as the location of the new partition.
      add_partition_template = """
        ALTER TABLE %(tablename)s ADD PARTITION(%(partition_spec)s) LOCATION '%(filepath)s'
        """

      hdfs_root_destination = self._get_hdfs_root_destination(
        django_user,
        subdir=self._get_partition_dir(partition_spec)
      )
      # The uploaded file keeps the base name of the local file.
      self._upload_to_hdfs(django_user, filepath, hdfs_root_destination, filepath.split('/')[-1])

      hql = add_partition_template % {
        'tablename': self.name,
        'partition_spec': partition_spec,
        'filepath': hdfs_root_destination
      }
    else:
      insert_template = """
          INSERT INTO TABLE %(tablename)s
          PARTITION (%(partition_spec)s)
          VALUES %(values)s
          """
      # Format while the file is still open: the values are read from it.
      with open(filepath) as data_file:
        hql = insert_template % {
          'tablename': self.name,
          'partition_spec': partition_spec,
          'values': self._get_sql_insert_values(data_file, columns)
        }

    self._load_data_to_table(django_user, hql)
Esempio n. 2
0
  def load(self, django_user):
    """
    Load the contents file of this sample into its table.

    With concurrency support and a transactional table, the rows read from
    `self._contents_file` are sent as one INSERT ... VALUES statement. Otherwise the file
    is uploaded with `_upload_to_hdfs` and loaded with LOAD DATA INPATH ... OVERWRITE.

    The statement is run with `_load_data_to_table`.
    """
    if not (has_concurrency_support() and self.is_transactional):
      # Upload data to HDFS home of user then load (aka move) it into the Hive table
      # (in the Hive metastore in HDFS).
      upload_dir = self._get_hdfs_root_destination(django_user)
      uploaded_file = self._upload_to_hdfs(django_user, self._contents_file, upload_dir)
      load_template = """
        LOAD DATA INPATH
        '%(filename)s' OVERWRITE INTO TABLE %(tablename)s
        """
      hql = load_template % {
        'tablename': self.name,
        'filename': uploaded_file
      }
    else:
      insert_template = """
          INSERT INTO TABLE %(tablename)s
          VALUES %(values)s
          """
      # Format while the file is still open: the values are read from it.
      with open(self._contents_file) as contents:
        hql = insert_template % {
          'tablename': self.name,
          'values': self._get_sql_insert_values(contents)
        }

    self._load_data_to_table(django_user, hql)
Esempio n. 3
0
  def install(self, django_user):
    """
    Create this sample table and load its data.

    Returns None without doing anything when concurrency support is on but the table is
    not transactional; returns True once the table has been created and loaded.

    Raises PopupException when the data would have to go through the file system (i.e. the
    table is not loaded via the transactional path) and `cluster.get_hdfs()` is falsy.
    """
    if has_concurrency_support() and not self.is_transactional:
      LOG.info('Skipping table %s as non transactional' % self.name)
      return

    # Only the non-transactional load path needs a file system to stage the data.
    loads_without_fs = has_concurrency_support() and self.is_transactional
    if not (loads_without_fs or cluster.get_hdfs()):
      raise PopupException('Requiring a File System to load its data')

    self.create(django_user)

    if not self.partition_files:
      self.load(django_user)
      return True

    # One load per partition; iterate over a snapshot of the mapping.
    for partition_spec, filepath in list(self.partition_files.items()):
      self.load_partition(django_user, partition_spec, filepath, columns=self.columns)

    return True
Esempio n. 4
0
  def load(self, django_user):
    """
    Load the contents file of this sample into its table, one statement at a time.

    The INSERT path is taken for transactional tables when the dialect is neither 'hive'
    nor 'impala', or when concurrency support is on: one INSERT statement is built per
    item returned by `_get_sql_insert_values`, using `self.insert_sql` as the template
    when it is set. Otherwise the file is uploaded with `_upload_to_hdfs` and a single
    LOAD DATA INPATH ... OVERWRITE statement is built.

    Every statement is run, in order, with `_load_data_to_table`.
    """
    generic_dialect = self.dialect not in ('hive', 'impala')

    if (generic_dialect or has_concurrency_support()) and self.is_transactional:
      with open(self._contents_file) as contents:
        template = self.insert_sql or """
            INSERT INTO TABLE %(tablename)s
            VALUES %(values)s
            """
        # Build every statement while the file is still open: the values come from it.
        statements = [
          template % {
            'tablename': self.name,
            'values': row_values
          }
          for row_values in self._get_sql_insert_values(contents)
        ]
    else:
      # Upload data to HDFS home of user then load (aka move) it into the Hive table
      # (in the Hive metastore in HDFS).
      upload_dir = self._get_hdfs_root_destination(django_user)
      uploaded_file = self._upload_to_hdfs(django_user, self._contents_file, upload_dir)
      load_template = """
        LOAD DATA INPATH
        '%(filename)s' OVERWRITE INTO TABLE %(tablename)s
        """
      statements = [
        load_template % {
          'tablename': self.name,
          'filename': uploaded_file
        }
      ]

    for statement in statements:
      self._load_data_to_table(django_user, statement)
Esempio n. 5
0
    def handle(self, *args, **options):
        """
        Install the sample queries and tables for a SQL dialect.

        Positional form: `args[0]` is the dialect and the optional `args[1]` the database
        name ('default' when omitted). The user is looked up from the OS account running
        the process; no request and no interpreter are used.

        Keyword form: reads `dialect`, `user` and `request` (all required) plus the
        optional `db_name` and `interpreter` from `options`.

        Returns the tuple `(self.successes, self.errors)`; both lists are reset at the
        start of the call.

        Raises PopupException when the text of a failure contains "Permission denied";
        any other exception caught during the install is re-raised unchanged.
        """
        if args:
            dialect = args[0]
            db_name = args[1] if len(args) > 1 else 'default'
            user = User.objects.get(username=pwd.getpwuid(os.getuid()).pw_name)
            request = None
            # Must be bound on this path too: it is passed to install_queries() and
            # install_tables() below and would otherwise raise a NameError.
            interpreter = None
        else:
            dialect = options['dialect']
            db_name = options.get('db_name', 'default')
            interpreter = options.get(
                'interpreter'
            )  # Only when connectors are enabled. Later will deprecate `dialect`.
            user = options['user']
            request = options['request']

        # Hive and Impala pick their table list based on concurrency support; every other
        # dialect uses the standard list.
        tables = 'tables_standard.json' if dialect not in (
            'hive',
            'impala') else ('tables_transactional.json'
                            if has_concurrency_support() else 'tables.json')
        exception = None

        self.successes = []
        self.errors = []
        try:
            sample_user = install_sample_user(
                user
            )  # Documents will belong to the sample user but we run the SQL as the current user
            self.install_queries(sample_user, dialect, interpreter=interpreter)
            self.install_tables(user,
                                dialect,
                                db_name,
                                tables,
                                interpreter=interpreter,
                                request=request)
        except Exception as ex:
            exception = ex

        if exception is not None:
            pretty_msg = None

            if "Permission denied" in str(exception):
                pretty_msg = _(
                    "Permission denied. Please check with your system administrator."
                )

            if pretty_msg is not None:
                raise PopupException(pretty_msg)
            else:
                raise exception

        return self.successes, self.errors
Esempio n. 6
0
    def _install_queries(self, django_user, app_name):
        """
        Install the sample query designs of one app for `django_user`.

        The designs are read from `designs.json` in the local examples data directory and
        filtered by type: HQL designs when `app_name` is 'beeswax', IMPALA designs
        otherwise. The 'Sample: Customers' design is skipped when concurrency support
        is on.

        Raises InstallException as soon as one design fails to install.
        """
        designs_path = os.path.join(beeswax.conf.LOCAL_EXAMPLES_DATA_DIR.get(),
                                    'designs.json')
        # `open` in a `with` block works on Python 2 and 3 (unlike `file`) and closes the
        # handle even when the JSON is malformed.
        with open(designs_path) as design_file:
            design_list = json.load(design_file)

        # Filter design list to app-specific designs
        app_type = HQL if app_name == 'beeswax' else IMPALA
        design_list = [d for d in design_list if int(d['type']) == app_type]

        for design_dict in design_list:
            if has_concurrency_support(
            ) and design_dict['name'] == 'Sample: Customers':
                # Skipped designs must not fall through to install(): there is either no
                # `design` object yet or only the one from the previous iteration.
                continue

            design = SampleQuery(design_dict)
            try:
                design.install(django_user)
            except Exception as ex:
                raise InstallException(_('Could not install query: %s') % ex)
Esempio n. 7
0
    def handle(self, *args, **options):
        """
        Install the sample queries and tables of one SQL app.

        Positional form: `args[0]` is the app name and the optional `args[1]` the database
        name ('default' when omitted); the user is looked up from the OS account running
        the process.

        Keyword form: reads `app_name` and `user` (required) plus the optional `db_name`
        from `options`.

        In both forms `options['tables']`, when present, names the table list to install;
        otherwise the list is chosen based on concurrency support.

        `Document.objects.sync()` is called whether or not the install succeeded.

        Raises PopupException when the failure text contains "AlreadyExistsException" or
        "Permission denied"; any other exception caught during the install is re-raised
        unchanged.
        """
        if args:
            app_name = args[0]
            db_name = args[1] if len(args) > 1 else 'default'
            user = User.objects.get(username=pwd.getpwuid(os.getuid()).pw_name)
        else:
            app_name = options['app_name']
            db_name = options.get('db_name', 'default')
            user = options['user']

        tables = options['tables'] if 'tables' in options else (
            'tables_transactional.json'
            if has_concurrency_support() else 'tables.json')

        exception = None

        # Documents will belong to this user but we run the install as the current user
        try:
            sample_user = install_sample_user()
            self._install_queries(sample_user, app_name)
            self._install_tables(user, app_name, db_name, tables)
        except Exception as ex:
            exception = ex

        Document.objects.sync()

        if exception is not None:
            pretty_msg = None

            # Not every exception carries a usable `message` attribute (Python 3 built-in
            # exceptions have none), so fall back to the string form instead of failing
            # with an AttributeError that would hide the real error.
            error_text = getattr(exception, 'message', None) or str(exception)

            if "AlreadyExistsException" in error_text:
                pretty_msg = _("SQL table examples already installed.")
            if "Permission denied" in error_text:
                pretty_msg = _(
                    "Permission denied. Please check with your system administrator."
                )

            if pretty_msg is not None:
                raise PopupException(pretty_msg)
            else:
                raise exception