Exemple #1
0
class ColumnTypeForm(DependencyAwareForm):
  """
  Form used to specify a column during table creation.

  Besides the column name and type, the form carries optional companion
  fields (element type for arrays, key/value types for maps, length for
  char/varchar columns).
  """
  # (controlling field, value, dependent field) triples.
  # NOTE(review): presumably consumed by DependencyAwareForm to require the
  # dependent field only when the controlling field has the given value --
  # confirm against the base class, which is not visible here.
  dependencies = [
    ("column_type", "array", "array_type"),
    ("column_type", "map", "map_key_type"),
    ("column_type", "map", "map_value_type"),
    ("column_type", "char", "char_length"),
    ("column_type", "varchar", "varchar_length")
  ]
  column_name = common.HiveIdentifierField(label=_t('Column Name'), required=True)
  column_type = forms.ChoiceField(label=_t('Column Type'), required=True,
    choices=common.to_choices(HIVE_TYPES))
  array_type = forms.ChoiceField(required=False,
    choices=common.to_choices(HIVE_PRIMITIVE_TYPES), label=_t("Array Value Type"))
  map_key_type = forms.ChoiceField(required=False,
                                   choices=common.to_choices(HIVE_PRIMITIVE_TYPES),
                                   help_text=_t("Specify if column_type is map."))
  map_value_type = forms.ChoiceField(required=False,
                                     choices=common.to_choices(HIVE_PRIMITIVE_TYPES),
                                     help_text=_t("Specify if column_type is map."))
  # Length is bounded to 1..255 both client side (widget) and server side
  # (validators).
  char_length = forms.IntegerField(required=False, initial=1,
                                   widget=NumberInput(attrs={'min': 1, 'max': 255}),
                                   validators=[MinValueValidator(1), MaxValueValidator(255)],
                                   help_text=_t("Specify if column_type is char"))
  # Hive allows VARCHAR lengths from 1 to 65535; the previous bound of 65355
  # was a digit transposition that rejected valid lengths.
  varchar_length = forms.IntegerField(required=False, initial=1,
                                      widget=NumberInput(attrs={'min': 1, 'max': 65535}),
                                      validators=[MinValueValidator(1), MaxValueValidator(65535)],
                                      help_text=_t("Specify if column_type is varchar"))
Exemple #2
0
class SaveResultsForm(DependencyAwareForm):
  """Form for choosing where the data of a query result gets saved."""

  # The two possible destinations; the tuple members double as choice values.
  SAVE_TYPES = (SAVE_TYPE_TBL, SAVE_TYPE_DIR) = ('to a new table', 'to HDFS directory')

  save_target = forms.ChoiceField(
    choices=common.to_choices(SAVE_TYPES),
    widget=forms.RadioSelect,
    required=True)
  target_table = common.HiveIdentifierField(
    required=False,
    label="Table Name",
    help_text="Name of the new table")
  target_dir = filebrowser.forms.PathField(
    required=False,
    label="Results Location",
    help_text="Empty directory in HDFS to put the results")

  # (controlling field, value, dependent field) triples.
  dependencies = [
    ('save_target', SAVE_TYPE_TBL, 'target_table'),
    ('save_target', SAVE_TYPE_DIR, 'target_dir'),
  ]

  def clean_target_table(self):
    """
    Reject a target table name that is already present in the "default" database.

    Returns the submitted value unchanged when it is empty or when the
    metastore reports that no such table exists.
    """
    table_name = self.cleaned_data.get('target_table')
    if not table_name:
      return table_name

    try:
      db_utils.meta_client().get_table("default", table_name)
    except hive_metastore.ttypes.NoSuchObjectException:
      # The lookup failing this way means the name is free to use.
      return table_name

    raise forms.ValidationError('Table already exists')
Exemple #3
0
class PartitionTypeForm(forms.Form):
    """Form describing one partition column: its name, primitive type and,
    for char/varchar types, the length."""

    # (controlling field, value, dependent field) triples.
    # NOTE(review): this class derives from forms.Form, not
    # DependencyAwareForm, so nothing visible here consumes this list --
    # confirm whether the base class should be DependencyAwareForm.
    dependencies = [("column_type", "char", "char_length"),
                    ("column_type", "varchar", "varchar_length")]
    column_name = common.HiveIdentifierField(required=True)
    column_type = forms.ChoiceField(
        required=True, choices=common.to_choices(HIVE_PRIMITIVE_TYPES))
    # Length is bounded to 1..255 by both the widget and the validators.
    char_length = forms.IntegerField(
        required=False,
        initial=1,
        widget=NumberInput(attrs={
            'min': 1,
            'max': 255
        }),
        validators=[MinValueValidator(1),
                    MaxValueValidator(255)],
        help_text=_t("Specify if column_type is char"))
    # Hive allows VARCHAR lengths from 1 to 65535; the previous bound of
    # 65355 was a digit transposition that rejected valid lengths.
    varchar_length = forms.IntegerField(
        required=False,
        initial=1,
        widget=NumberInput(attrs={
            'min': 1,
            'max': 65535
        }),
        validators=[MinValueValidator(1),
                    MaxValueValidator(65535)],
        help_text=_t("Specify if column_type is varchar"))
Exemple #4
0
class CreateByImportFileForm(forms.Form):
  """
  Form for step 1 (specifying file) of the import wizard.

  Accepts an optional ``db`` keyword argument, stored on the instance and
  handed to ``_clean_tablename`` when the table name is validated.
  """

  # Basic Data
  name = common.HiveIdentifierField(label=_t("Table Name"), required=True)
  comment = forms.CharField(label=_t("Description"), required=False)

  # File info
  path = PathField(label=_t("Input File or Directory"))
  load_data = forms.ChoiceField(required=True,
    choices=[
      ("IMPORT", _("Import data")),
      ("EXTERNAL", _("Create External Table")),
      # Previously a bare parenthesised string, so this label was the only
      # one of the three that never went through translation.
      ("EMPTY", _("Leave Empty")),
    ],
    help_text=_t("Select 'import' to load data from the file into the Hive warehouse directory after creation. "
       "Select 'external' if the table is an external table and the data files should not be moved. "
       "Select 'empty' if the file should only be used to define the table schema but not loaded (table will be empty).")
  )

  def __init__(self, *args, **kwargs):
    """Pop the optional ``db`` keyword before delegating to the Django form."""
    self.db = kwargs.pop('db', None)
    super(CreateByImportFileForm, self).__init__(*args, **kwargs)

  def clean_name(self):
    """Validate the table name through ``_clean_tablename`` using ``self.db``."""
    return _clean_tablename(self.db, self.cleaned_data['name'])

  def clean_path(self):
    """
    Normalise the input path.

    A path whose scheme matches ``S3_ROOT`` (compared case-insensitively) has
    that leading scheme swapped for ``S3A_ROOT``; the result always ends with
    a trailing slash.
    """
    path = self.cleaned_data['path']
    if path.lower().startswith(S3_ROOT):
      # Swap only the leading scheme and keep the rest of the path verbatim:
      # lower-casing the whole path would corrupt case-sensitive object keys,
      # and a global replace could touch later occurrences of the scheme.
      path = S3A_ROOT + path[len(S3_ROOT):]
    if not path.endswith('/'):
      path = '%s/' % path
    return path
Exemple #5
0
class ColumnTypeForm(DependencyAwareForm):
    """Form used to specify a column during table creation.

    Array and map columns carry companion fields for their element types.
    """

    # (controlling field, value, dependent field) triples.
    dependencies = [
        ("column_type", "array", "array_type"),
        ("column_type", "map", "map_key_type"),
        ("column_type", "map", "map_value_type"),
    ]

    column_name = common.HiveIdentifierField(
        required=True,
        label=_t('Column Name'))
    column_type = forms.ChoiceField(
        required=True,
        label=_t('Column Type'),
        choices=common.to_choices(HIVE_TYPES))

    # Only meaningful for array columns.
    array_type = forms.ChoiceField(
        label=_t("Array Value Type"),
        choices=common.to_choices(HIVE_PRIMITIVE_TYPES),
        required=False)

    # Only meaningful for map columns.
    map_key_type = forms.ChoiceField(
        help_text=_t("Specify if column_type is map."),
        choices=common.to_choices(HIVE_PRIMITIVE_TYPES),
        required=False)
    map_value_type = forms.ChoiceField(
        help_text=_t("Specify if column_type is map."),
        choices=common.to_choices(HIVE_PRIMITIVE_TYPES),
        required=False)
Exemple #6
0
class SaveResultsForm(DependencyAwareForm):
    """Used for saving the query result data.

    Accepts an optional ``db`` keyword argument; when given, it is used to
    check that the requested target table does not exist yet.
    """

    # The two possible destinations; the tuple members double as choice values.
    SAVE_TYPES = (SAVE_TYPE_TBL, SAVE_TYPE_DIR) = (_('to a new table'),
                                                   _('to HDFS directory'))
    save_target = forms.ChoiceField(required=True,
                                    choices=common.to_choices(SAVE_TYPES),
                                    widget=forms.RadioSelect)
    target_table = common.HiveIdentifierField(
        label=_t("Table Name"),
        required=False,
        help_text=_t("Name of the new table"))
    target_dir = PathField(
        label=_t("Results Location"),
        required=False,
        help_text=_t("Empty directory in HDFS to put the results"))
    # (controlling field, value, dependent field) triples.
    dependencies = [
        ('save_target', SAVE_TYPE_TBL, 'target_table'),
        ('save_target', SAVE_TYPE_DIR, 'target_dir'),
    ]

    def __init__(self, *args, **kwargs):
        """Pop the optional ``db`` keyword before delegating to the base form."""
        self.db = kwargs.pop('db', None)
        super(SaveResultsForm, self).__init__(*args, **kwargs)

    def clean_target_table(self):
        """Reject a table name that already exists in the "default" database.

        Returns the submitted value unchanged when it is empty, when no
        ``db`` was supplied (nothing to check against), or when the lookup
        raises ``NoSuchObjectException``.

        Raises:
            forms.ValidationError: the lookup succeeded, i.e. the table exists.
        """
        tbl = self.cleaned_data.get('target_table')
        # Without a db handle no lookup happens, so the table must not be
        # reported as existing (the previous code did exactly that).
        if not tbl or self.db is None:
            return tbl
        try:
            self.db.get_table("default", tbl)
        except hive_metastore.ttypes.NoSuchObjectException:
            return tbl
        raise forms.ValidationError(_('Table already exists'))
Exemple #7
0
class SaveResultsForm(DependencyAwareForm):
    """Used for saving the query result data.

    Accepts two optional keyword arguments: ``db`` (used to look up the
    target table) and ``fs`` (used to test whether the target directory
    exists).
    """

    # The two possible destinations; the tuple members double as choice values.
    SAVE_TYPES = (SAVE_TYPE_TBL, SAVE_TYPE_DIR) = ('to a new table',
                                                   'to HDFS directory')
    save_target = forms.ChoiceField(required=True,
                                    choices=common.to_choices(SAVE_TYPES),
                                    widget=forms.RadioSelect,
                                    initial=SAVE_TYPE_TBL)
    target_table = common.HiveIdentifierField(
        label=_t("Table Name"),
        required=False,
        help_text=_t("Name of the new table"))
    target_dir = PathField(
        label=_t("Results Location"),
        required=False,
        help_text=_t("Empty directory in HDFS to store results."))
    # (controlling field, value, dependent field) triples.
    dependencies = [
        ('save_target', SAVE_TYPE_TBL, 'target_table'),
        ('save_target', SAVE_TYPE_DIR, 'target_dir'),
    ]

    def __init__(self, *args, **kwargs):
        """Pop the optional ``db`` and ``fs`` keywords, then delegate."""
        self.db = kwargs.pop('db', None)
        self.fs = kwargs.pop('fs', None)
        super(SaveResultsForm, self).__init__(*args, **kwargs)

    def clean(self):
        """Cross-field validation of the chosen save destination.

        * Table destination: flags ``target_table`` when the lookup in the
          'default' database succeeds.
        * Directory destination: flags ``target_dir`` when it does not start
          with '/' or already exists on ``self.fs``.

        Errors are attached to the individual fields; the (possibly reduced)
        ``cleaned_data`` dict is returned.
        """
        cleaned_data = super(SaveResultsForm, self).clean()
        # .get(): a field that failed its own validation is absent from
        # cleaned_data, and must not turn into a KeyError here.
        save_target = cleaned_data.get('save_target')

        if save_target == SaveResultsForm.SAVE_TYPE_TBL:
            tbl = cleaned_data.get('target_table')
            # Without a db handle nothing can be looked up, so the table must
            # not be reported as existing.
            if tbl and self.db is not None:
                try:
                    self.db.get_table('default', tbl)  # Assumes 'default' DB
                except Exception:
                    # A failing lookup is how a missing table is signalled;
                    # the concrete exception type is not visible from here,
                    # so the deliberate catch-all is kept.
                    pass
                else:
                    self._errors['target_table'] = self.error_class(
                        [_('Table already exists')])
                    del cleaned_data['target_table']
        elif save_target == SaveResultsForm.SAVE_TYPE_DIR:
            target_dir = cleaned_data.get('target_dir')
            if target_dir is None:
                # Field-level validation already rejected the value.
                pass
            elif not target_dir.startswith('/'):
                self._errors['target_dir'] = self.error_class(
                    [_('Directory should start with /')])
            elif self.fs is not None and self.fs.exists(target_dir):
                # Refuse to overwrite the content of an existing directory.
                self._errors['target_dir'] = self.error_class(
                    [_('Directory already exists.')])

        return cleaned_data
Exemple #8
0
class SaveResultsTableForm(forms.Form):
    """Form for saving the data of a query result into a Hive table."""

    # May also hold a DB prefixed table name, e.g. DB_NAME.TABLE_NAME
    target_table = common.HiveIdentifierField(
        required=True,
        label=_t("Table Name"),
        help_text=_t("Name of the new table"))

    def __init__(self, *args, **kwargs):
        """Extract ``db`` and ``database`` (default 'default') from kwargs."""
        self.db = kwargs.pop('db', None)
        self.target_database = kwargs.pop('database', 'default')
        super(SaveResultsTableForm, self).__init__(*args, **kwargs)

    def clean(self):
        """Validate the target table name and make sure the table is new.

        A ``<database>.<table>`` value is split: the database part overrides
        ``self.target_database`` and, when the lookup fails, the bare table
        name is written back to ``cleaned_data``.

        Raises:
            forms.ValidationError: missing name, missing ``db`` object, more
                than one dot in the name, or a table that already exists.
        """
        cleaned_data = super(SaveResultsTableForm, self).clean()
        requested = cleaned_data.get('target_table')

        if not requested:
            raise forms.ValidationError(_("Table name is required."))
        if self.db is None:
            raise forms.ValidationError(
                _("Cannot validate form, db object is required."))

        # The field may hold <database>.<table>; split it before the lookup.
        pieces = requested.split(".")
        if len(pieces) > 2:
            raise forms.ValidationError(_("Invalid table prefix name."))
        if len(pieces) == 2:
            # Continue with the table name stripped of its DB prefix.
            self.target_database, requested = pieces

        # A failing lookup is taken to mean the table does not exist yet.
        try:
            existing = self.db.get_table(self.target_database, requested)
        except Exception:
            existing = None
            cleaned_data['target_table'] = requested

        if existing is not None:
            raise forms.ValidationError(
                _("Table %s.%s already exists") %
                (self.target_database, requested))

        return cleaned_data
Exemple #9
0
class CreateByImportFileForm(forms.Form):
  """First step of the import wizard: pick the table name and source file."""

  # Basic Data
  name = common.HiveIdentifierField(required=True, label="Table Name")
  comment = forms.CharField(required=False, label="Description")

  # File info
  path = filebrowser.forms.PathField(label="Input File")
  do_import = forms.BooleanField(
    label="Import data from file",
    help_text="Automatically load this file into the table after creation",
    initial=True,
    required=False)

  def clean_name(self):
    """Run the submitted table name through ``_clean_tablename``."""
    table_name = self.cleaned_data['name']
    return _clean_tablename(table_name)
Exemple #10
0
class CreateByImportFileForm(forms.Form):
  """First step of the import wizard: pick the table name and source file.

  An optional ``db`` keyword argument is stored on the instance and passed to
  ``_clean_tablename`` during name validation.
  """

  # Basic Data
  name = common.HiveIdentifierField(required=True, label=_t("Table Name"))
  comment = forms.CharField(required=False, label=_t("Description"))

  # File info
  path = PathField(label=_t("Input File"))
  do_import = forms.BooleanField(
    label=_t("Import data from file"),
    help_text=_t("Automatically load this file into the table after creation."),
    initial=True,
    required=False)

  def __init__(self, *args, **kwargs):
    """Take ``db`` out of the keyword arguments, then build the form."""
    self.db = kwargs.pop('db', None)
    super(CreateByImportFileForm, self).__init__(*args, **kwargs)

  def clean_name(self):
    """Run the submitted table name through ``_clean_tablename``."""
    table_name = self.cleaned_data['name']
    return _clean_tablename(self.db, table_name)
Exemple #11
0
class CreateDatabaseForm(DependencyAwareForm):
  """Form backing the create database page."""

  # (controlling field, value, dependent field) triples.
  dependencies = [
    ("use_default_location", False, "external_location"),
  ]

  # Basic Data
  name = common.HiveIdentifierField(required=True, label=_t("Database Name"))
  comment = forms.CharField(required=False, label=_t("Description"))

  # External if not true
  use_default_location = forms.BooleanField(
    label=_t("Use default location"),
    initial=True,
    required=False)
  external_location = forms.CharField(
    help_text=_t("Path to HDFS directory or file of database data."),
    required=False)

  def clean_name(self):
    """Run the submitted database name through ``_clean_databasename``."""
    database_name = self.cleaned_data['name']
    return _clean_databasename(database_name)
Exemple #12
0
class SaveResultsTableForm(forms.Form):
    """Used for saving the query result data to hive table.

    Accepts two optional keyword arguments: ``db`` (used to look up the
    target table) and ``database`` (database name, 'default' if omitted).
    """

    target_table = common.HiveIdentifierField(
        label=_t("Table Name"),
        required=True,
        help_text=_t("Name of the new table")
    )  # Can also contain a DB prefixed table name, e.g. DB_NAME.TABLE_NAME

    def __init__(self, *args, **kwargs):
        """Pop the optional ``db`` and ``database`` keywords, then delegate."""
        self.db = kwargs.pop('db', None)
        self.target_database = kwargs.pop('database', 'default')
        super(SaveResultsTableForm, self).__init__(*args, **kwargs)

    def clean(self):
        """Validate the target table name and flag tables that already exist.

        A ``<database>.<table>`` value is split: the database part overrides
        ``self.target_database`` and ``cleaned_data['target_table']`` is
        replaced by the bare table name. Errors are attached to the
        ``target_table`` field, and the offending value is removed from
        ``cleaned_data``.
        """
        cleaned_data = super(SaveResultsTableForm, self).clean()

        target_table = cleaned_data.get('target_table')
        # Without a name or a db handle there is nothing to look up; in
        # particular a missing db must not be reported as an existing table.
        if not target_table or self.db is None:
            return cleaned_data

        name_parts = target_table.split(".")
        if len(name_parts) > 2:
            # Stop here: a lookup with the malformed name could overwrite
            # this error or leave the bad value in cleaned_data.
            self._errors['target_table'] = self.error_class(
                [_('Invalid table prefix name')])
            del cleaned_data['target_table']
            return cleaned_data
        if len(name_parts) == 2:
            self.target_database, target_table = name_parts

        # Update table name without the DB prefix
        cleaned_data['target_table'] = target_table

        try:
            self.db.get_table(self.target_database, target_table)
        except Exception:
            # A failing lookup is how a missing table is signalled; the
            # concrete exception type is not visible from here, so the
            # deliberate catch-all is kept.
            pass
        else:
            self._errors['target_table'] = self.error_class(
                [_('Table already exists')])
            del cleaned_data['target_table']

        return cleaned_data
Exemple #13
0
class CreateTableForm(DependencyAwareForm):
  """Form backing the create table page."""

  # (controlling field, value, dependent field) triples, grouped by the
  # section of the form they belong to.
  dependencies = [
    # Delimited Row
    ("row_format", "Delimited", "field_terminator"),
    ("row_format", "Delimited", "collection_terminator"),
    ("row_format", "Delimited", "map_key_terminator"),
    # Serde Row
    ("row_format", "SerDe", "serde_name"),
    ("row_format", "SerDe", "serde_properties"),
    # File Format
    ("file_format", "InputFormat", "input_format_class"),
    ("file_format", "InputFormat", "output_format_class"),
    # External?
    ("use_default_location", False, "external_location"),
  ]

  # Basic Data
  name = common.HiveIdentifierField(required=True, label=_t("Table Name"))
  comment = forms.CharField(required=False, label=_t("Description"))

  # Row Formatting
  row_format = forms.ChoiceField(
    required=True,
    initial="Delimited",
    choices=common.to_choices([ "Delimited", "SerDe" ]))

  # Delimited Row
  # These initials are per LazySimpleSerDe.DefaultSeparators
  field_terminator = ChoiceOrOtherField(
    label=_t("Field terminator"),
    required=False,
    choices=TERMINATOR_CHOICES,
    initial=TERMINATOR_CHOICES[0][0])
  collection_terminator = ChoiceOrOtherField(
    label=_t("Collection terminator"),
    required=False,
    choices=TERMINATOR_CHOICES,
    initial=TERMINATOR_CHOICES[1][0])
  map_key_terminator = ChoiceOrOtherField(
    label=_t("Map key terminator"),
    required=False,
    choices=TERMINATOR_CHOICES,
    initial=TERMINATOR_CHOICES[2][0])

  # Serde Row
  serde_name = forms.CharField(label=_t("SerDe Name"), required=False)
  serde_properties = forms.CharField(
    help_text=_t("Comma-separated list of key-value pairs. E.g. 'p1=v1, p2=v2'"),
    required=False)

  # File Format
  file_format = forms.ChoiceField(
    required=False,
    widget=forms.RadioSelect,
    choices=common.to_choices(["TextFile", "SequenceFile", "InputFormat"]),
    initial="TextFile")
  input_format_class = forms.CharField(label=_t("InputFormat Class"), required=False)
  output_format_class = forms.CharField(label=_t("OutputFormat Class"), required=False)

  # External?
  use_default_location = forms.BooleanField(
    label=_t("Use default location."),
    initial=True,
    required=False)
  external_location = forms.CharField(
    help_text=_t("Path to HDFS directory or file of table data."),
    required=False)

  def clean_field_terminator(self):
    """Pass the field terminator through ``_clean_terminator``."""
    raw = self.cleaned_data.get('field_terminator')
    return _clean_terminator(raw)

  def clean_collection_terminator(self):
    """Pass the collection terminator through ``_clean_terminator``."""
    raw = self.cleaned_data.get('collection_terminator')
    return _clean_terminator(raw)

  def clean_map_key_terminator(self):
    """Pass the map key terminator through ``_clean_terminator``."""
    raw = self.cleaned_data.get('map_key_terminator')
    return _clean_terminator(raw)

  def clean_name(self):
    """Run the table name through ``_clean_tablename``.

    NOTE(review): ``self.db`` and ``self.database`` are not assigned anywhere
    in this class; presumably the caller or the base class sets them --
    confirm.
    """
    table_name = self.cleaned_data['name']
    return _clean_tablename(self.db, table_name, self.database)
Exemple #14
0
class PartitionTypeForm(forms.Form):
    """Form describing one partition column: a name and a primitive type."""

    column_name = common.HiveIdentifierField(required=True)
    column_type = forms.ChoiceField(
        choices=common.to_choices(HIVE_PRIMITIVE_TYPES),
        required=True)
Exemple #15
0
class ReportColumnForm(forms.Form):
    """
    A form representing a column in the report.

    After a successful ``clean()`` the instance exposes ``qtable`` (a
    ``report_gen.QTable`` or None) and ``selection`` (a
    ``report_gen.ColumnSelection`` or ``report_gen.ConstSelection``).
    """
    # If not 'display', then source must be 'table'
    display = forms.BooleanField(label='Display', required=False, initial=True)

    # Shown iff 'display'. 'source' is not required, but will be set during clean
    source = forms.ChoiceField(label='Source',
                               required=False,
                               initial='table',
                               choices=common.to_choices(
                                   common.SELECTION_SOURCE))
    # Shown iff 'display'
    agg = forms.ChoiceField(label='Aggregate',
                            required=False,
                            choices=common.to_choices(common.AGGREGATIONS))
    # Shown iff 'display'
    distinct = forms.BooleanField(label="Distinct", required=False)

    # Shown iff 'source' is 'constant'
    constant = forms.CharField(label='Constant value', required=False)

    # Shown iff 'source' is 'table'
    table_alias = common.HiveIdentifierField(label='Table alias',
                                             required=False)
    # Shown iff 'source' is 'table'
    col = forms.CharField(label='From column', required=False)
    # Shown iff 'display', and 'source' is 'table'
    col_alias = common.HiveIdentifierField(label='Column alias',
                                           required=False)
    # Shown iff 'display', and 'source' is 'table'
    sort = forms.ChoiceField(label='Sort',
                             required=False,
                             choices=common.to_choices(common.SORT_OPTIONS))
    # Shown iff 'sort'
    sort_order = forms.IntegerField(label='Sort order',
                                    required=False,
                                    min_value=1)
    # Shown iff 'display', and 'source' is 'table'
    group_order = forms.IntegerField(label='Group order',
                                     required=False,
                                     min_value=1)

    def __init__(self, *args, **kwargs):
        """Build the form and add the 'table' field per instance."""
        forms.Form.__init__(self, *args, **kwargs)
        # Shown iff 'source' is 'table'
        self.fields['table'] = common.HiveTableChoiceField(label='From table',
                                                           required=False)

    def _display_check(self):
        """Reconcile 'display' with 'source'.

        A column that is not displayed must come from a table (the source is
        forced to 'table') and may not carry a column alias; a displayed
        column must have a source.

        Raises:
            forms.ValidationError: when one of these rules is violated.
        """
        src = self.cleaned_data.get('source')
        if not self.cleaned_data.get('display'):
            if src and src != 'table':
                raise forms.ValidationError(
                    'Source must be "table" when not displaying column')
            self.cleaned_data['source'] = 'table'
            if self.cleaned_data.get('col_alias'):
                raise forms.ValidationError(
                    'Column alias not applicable when not displaying column')
        else:
            if not src:
                raise forms.ValidationError('Source value missing')

    def clean_display(self):
        """Make sure display is set"""
        return self.cleaned_data.get('display', False)

    def clean_sort(self):
        """Set sort_hql accordingly.

        'ascending' maps to 'ASC' and 'descending' to 'DESC'; for any other
        value a previously stored 'sort_hql' entry is dropped.
        """
        # Named 'direction' so the builtin dir() is not shadowed.
        direction = self.cleaned_data.get('sort')
        if direction == 'ascending':
            self.cleaned_data['sort_hql'] = 'ASC'
        elif direction == 'descending':
            self.cleaned_data['sort_hql'] = 'DESC'
        else:
            # dict.has_key() does not exist on Python 3; pop() with a default
            # removes the key if present and works on both major versions.
            self.cleaned_data.pop('sort_hql', None)
        return direction

    def clean(self):
        """Cross-field validation; builds ``self.qtable`` / ``self.selection``.

        Returns:
            ``self.cleaned_data``, or None when no source is available.

        Raises:
            forms.ValidationError: inconsistent display/source settings,
                missing sort order, alias combined with "*", or a column
                that is not part of the selected table.
            KeyError: 'sort' is set but no usable 'sort_hql' was derived.
        """
        self.qtable = None
        self.selection = None

        self._display_check()

        # Sanity check: a chosen sort must have produced a 'sort_hql' value in
        # clean_sort(); both a missing and an empty entry end in KeyError.
        if self.cleaned_data.get('sort') and not self.cleaned_data['sort_hql']:
            raise KeyError()

        # Verify that the 'source' field is consistent with the other fields
        source = self.cleaned_data.get('source')
        if not source:
            return None  # No point since we can't get source

        # NOTE(review): _field_source_check is defined elsewhere; presumably
        # it raises when a value does not fit the chosen source -- confirm.
        constant_val = self.cleaned_data.get('constant')
        _field_source_check(source,
                            'Constant',
                            constant_val,
                            is_from_table=False)

        table_val = self.cleaned_data.get('table')
        _field_source_check(source,
                            'From table',
                            table_val,
                            is_from_table=True)

        col_val = self.cleaned_data.get('col')
        _field_source_check(source, 'From column', col_val, is_from_table=True)

        if self.cleaned_data.get(
                'sort', '') and not self.cleaned_data.get('sort_order', ''):
            raise forms.ValidationError('Sort order missing')

        if table_val:
            # Column must belong to the table
            self.qtable = report_gen.QTable(
                table_val, self.cleaned_data.get('table_alias'))
            if col_val == '*':
                if self.cleaned_data.get('col_alias'):
                    raise forms.ValidationError(
                        'Alias not applicable for selecting "*"')
            elif col_val not in self.qtable.get_columns():
                raise forms.ValidationError('Invalid column name "%s"' %
                                            (col_val, ))
            # ColumnSelection object
            self.selection = report_gen.ColumnSelection(
                self.qtable, col_val, self.cleaned_data.get('col_alias'))
        else:
            # ConstSelection object
            self.selection = report_gen.ConstSelection(
                constant_val, self.cleaned_data.get('col_alias'))
        self.selection.distinct = self.cleaned_data.get('distinct', False)
        self.selection.set_aggregation(self.cleaned_data.get('agg', ''))

        # Do not expose a selection built from data that failed validation.
        if self.errors:
            delattr(self, 'selection')
        return self.cleaned_data