Ejemplo n.º 1
0
def import_wizard(request, database='default'):
  """
  Help users define a table based on a file they want to import to Hive.

  The wizard is a sequence of forms posted back to this view. Exactly one
  submit/cancel button name must be present in the POST data; it selects the
  transition:
    - submit_file    : step 1 -> 2 (preview with an automatically chosen delimiter)
    - submit_preview : step 2 -> 2 (preview with the delimiter chosen by the user)
    - submit_delim   : step 2 -> 3 (define the columns)
    - submit_create  : step 3 -> execute the generated CREATE TABLE statement
    - cancel_delim   : step 2 -> 1
    - cancel_create  : step 3 -> 2

  Limitations:
    - Rows are delimited (no serde).
    - No detection for map and array types.
    - No detection for the presence of column header in the first row.
    - No partition table.
    - Does not work with binary data.

  Parameters:
    request  -- the incoming request; request.user, request.method,
                request.POST and request.fs are read.
    database -- name of the target database, used for the form action URL
                and for the generated statement.

  Returns the rendered response of the next step, or the result of
  _submit_create_and_load() once the table definition is complete.

  Raises PopupException when the POST does not carry exactly one action, when
  building the column definition step fails, or when the query server
  rejects the table creation.

  NOTE(review): as written here, a non-POST request, an invalid file form, or
  a 'cancel_delim' submission with a valid delimiter form matches no branch
  and falls through, returning None.
  """
  encoding = i18n.get_site_encoding()
  app_name = get_app_name(request)

  db = dbms.get(request.user)
  # The loop variable must not be called `db`: on Python 2 a list comprehension
  # leaks its variable into this scope, which would replace the handle passed
  # to CreateByImportFileForm below with the last database name.
  databases = [
    {'name': db_name, 'url': reverse('beeswax:import_wizard', kwargs={'database': db_name})}
    for db_name in db.get_databases()
  ]

  if request.method == 'POST':
    #
    # General processing logic:
    # - We have 3 steps. Each requires the previous.
    #   * Step 1      : Table name and file location
    #   * Step 2a     : Display sample with auto chosen delim
    #   * Step 2b     : Display sample with user chosen delim (if user chooses one)
    #   * Step 3      : Display sample, and define columns
    # - Each step is represented by a different form. The form of an earlier step
    #   should be present when submitting to a later step.
    # - To preserve the data from the earlier steps, we send the forms back as
    #   hidden fields. This way, when users revisit a previous step, the data would
    #   be there as well.
    #
    delim_is_auto = False
    fields_list, n_cols = [[]], 0
    s3_col_formset = None
    s1_file_form = CreateByImportFileForm(request.POST, db=db)

    if s1_file_form.is_valid():
      do_s2_auto_delim = request.POST.get('submit_file')        # Step 1 -> 2
      do_s2_user_delim = request.POST.get('submit_preview')     # Step 2 -> 2
      do_s3_column_def = request.POST.get('submit_delim')       # Step 2 -> 3
      do_hive_create = request.POST.get('submit_create')        # Step 3 -> execute

      cancel_s2_user_delim = request.POST.get('cancel_delim')   # Step 2 -> 1
      cancel_s3_column_def = request.POST.get('cancel_create')  # Step 3 -> 2

      # Exactly one of these should be set. A list comprehension is used for
      # the count because filter() has no len() on Python 3.
      actions = (do_s2_auto_delim, do_s2_user_delim, do_s3_column_def, do_hive_create,
                 cancel_s2_user_delim, cancel_s3_column_def)
      if len([action for action in actions if action]) != 1:
        raise PopupException(_('Invalid form submission'))

      if not do_s2_auto_delim:
        # We should have a valid delim form
        s2_delim_form = CreateByImportDelimForm(request.POST)
        if not s2_delim_form.is_valid():
          # Go back to picking delimiter
          do_s2_user_delim, do_s3_column_def, do_hive_create = True, False, False
      if do_hive_create:
        # We should have a valid columns formset
        s3_col_formset = ColumnTypeFormSet(prefix='cols', data=request.POST)
        if not s3_col_formset.is_valid():
          # Go back to define columns
          do_s3_column_def, do_hive_create = True, False

      #
      # Go to step 2: We've just picked the file. Preview it.
      #
      if do_s2_auto_delim:
        delim_is_auto = True
        # Offer every reader type and every known delimiter to the preview.
        fields_list, n_cols, s2_delim_form = _delim_preview(request.fs, s1_file_form, encoding,
                                                            [reader.TYPE for reader in FILE_READERS], DELIMITERS)

      if (do_s2_user_delim or do_s3_column_def or cancel_s3_column_def) and s2_delim_form.is_valid():
        # Delimit based on input
        fields_list, n_cols, s2_delim_form = _delim_preview(request.fs, s1_file_form, encoding,
                                                            (s2_delim_form.cleaned_data['file_type'],),
                                                            (s2_delim_form.cleaned_data['delimiter'],))

      if do_s2_auto_delim or do_s2_user_delim or cancel_s3_column_def:
        delim_data = s2_delim_form['delimiter'].data
        return render('choose_delimiter.mako', request, {
          'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
          'delim_readable': DELIMITER_READABLE.get(delim_data[0], delim_data[1]),
          'initial': delim_is_auto,
          'file_form': s1_file_form,
          'delim_form': s2_delim_form,
          'fields_list': fields_list,
          'delimiter_choices': TERMINATOR_CHOICES,
          'n_cols': n_cols,
          'database': database,
          'databases': databases
        })

      #
      # Go to step 3: Define column.
      #
      if do_s3_column_def:
        if s3_col_formset is None:
          # No formset was posted yet: propose one string column per previewed column.
          columns = [
            {'column_name': 'col_%s' % (i,), 'column_type': 'string'}
            for i in range(n_cols)
          ]
          s3_col_formset = ColumnTypeFormSet(prefix='cols', initial=columns)
        try:
          fields_list_for_json = list(fields_list)
          if fields_list_for_json:
            # Cleaning headers: keep only word characters in the first row.
            # Built as a list (not map()) so json.dumps() also accepts it on Python 3.
            fields_list_for_json[0] = [re.sub(r'[^\w]', '', a) for a in fields_list_for_json[0]]

          return render('define_columns.mako', request, {
            'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
            'file_form': s1_file_form,
            'delim_form': s2_delim_form,
            'column_formset': s3_col_formset,
            'fields_list': fields_list,
            'fields_list_json': json.dumps(fields_list_for_json),
            'n_cols': n_cols,
            'database': database,
            'databases': databases
          })
        except Exception as e:
          raise PopupException(_("The selected delimiter is creating an un-even number of columns. Please make sure you don't have empty columns."), detail=e)

      #
      # Final: Execute
      #
      if do_hive_create:
        delim = s2_delim_form.cleaned_data['delimiter']
        table_name = s1_file_form.cleaned_data['name']
        proposed_query = django_mako.render_to_string("create_table_statement.mako", {
            'table': {
                'name': table_name,
                'comment': s1_file_form.cleaned_data['comment'],
                'row_format': 'Delimited',
                'field_terminator': delim
             },
            'columns': [f.cleaned_data for f in s3_col_formset.forms],
            'partition_columns': [],
            'database': database,
            'databases': databases
          }
        )

        do_load_data = s1_file_form.cleaned_data.get('do_import')
        path = s1_file_form.cleaned_data['path']
        try:
          return _submit_create_and_load(request, proposed_query, table_name, path, do_load_data, database=database)
        except QueryServerException as e:
          raise PopupException(_('The table could not be created.'), detail=e.message)
Ejemplo n.º 2
0
def import_wizard(request, database="default"):
    """
    Help users define a table based on a file they want to import to Hive.

    The wizard is a sequence of forms posted back to this view. Exactly one
    submit/cancel button name must be present in the POST data; it selects the
    transition:
      - submit_file    : step 1 -> 2 (preview with an automatically chosen delimiter)
      - submit_preview : step 2 -> 2 (preview with the delimiter chosen by the user)
      - submit_delim   : step 2 -> 3 (define the columns)
      - submit_create  : step 3 -> execute the generated CREATE TABLE statement
      - cancel_delim   : step 2 -> 1
      - cancel_create  : step 3 -> 2

    Limitations:
      - Rows are delimited (no serde).
      - No detection for map and array types.
      - No detection for the presence of column header in the first row.
      - No partition table.
      - Does not work with binary data.

    Parameters:
      request  -- the incoming request; request.user, request.method,
                  request.POST and request.fs are read.
      database -- name of the target database, used for the form action URL
                  and for the generated statement.

    Returns the rendered response of the next step, or the result of
    _submit_create_and_load() once the table definition is complete.

    Raises PopupException when the POST does not carry exactly one action or
    when building the column definition step fails.

    NOTE(review): as written here, a non-POST request, an invalid file form, or
    a 'cancel_delim' submission with a valid delimiter form matches no branch
    and falls through, returning None.
    """
    encoding = i18n.get_site_encoding()
    app_name = get_app_name(request)

    db = dbms.get(request.user)
    # The loop variable must not be called `db`: on Python 2 a list
    # comprehension leaks its variable into this scope, which would replace the
    # handle passed to CreateByImportFileForm below with the last database name.
    databases = [
        {"name": db_name, "url": reverse("beeswax:import_wizard", kwargs={"database": db_name})}
        for db_name in db.get_databases()
    ]

    if request.method == "POST":
        #
        # General processing logic:
        # - We have 3 steps. Each requires the previous.
        #   * Step 1      : Table name and file location
        #   * Step 2a     : Display sample with auto chosen delim
        #   * Step 2b     : Display sample with user chosen delim (if user chooses one)
        #   * Step 3      : Display sample, and define columns
        # - Each step is represented by a different form. The form of an earlier step
        #   should be present when submitting to a later step.
        # - To preserve the data from the earlier steps, we send the forms back as
        #   hidden fields. This way, when users revisit a previous step, the data would
        #   be there as well.
        #
        delim_is_auto = False
        fields_list, n_cols = [[]], 0
        s3_col_formset = None
        s1_file_form = CreateByImportFileForm(request.POST, db=db)

        if s1_file_form.is_valid():
            do_s2_auto_delim = request.POST.get("submit_file")  # Step 1 -> 2
            do_s2_user_delim = request.POST.get("submit_preview")  # Step 2 -> 2
            do_s3_column_def = request.POST.get("submit_delim")  # Step 2 -> 3
            do_hive_create = request.POST.get("submit_create")  # Step 3 -> execute

            cancel_s2_user_delim = request.POST.get("cancel_delim")  # Step 2 -> 1
            cancel_s3_column_def = request.POST.get("cancel_create")  # Step 3 -> 2

            # Exactly one of these should be set. A list comprehension is used
            # for the count because filter() has no len() on Python 3.
            actions = (
                do_s2_auto_delim,
                do_s2_user_delim,
                do_s3_column_def,
                do_hive_create,
                cancel_s2_user_delim,
                cancel_s3_column_def,
            )
            if len([action for action in actions if action]) != 1:
                raise PopupException(_("Invalid form submission"))

            if not do_s2_auto_delim:
                # We should have a valid delim form
                s2_delim_form = CreateByImportDelimForm(request.POST)
                if not s2_delim_form.is_valid():
                    # Go back to picking delimiter
                    do_s2_user_delim, do_s3_column_def, do_hive_create = True, False, False
            if do_hive_create:
                # We should have a valid columns formset
                s3_col_formset = ColumnTypeFormSet(prefix="cols", data=request.POST)
                if not s3_col_formset.is_valid():
                    # Go back to define columns
                    do_s3_column_def, do_hive_create = True, False

            #
            # Go to step 2: We've just picked the file. Preview it.
            #
            if do_s2_auto_delim:
                delim_is_auto = True
                # Offer every reader type and every known delimiter to the preview.
                fields_list, n_cols, s2_delim_form = _delim_preview(
                    request.fs, s1_file_form, encoding, [reader.TYPE for reader in FILE_READERS], DELIMITERS
                )

            if (do_s2_user_delim or do_s3_column_def or cancel_s3_column_def) and s2_delim_form.is_valid():
                # Delimit based on input
                fields_list, n_cols, s2_delim_form = _delim_preview(
                    request.fs,
                    s1_file_form,
                    encoding,
                    (s2_delim_form.cleaned_data["file_type"],),
                    (s2_delim_form.cleaned_data["delimiter"],),
                )

            if do_s2_auto_delim or do_s2_user_delim or cancel_s3_column_def:
                delim_data = s2_delim_form["delimiter"].data
                return render(
                    "choose_delimiter.mako",
                    request,
                    {
                        "action": reverse(app_name + ":import_wizard", kwargs={"database": database}),
                        "delim_readable": DELIMITER_READABLE.get(delim_data[0], delim_data[1]),
                        "initial": delim_is_auto,
                        "file_form": s1_file_form,
                        "delim_form": s2_delim_form,
                        "fields_list": fields_list,
                        "delimiter_choices": TERMINATOR_CHOICES,
                        "n_cols": n_cols,
                        "database": database,
                        "databases": databases,
                    },
                )

            #
            # Go to step 3: Define column.
            #
            if do_s3_column_def:
                if s3_col_formset is None:
                    # No formset was posted yet: propose one string column per
                    # previewed column.
                    columns = [
                        {"column_name": "col_%s" % (i,), "column_type": "string"}
                        for i in range(n_cols)
                    ]
                    s3_col_formset = ColumnTypeFormSet(prefix="cols", initial=columns)
                try:
                    fields_list_for_json = list(fields_list)
                    if fields_list_for_json:
                        # Cleaning headers: keep only word characters in the
                        # first row. Built as a list (not map()) so json.dumps()
                        # also accepts it on Python 3.
                        fields_list_for_json[0] = [
                            re.sub(r"[^\w]", "", a) for a in fields_list_for_json[0]
                        ]

                    return render(
                        "define_columns.mako",
                        request,
                        {
                            "action": reverse(app_name + ":import_wizard", kwargs={"database": database}),
                            "file_form": s1_file_form,
                            "delim_form": s2_delim_form,
                            "column_formset": s3_col_formset,
                            "fields_list": fields_list,
                            "fields_list_json": json.dumps(fields_list_for_json),
                            "n_cols": n_cols,
                            "database": database,
                            "databases": databases,
                        },
                    )
                except Exception as e:
                    raise PopupException(
                        _(
                            "The selected delimiter is creating an un-even number of columns. Please make sure you don't have empty columns."
                        ),
                        detail=e,
                    )

            #
            # Final: Execute
            #
            if do_hive_create:
                delim = s2_delim_form.cleaned_data["delimiter"]
                table_name = s1_file_form.cleaned_data["name"]
                proposed_query = django_mako.render_to_string(
                    "create_table_statement.mako",
                    {
                        "table": {
                            "name": table_name,
                            "comment": s1_file_form.cleaned_data["comment"],
                            "row_format": "Delimited",
                            "field_terminator": delim,
                        },
                        "columns": [f.cleaned_data for f in s3_col_formset.forms],
                        "partition_columns": [],
                        "database": database,
                        "databases": databases,
                    },
                )

                do_load_data = s1_file_form.cleaned_data.get("do_import")
                path = s1_file_form.cleaned_data["path"]
                return _submit_create_and_load(
                    request, proposed_query, table_name, path, do_load_data, database=database
                )
Ejemplo n.º 3
0
def import_wizard(request, database='default'):
  """
  Help users define a table based on a file they want to import to Hive.

  The wizard is a sequence of forms posted back to this view. Exactly one
  submit/cancel button name must be present in the POST data; it selects the
  transition:
    - submit_file    : step 1 -> 2 (preview with an automatically chosen delimiter)
    - submit_preview : step 2 -> 2 (preview with the delimiter chosen by the user)
    - submit_delim   : step 2 -> 3 (define the columns)
    - submit_create  : step 3 -> execute the generated CREATE TABLE statement
    - cancel_delim   : step 2 -> 1
    - cancel_create  : step 3 -> 2

  Limitations:
    - Rows are delimited (no serde).
    - No detection for map and array types.
    - No detection for the presence of column header in the first row.
    - No partition table.
    - Does not work with binary data.

  Parameters:
    request  -- the incoming request; request.user, request.method,
                request.POST and request.fs are read.
    database -- name of the target database, used for the form action URL
                and for the generated statement.

  Returns the rendered response of the next step, or the result of
  _submit_create_and_load() once the table definition is complete. A non-POST
  request, an invalid file form, or a 'cancel_delim' submission with a valid
  delimiter form renders 'choose_file.mako' (step 1).

  Raises PopupException when the POST does not carry exactly one action.
  """
  encoding = i18n.get_site_encoding()
  app_name = get_app_name(request)

  if request.method == 'POST':
    #
    # General processing logic:
    # - We have 3 steps. Each requires the previous.
    #   * Step 1      : Table name and file location
    #   * Step 2a     : Display sample with auto chosen delim
    #   * Step 2b     : Display sample with user chosen delim (if user chooses one)
    #   * Step 3      : Display sample, and define columns
    # - Each step is represented by a different form. The form of an earlier step
    #   should be present when submitting to a later step.
    # - To preserve the data from the earlier steps, we send the forms back as
    #   hidden fields. This way, when users revisit a previous step, the data would
    #   be there as well.
    #
    delim_is_auto = False
    fields_list, n_cols = [[]], 0
    s3_col_formset = None

    db = dbms.get(request.user)
    s1_file_form = CreateByImportFileForm(request.POST, db=db)

    if s1_file_form.is_valid():
      do_s2_auto_delim = request.POST.get('submit_file')        # Step 1 -> 2
      do_s2_user_delim = request.POST.get('submit_preview')     # Step 2 -> 2
      do_s3_column_def = request.POST.get('submit_delim')       # Step 2 -> 3
      do_hive_create = request.POST.get('submit_create')        # Step 3 -> execute

      cancel_s2_user_delim = request.POST.get('cancel_delim')   # Step 2 -> 1
      cancel_s3_column_def = request.POST.get('cancel_create')  # Step 3 -> 2

      # Exactly one of these should be set. A list comprehension is used for
      # the count because filter() has no len() on Python 3.
      actions = (do_s2_auto_delim, do_s2_user_delim, do_s3_column_def, do_hive_create,
                 cancel_s2_user_delim, cancel_s3_column_def)
      if len([action for action in actions if action]) != 1:
        raise PopupException(_('Invalid form submission'))

      if not do_s2_auto_delim:
        # We should have a valid delim form
        s2_delim_form = CreateByImportDelimForm(request.POST)
        if not s2_delim_form.is_valid():
          # Go back to picking delimiter
          do_s2_user_delim, do_s3_column_def, do_hive_create = True, False, False
      if do_hive_create:
        # We should have a valid columns formset
        s3_col_formset = ColumnTypeFormSet(prefix='cols', data=request.POST)
        if not s3_col_formset.is_valid():
          # Go back to define columns
          do_s3_column_def, do_hive_create = True, False

      #
      # Go to step 2: We've just picked the file. Preview it.
      #
      if do_s2_auto_delim:
        delim_is_auto = True
        # Offer every reader type and every known delimiter to the preview.
        fields_list, n_cols, s2_delim_form = _delim_preview(request.fs, s1_file_form, encoding,
                                                            [reader.TYPE for reader in FILE_READERS], DELIMITERS)

      if (do_s2_user_delim or do_s3_column_def or cancel_s3_column_def) and s2_delim_form.is_valid():
        # Delimit based on input
        fields_list, n_cols, s2_delim_form = _delim_preview(request.fs, s1_file_form, encoding,
                                                            (s2_delim_form.cleaned_data['file_type'],),
                                                            (s2_delim_form.cleaned_data['delimiter'],))

      if do_s2_auto_delim or do_s2_user_delim or cancel_s3_column_def:
        delim_data = s2_delim_form['delimiter'].data
        return render('choose_delimiter.mako', request, {
          'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
          'delim_readable': DELIMITER_READABLE.get(delim_data[0], delim_data[1]),
          'initial': delim_is_auto,
          'file_form': s1_file_form,
          'delim_form': s2_delim_form,
          'fields_list': fields_list,
          'delimiter_choices': TERMINATOR_CHOICES,
          'n_cols': n_cols,
          'database': database,
        })

      #
      # Go to step 3: Define column.
      #
      if do_s3_column_def:
        if s3_col_formset is None:
          # No formset was posted yet: propose one string column per previewed column.
          columns = [
            dict(column_name='col_%s' % (i,), column_type='string')
            for i in range(n_cols)
          ]
          s3_col_formset = ColumnTypeFormSet(prefix='cols', initial=columns)
        return render('define_columns.mako', request, {
          'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
          'file_form': s1_file_form,
          'delim_form': s2_delim_form,
          'column_formset': s3_col_formset,
          'fields_list': fields_list,
          'n_cols': n_cols,
          'database': database,
        })

      #
      # Final: Execute
      #
      if do_hive_create:
        delim = s2_delim_form.cleaned_data['delimiter']
        table_name = s1_file_form.cleaned_data['name']
        proposed_query = django_mako.render_to_string("create_table_statement.mako", {
            'table': dict(name=table_name,
                          comment=s1_file_form.cleaned_data['comment'],
                          row_format='Delimited',
                          field_terminator=delim),
            'columns': [f.cleaned_data for f in s3_col_formset.forms],
            'partition_columns': [],
            'database': database,
          }
        )

        do_load_data = s1_file_form.cleaned_data.get('do_import')
        path = s1_file_form.cleaned_data['path']
        return _submit_create_and_load(request, proposed_query, table_name, path, do_load_data, database=database)
  else:
    # First visit: show an unbound file form.
    s1_file_form = CreateByImportFileForm()

  # Step 1, also reached when no later step returned above.
  return render('choose_file.mako', request, {
    'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
    'file_form': s1_file_form,
    'database': database,
  })
Ejemplo n.º 4
0
def import_wizard(request, database='default'):
    """
    Help users define a table based on a file they want to import to Hive.

    The wizard is a sequence of forms posted back to this view. Exactly one
    submit/cancel button name must be present in the POST data; it selects the
    transition:
      - submit_file    : step 1 -> 2 (preview with an automatically chosen delimiter)
      - submit_preview : step 2 -> 2 (preview with the delimiter chosen by the user)
      - submit_delim   : step 2 -> 3 (define the columns)
      - submit_create  : step 3 -> execute the generated CREATE TABLE statement
      - cancel_delim   : step 2 -> 1
      - cancel_create  : step 3 -> 2

    Limitations:
      - Rows are delimited (no serde).
      - No detection for map and array types.
      - No detection for the presence of column header in the first row.
      - No partition table.
      - Does not work with binary data.

    Parameters:
      request  -- the incoming request; request.user, request.method,
                  request.POST, request.GET and request.fs are read.
      database -- name of the target database, used for the form action URL
                  and for the generated statement.

    Returns the rendered response of the next step, or the result of
    _submit_create_and_load() once the table definition is complete. A
    non-POST request, an invalid file form, or a 'cancel_delim' submission
    with a valid delimiter form renders 'import_wizard_choose_file.mako'.

    Raises PopupException when the POST does not carry exactly one action,
    when the path does not match the selected load mode or cannot be checked,
    when building the column definition step fails, or when the query server
    rejects the table creation.
    """
    encoding = i18n.get_site_encoding()
    app_name = get_app_name(request)

    db = dbms.get(request.user)
    # The loop variable must not be called `db`: it would shadow the handle
    # passed to CreateByImportFileForm below, and on Python 2 (where list
    # comprehension variables leak) it would actually replace it.
    databases = [{
        'name': db_name,
        'url': reverse('beeswax:import_wizard', kwargs={'database': db_name}),
    } for db_name in db.get_databases()]

    if request.method == 'POST':
        #
        # General processing logic:
        # - We have 3 steps. Each requires the previous.
        #   * Step 1      : Table name and file location
        #   * Step 2a     : Display sample with auto chosen delim
        #   * Step 2b     : Display sample with user chosen delim (if user chooses one)
        #   * Step 3      : Display sample, and define columns
        # - Each step is represented by a different form. The form of an earlier step
        #   should be present when submitting to a later step.
        # - To preserve the data from the earlier steps, we send the forms back as
        #   hidden fields. This way, when users revisit a previous step, the data would
        #   be there as well.
        #
        delim_is_auto = False
        fields_list, n_cols = [[]], 0
        s3_col_formset = None
        s1_file_form = CreateByImportFileForm(request.POST, db=db)

        if s1_file_form.is_valid():
            do_s2_auto_delim = request.POST.get('submit_file')  # Step 1 -> 2
            do_s2_user_delim = request.POST.get('submit_preview')  # Step 2 -> 2
            do_s3_column_def = request.POST.get('submit_delim')  # Step 2 -> 3
            do_hive_create = request.POST.get('submit_create')  # Step 3 -> execute

            cancel_s2_user_delim = request.POST.get('cancel_delim')  # Step 2 -> 1
            cancel_s3_column_def = request.POST.get('cancel_create')  # Step 3 -> 2

            # Exactly one of these should be set
            actions = (do_s2_auto_delim, do_s2_user_delim, do_s3_column_def,
                       do_hive_create, cancel_s2_user_delim,
                       cancel_s3_column_def)
            if len([action for action in actions if action]) != 1:
                raise PopupException(_('Invalid form submission'))

            if not do_s2_auto_delim:
                # We should have a valid delim form
                s2_delim_form = CreateByImportDelimForm(request.POST)
                if not s2_delim_form.is_valid():
                    # Go back to picking delimiter
                    do_s2_user_delim, do_s3_column_def, do_hive_create = True, False, False
            if do_hive_create:
                # We should have a valid columns formset
                s3_col_formset = ColumnTypeFormSet(prefix='cols',
                                                   data=request.POST)
                if not s3_col_formset.is_valid():
                    # Go back to define columns
                    do_s3_column_def, do_hive_create = True, False

            # Load mode defaults to 'IMPORT' when the form does not provide one.
            load_data = s1_file_form.cleaned_data.get('load_data',
                                                      'IMPORT').upper()
            path = s1_file_form.cleaned_data['path']

            #
            # Go to step 2: We've just picked the file. Preview it.
            #
            if do_s2_auto_delim:
                # The path must be a file for 'IMPORT' and a directory for
                # 'EXTERNAL'; other load modes are not checked here.
                try:
                    if load_data == 'IMPORT':
                        if not request.fs.isfile(path):
                            raise PopupException(
                                _('Path location must refer to a file if "Import Data" is selected.'
                                  ))
                    elif load_data == 'EXTERNAL':
                        if not request.fs.isdir(path):
                            raise PopupException(
                                _('Path location must refer to a directory if "Create External Table" is selected.'
                                  ))
                except (IOError, S3FileSystemException) as e:
                    raise PopupException(
                        _('Path location "%s" is invalid: %s') % (path, e))

                delim_is_auto = True
                # Offer every reader type and every known delimiter to the preview.
                fields_list, n_cols, s2_delim_form = _delim_preview(
                    request.fs, s1_file_form, encoding,
                    [reader.TYPE for reader in FILE_READERS], DELIMITERS)

            if (do_s2_user_delim or do_s3_column_def
                    or cancel_s3_column_def) and s2_delim_form.is_valid():
                # Delimit based on input
                fields_list, n_cols, s2_delim_form = _delim_preview(
                    request.fs, s1_file_form, encoding,
                    (s2_delim_form.cleaned_data['file_type'], ),
                    (s2_delim_form.cleaned_data['delimiter'], ))

            if do_s2_auto_delim or do_s2_user_delim or cancel_s3_column_def:
                apps_list = _get_apps(request.user, '')
                delim_data = s2_delim_form['delimiter'].data
                return render(
                    'import_wizard_choose_delimiter.mako', request, {
                        'apps': apps_list,
                        'action': reverse(app_name + ':import_wizard',
                                          kwargs={'database': database}),
                        'delim_readable': DELIMITER_READABLE.get(
                            delim_data[0], delim_data[1]),
                        'initial': delim_is_auto,
                        'file_form': s1_file_form,
                        'delim_form': s2_delim_form,
                        'fields_list': fields_list,
                        'delimiter_choices': TERMINATOR_CHOICES,
                        'n_cols': n_cols,
                        'database': database,
                        'databases': databases,
                    })

            #
            # Go to step 3: Define column.
            #
            if do_s3_column_def:
                if s3_col_formset is None:
                    # No formset was posted yet: propose one string column per
                    # previewed column.
                    columns = [{
                        'column_name': 'col_%s' % (i, ),
                        'column_type': 'string',
                    } for i in range(n_cols)]
                    s3_col_formset = ColumnTypeFormSet(prefix='cols',
                                                       initial=columns)
                try:
                    fields_list_for_json = list(fields_list)
                    if fields_list_for_json:
                        # Cleaning headers: keep only word characters in the
                        # first row. Raw string so `\w` is not parsed as a
                        # string escape.
                        fields_list_for_json[0] = [
                            re.sub(r'[^\w]', '', a)
                            for a in fields_list_for_json[0]
                        ]
                    apps_list = _get_apps(request.user, '')
                    return render(
                        'import_wizard_define_columns.mako', request, {
                            'apps': apps_list,
                            'action': reverse(app_name + ':import_wizard',
                                              kwargs={'database': database}),
                            'file_form': s1_file_form,
                            'delim_form': s2_delim_form,
                            'column_formset': s3_col_formset,
                            'fields_list': fields_list,
                            'fields_list_json': json.dumps(fields_list_for_json),
                            'n_cols': n_cols,
                            'database': database,
                            'databases': databases,
                        })
                except Exception as e:
                    raise PopupException(_(
                        "The selected delimiter is creating an un-even number of columns. Please make sure you don't have empty columns."
                    ), detail=e)

            #
            # Final: Execute
            #
            if do_hive_create:
                delim = s2_delim_form.cleaned_data['delimiter']
                table_name = s1_file_form.cleaned_data['name']

                # The header-skipping flag is read from the query string, not
                # from the posted form data.
                skip_header = request.GET.get('removeHeader', 'off').lower() == 'on'
                proposed_query = django_mako.render_to_string(
                    "create_table_statement.mako", {
                        'table': {
                            'name': table_name,
                            'comment': s1_file_form.cleaned_data['comment'],
                            'row_format': 'Delimited',
                            'field_terminator': delim,
                            'file_format': 'TextFile',
                            'load_data': load_data,
                            'path': path,
                            'skip_header': skip_header,
                        },
                        'columns':
                        [f.cleaned_data for f in s3_col_formset.forms],
                        'partition_columns': [],
                        'database': database,
                        'databases': databases,
                    })
                try:
                    return _submit_create_and_load(request,
                                                   proposed_query,
                                                   table_name,
                                                   path,
                                                   load_data,
                                                   database=database)
                except QueryServerException as e:
                    raise PopupException(_('The table could not be created.'),
                                         detail=e.message)
    else:
        # First visit: show an unbound file form.
        s1_file_form = CreateByImportFileForm()

    # Step 1, also reached when no later step returned above.
    return render(
        'import_wizard_choose_file.mako', request, {
            'action': reverse(app_name + ':import_wizard',
                              kwargs={'database': database}),
            'file_form': s1_file_form,
            'database': database,
            'databases': databases,
        })
Ejemplo n.º 5
0
def import_wizard(request, database=None):
  """
  Walk the user through creating a Hive table from a delimited file.

  The wizard is a multi-step POST flow. Each step re-submits the forms of the
  earlier steps as hidden fields, so every request carries what is needed to
  rebuild the step being displayed:
    - Step 1: table name and file location (CreateByImportFileForm).
    - Step 2: preview of the file, split with an automatically chosen or a
      user chosen delimiter (CreateByImportDelimForm).
    - Step 3: column names and types (ColumnTypeFormSet), then execution.

  Limitations:
    - Rows are delimited (no serde).
    - No detection for map and array types.
    - No detection for the presence of column header in the first row.
    - No partition table.
    - Does not work with binary data.

  :param request: current HTTP request; its method, POST data, user and fs
    attributes are read.
  :param database: database name, resolved with _get_last_database() before use.
  :returns: the rendered page of the step to display, or the return value of
    _submit_create_and_load() once the column definitions are submitted.
  :raises AssertionError: if the file form is valid but the POST data does not
    contain exactly one of the wizard's submit/cancel buttons.
  :raises PopupException: if opening the file or skipping its first line fails.
  """
  database = _get_last_database(request, database)
  encoding = i18n.get_site_encoding()
  app_name = get_app_name(request)

  if request.method == 'POST':
    # Single-pass loop so that `break` can jump straight to the step 1 page
    # rendered at the end of the function. The loop variable must not be named
    # `_`: it would become a local and shadow the gettext alias `_` that the
    # error handling below calls.
    for _single_pass in range(1):
      #
      # General processing logic:
      # - We have 3 steps. Each requires the previous.
      #   * Step 1      : Table name and file location
      #   * Step 2a     : Display sample with auto chosen delim
      #   * Step 2b     : Display sample with user chosen delim (if user chooses one)
      #   * Step 3      : Display sample, and define columns
      # - Each step is represented by a different form. The form of an earlier step
      #   should be present when submitting to a later step.
      # - To preserve the data from the earlier steps, we send the forms back as
      #   hidden fields. This way, when users revisit a previous step, the data would
      #   be there as well.
      #
      delim_is_auto = False
      fields_list, n_cols = [ [] ], 0
      s3_col_formset = None

      # Everything requires a valid file form
      db = dbms.get(request.user)
      s1_file_form = CreateByImportFileForm(request.POST, db=db)
      if not s1_file_form.is_valid():
        break

      do_s2_auto_delim = request.POST.get('submit_file')        # Step 1 -> 2
      do_s2_user_delim = request.POST.get('submit_preview')     # Step 2 -> 2
      do_s3_column_def = request.POST.get('submit_delim')       # Step 2 -> 3
      do_hive_create = request.POST.get('submit_create')        # Step 3 -> execute

      cancel_s2_user_delim = request.POST.get('cancel_delim')   # Step 2 -> 1
      cancel_s3_column_def = request.POST.get('cancel_create')  # Step 3 -> 2

      # Exactly one of these should be set. Counting with a list comprehension
      # (instead of len(filter(...))) also works where filter() returns an iterator.
      # NOTE(review): assert statements are skipped when Python runs with -O.
      requested_actions = (do_s2_auto_delim,
                           do_s2_user_delim,
                           do_s3_column_def,
                           do_hive_create,
                           cancel_s2_user_delim,
                           cancel_s3_column_def)
      assert len([action for action in requested_actions if action]) == 1, 'Invalid form submission'

      #
      # Fix up what we should do in case any form is invalid
      #
      if not do_s2_auto_delim:
        # We should have a valid delim form
        s2_delim_form = CreateByImportDelimForm(request.POST)
        if not s2_delim_form.is_valid():
          # Go back to picking delimiter
          do_s2_user_delim, do_s3_column_def, do_hive_create = True, False, False

      if do_hive_create:
        # We should have a valid columns formset
        s3_col_formset = ColumnTypeFormSet(prefix='cols', data=request.POST)
        if not s3_col_formset.is_valid():
          # Go back to define columns
          do_s3_column_def, do_hive_create = True, False

      #
      # Go to step 2: We've just picked the file. Preview it.
      #
      if do_s2_auto_delim:
        delim_is_auto = True
        # Offer every reader type and every known delimiter to the preview helper.
        fields_list, n_cols, s2_delim_form = _delim_preview(
                                              request.fs,
                                              s1_file_form,
                                              encoding,
                                              [ reader.TYPE for reader in FILE_READERS ],
                                              DELIMITERS,
                                              False)

      if (do_s2_user_delim or do_s3_column_def or cancel_s3_column_def) and s2_delim_form.is_valid():
        # Delimit based on input
        fields_list, n_cols, s2_delim_form = _delim_preview(
                                              request.fs,
                                              s1_file_form,
                                              encoding,
                                              (s2_delim_form.cleaned_data['file_type'],),
                                              (s2_delim_form.cleaned_data['delimiter'],),
                                              s2_delim_form.cleaned_data.get('read_column_headers'))

      if do_s2_auto_delim or do_s2_user_delim or cancel_s3_column_def:
        # presumably the delimiter field data is a (choice, custom value) pair -- TODO confirm
        return render('choose_delimiter.mako', request, {
          'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
          'delim_readable': DELIMITER_READABLE.get(s2_delim_form['delimiter'].data[0], s2_delim_form['delimiter'].data[1]),
          'initial': delim_is_auto,
          'file_form': s1_file_form,
          'delim_form': s2_delim_form,
          'fields_list': fields_list,
          'delimiter_choices': TERMINATOR_CHOICES,
          'n_cols': n_cols,
          'database': database,
        })

      #
      # Go to step 3: Define column.
      #
      if do_s3_column_def:
        read_column_headers = s2_delim_form.cleaned_data.get('read_column_headers')
        if s3_col_formset is None or not read_column_headers:
          if read_column_headers and fields_list:
            # Use the first previewed row as column names (padding with generated
            # names when the row is short) and drop it from the preview.
            first_row = fields_list[0]
            columns = [
              dict(
                  column_name=first_row[i] if i < len(first_row) else 'col_%s' % (i + 1,),
                  column_type='string',
              )
              for i in range(n_cols)
            ]
            fields_list = fields_list[1:]
          else:
            columns = [
              dict(
                  column_name='col_%s' % (i + 1,),
                  column_type='string',
              )
              for i in range(n_cols)
            ]
          s3_col_formset = ColumnTypeFormSet(prefix='cols', initial=columns)
        return render('define_columns.mako', request, {
          'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
          'file_form': s1_file_form,
          'delim_form': s2_delim_form,
          'column_formset': s3_col_formset,
          'fields_list': fields_list,
          'n_cols': n_cols,
          'database': database,
        })

      #
      # Finale: Execute
      #
      if do_hive_create:
        delim = s2_delim_form.cleaned_data['delimiter']
        table_name = s1_file_form.cleaned_data['name']
        proposed_query = django_mako.render_to_string("create_table_statement.mako",
          {
            'table': dict(name=table_name,
                          comment=s1_file_form.cleaned_data['comment'],
                          row_format='Delimited',
                          field_terminator=delim),
            'columns': [ f.cleaned_data for f in s3_col_formset.forms ],
            'partition_columns': [],
            'database': database,
          }
        )

        do_load_data = s1_file_form.cleaned_data.get('do_import')
        path = s1_file_form.cleaned_data.get('path')
        read_column_headers = s2_delim_form.cleaned_data.get('read_column_headers')
        if read_column_headers and do_load_data:
          # The first line holds the column names: skip it before the file is loaded.
          file_type = s2_delim_form.cleaned_data.get('file_type')
          file_readers = [ reader for reader in FILE_READERS if reader.TYPE == file_type ]
          if len(file_readers) == 1:
            try:
              file_obj = request.fs.open(path)
              _skip_first_line_in_file(file_readers[0],
                                       request.fs,
                                       path,
                                       file_readers[0].find(file_obj, '\n') + 1)
            except Exception as ex:
              # Translate the template first, then interpolate, so that the
              # catalogue lookup uses the constant msgid.
              msg = _('Cannot process file: %s') % (ex,)
              LOG.error(msg)
              raise PopupException(msg)
        return _submit_create_and_load(request, proposed_query, table_name, path, do_load_data, database=database)
  else:
    s1_file_form = CreateByImportFileForm()

  # Step 1: shown on a GET request, when the file form is invalid, and when the
  # user cancels out of step 2. A view must not fall off the end returning None.
  return render('choose_file.mako', request, {
    'action': reverse(app_name + ':import_wizard', kwargs={'database': database}),
    'file_form': s1_file_form,
    'database': database,
  })
Ejemplo n.º 6
0
def import_wizard(request):
    """
    Walk the user through creating a Hive table from a delimited file.

    The wizard is a multi-step POST flow. Each step re-submits the forms of the
    earlier steps as hidden fields, so every request carries what is needed to
    rebuild the step being displayed.

    Limitations:
      - Rows are delimited (no serde).
      - No detection for map and array types.
      - No detection for the presence of column header in the first row.
      - No partition table.
      - Does not work with binary data.

    :param request: current HTTP request; its method, POST data, user and fs
      attributes are read.
    :returns: the rendered page of the step to display, or the return value of
      _submit_create_and_load() once the column definitions are submitted.
    :raises AssertionError: if the file form is valid but the POST data does
      not contain exactly one of the wizard's submit/cancel buttons.
    """
    encoding = i18n.get_site_encoding()

    if request.method == "POST":
        # Single-pass loop so that `break` can jump straight to the step 1 page
        # rendered at the end of the function. The loop variable is deliberately
        # not `_`, which is conventionally the gettext alias.
        for _single_pass in range(1):
            #
            # General processing logic:
            # - We have 3 steps. Each requires the previous.
            #   * Step 1      : Table name and file location
            #   * Step 2a     : Display sample with auto chosen delim
            #   * Step 2b     : Display sample with user chosen delim (if user chooses one)
            #   * Step 3      : Display sample, and define columns
            # - Each step is represented by a different form. The form of an earlier step
            #   should be present when submitting to a later step.
            # - To preserve the data from the earlier steps, we send the forms back as
            #   hidden fields. This way, when users revisit a previous step, the data would
            #   be there as well.
            #
            delim_is_auto = False
            fields_list, n_cols = [[]], 0
            s3_col_formset = None

            # Everything requires a valid file form
            db = dbms.get(request.user)
            s1_file_form = CreateByImportFileForm(request.POST, db=db)
            if not s1_file_form.is_valid():
                break

            do_s2_auto_delim = request.POST.get("submit_file")  # Step 1 -> 2
            do_s2_user_delim = request.POST.get("submit_preview")  # Step 2 -> 2
            do_s3_column_def = request.POST.get("submit_delim")  # Step 2 -> 3
            do_hive_create = request.POST.get("submit_create")  # Step 3 -> execute

            cancel_s2_user_delim = request.POST.get("cancel_delim")  # Step 2 -> 1
            cancel_s3_column_def = request.POST.get("cancel_create")  # Step 3 -> 2

            # Exactly one of these should be set. Counting with a list
            # comprehension (instead of len(filter(...))) also works where
            # filter() returns an iterator.
            # NOTE(review): assert statements are skipped when Python runs with -O.
            requested_actions = (
                do_s2_auto_delim,
                do_s2_user_delim,
                do_s3_column_def,
                do_hive_create,
                cancel_s2_user_delim,
                cancel_s3_column_def,
            )
            assert len([action for action in requested_actions if action]) == 1, "Invalid form submission"

            #
            # Fix up what we should do in case any form is invalid
            #
            if not do_s2_auto_delim:
                # We should have a valid delim form
                s2_delim_form = CreateByImportDelimForm(request.POST)
                if not s2_delim_form.is_valid():
                    # Go back to picking delimiter
                    do_s2_user_delim, do_s3_column_def, do_hive_create = True, False, False

            if do_hive_create:
                # We should have a valid columns formset
                s3_col_formset = ColumnTypeFormSet(prefix="cols", data=request.POST)
                if not s3_col_formset.is_valid():
                    # Go back to define columns
                    do_s3_column_def, do_hive_create = True, False

            #
            # Go to step 2: We've just picked the file. Preview it.
            #
            if do_s2_auto_delim:
                delim_is_auto = True
                # Offer every reader type and every known delimiter to the preview helper.
                fields_list, n_cols, s2_delim_form = _delim_preview(
                    request.fs, s1_file_form, encoding, [reader.TYPE for reader in FILE_READERS], DELIMITERS
                )

            if (do_s2_user_delim or do_s3_column_def or cancel_s3_column_def) and s2_delim_form.is_valid():
                # Delimit based on input
                fields_list, n_cols, s2_delim_form = _delim_preview(
                    request.fs,
                    s1_file_form,
                    encoding,
                    (s2_delim_form.cleaned_data["file_type"],),
                    (s2_delim_form.cleaned_data["delimiter"],),
                )

            if do_s2_auto_delim or do_s2_user_delim or cancel_s3_column_def:
                # presumably the delimiter field data is a (choice, custom value) pair -- TODO confirm
                return render(
                    "choose_delimiter.mako",
                    request,
                    dict(
                        action=urlresolvers.reverse(import_wizard),
                        delim_readable=DELIMITER_READABLE.get(
                            s2_delim_form["delimiter"].data[0], s2_delim_form["delimiter"].data[1]
                        ),
                        initial=delim_is_auto,
                        file_form=s1_file_form,
                        delim_form=s2_delim_form,
                        fields_list=fields_list,
                        delimiter_choices=TERMINATOR_CHOICES,
                        n_cols=n_cols,
                    ),
                )

            #
            # Go to step 3: Define column.
            #
            if do_s3_column_def:
                if s3_col_formset is None:
                    # No submitted formset to redisplay: seed one generated name per
                    # previewed column, every column typed as string.
                    columns = [dict(column_name="col_%s" % (i,), column_type="string") for i in range(n_cols)]
                    s3_col_formset = ColumnTypeFormSet(prefix="cols", initial=columns)
                return render(
                    "define_columns.mako",
                    request,
                    dict(
                        action=urlresolvers.reverse(import_wizard),
                        file_form=s1_file_form,
                        delim_form=s2_delim_form,
                        column_formset=s3_col_formset,
                        fields_list=fields_list,
                        n_cols=n_cols,
                    ),
                )

            #
            # Finale: Execute
            #
            if do_hive_create:
                delim = s2_delim_form.cleaned_data["delimiter"]
                table_name = s1_file_form.cleaned_data["name"]
                proposed_query = django_mako.render_to_string(
                    "create_table_statement.mako",
                    {
                        "table": dict(
                            name=table_name,
                            comment=s1_file_form.cleaned_data["comment"],
                            row_format="Delimited",
                            field_terminator=delim,
                        ),
                        "columns": [f.cleaned_data for f in s3_col_formset.forms],
                        "partition_columns": [],
                    },
                )

                do_load_data = s1_file_form.cleaned_data.get("do_import")
                path = s1_file_form.cleaned_data["path"]
                return _submit_create_and_load(request, proposed_query, table_name, path, do_load_data)
    else:
        s1_file_form = CreateByImportFileForm()

    # Step 1: shown on a GET request, when the file form is invalid, and when the
    # user cancels out of step 2.
    return render("choose_file.mako", request, dict(action=urlresolvers.reverse(import_wizard), file_form=s1_file_form))