Esempio n. 1
0
  def autocomplete(self, snippet, database=None, table=None, column=None, nested=None):
    """Return autocomplete metadata for the requested level of the hierarchy.

    The level is chosen from the arguments:

    * ``database`` is None: a single hard-coded database, ``default``.
    * ``table`` is None: whatever ``get_topics()`` returns, under ``tables_meta``.
    * otherwise: a hard-coded sample table description (columns, partition
      keys, comment, ...), independent of the actual ``database``/``table``.

    ``snippet``, ``column`` and ``nested`` are accepted but not used here.

    Any exception is logged and reported in the returned dict as
    ``code`` = 500 plus an ``error`` message instead of being propagated.
    """
    response = {}

    try:
      if database is None:
        response['databases'] = ['default']
      elif table is None:
        response['tables_meta'] = get_topics()
      else:
        # Static sample payload; nothing below depends on the arguments.
        response = {
          u'status': 0,
          u'comment': u'test test test 22',
          u'hdfs_link': u'/filebrowser/view=/user/hive/warehouse/web_logs',
          u'extended_columns': [
            {u'comment': u'', u'type': u'bigint', u'name': u'_version_'},
            {u'comment': u'The app', u'type': u'string', u'name': u'app'},
            {u'comment': u'test test   test 22', u'type': u'smallint', u'name': u'bytes'},
            {u'comment': u'The citi', u'type': u'string', u'name': u'city'},
            {u'comment': u'', u'type': u'string', u'name': u'client_ip'},
            {u'comment': u'', u'type': u'tinyint', u'name': u'code'},
            {u'comment': u'', u'type': u'string', u'name': u'country_code'},
            {u'comment': u'', u'type': u'string', u'name': u'country_code3'},
            {u'comment': u'', u'type': u'string', u'name': u'country_name'},
            {u'comment': u'', u'type': u'string', u'name': u'device_family'},
            {u'comment': u'', u'type': u'string', u'name': u'extension'},
            {u'comment': u'', u'type': u'float', u'name': u'latitude'},
            {u'comment': u'', u'type': u'float', u'name': u'longitude'},
            {u'comment': u'', u'type': u'string', u'name': u'method'},
            {u'comment': u'', u'type': u'string', u'name': u'os_family'},
            {u'comment': u'', u'type': u'string', u'name': u'os_major'},
            {u'comment': u'', u'type': u'string', u'name': u'protocol'},
            {u'comment': u'', u'type': u'string', u'name': u'record'},
            {u'comment': u'', u'type': u'string', u'name': u'referer'},
            {u'comment': u'', u'type': u'bigint', u'name': u'region_code'},
            {u'comment': u'', u'type': u'string', u'name': u'request'},
            {u'comment': u'', u'type': u'string', u'name': u'subapp'},
            {u'comment': u'', u'type': u'string', u'name': u'time'},
            {u'comment': u'', u'type': u'string', u'name': u'url'},
            {u'comment': u'', u'type': u'string', u'name': u'user_agent'},
            {u'comment': u'', u'type': u'string', u'name': u'user_agent_family'},
            {u'comment': u'', u'type': u'string', u'name': u'user_agent_major'},
            {u'comment': u'', u'type': u'string', u'name': u'id'},
            {u'comment': u'', u'type': u'string', u'name': u'date'}
          ],
          u'support_updates': False,
          u'partition_keys': [
            {u'type': u'string', u'name': u'date'}
          ],
          u'columns': [u'_version_', u'app', u'bytes', u'city', u'client_ip', u'code', u'country_code', u'country_code3', u'country_name', u'device_family', u'extension', u'latitude', u'longitude', u'method', u'os_family', u'os_major', u'protocol', u'record', u'referer', u'region_code', u'request', u'subapp', u'time', u'url', u'user_agent', u'user_agent_family', u'user_agent_major', u'id', u'date'],
          u'is_view': False
        }

    except Exception as e:
      # `warn` is a deprecated alias of `warning`; pass the exception lazily.
      LOG.warning('Autocomplete data fetching error: %s', e)
      response['code'] = 500
      # Python 3 exceptions have no `.message`; reading it unguarded would
      # raise AttributeError from inside this handler. Fall back to str(e).
      response['error'] = e.message if hasattr(e, 'message') and e.message else str(e)

    return response
Esempio n. 2
0
File: api3.py Progetto: hkj123/hue
def guess_format(request):
    """Guess the format descriptor of the import source named in the request.

    The JSON-encoded ``fileFormat`` POST field is decoded and its
    ``inputFormat`` key selects how the descriptor is built:

    * ``file``: the file at ``path`` is opened through ``request.fs`` and
      handed to ``MorphlineIndexer.guess_format``; the result is then passed
      through ``_convert_format``.
    * ``table``: table metadata is fetched through ``dbms``; only the
      ``text`` and ``parquet`` table formats are handled.
    * ``query``, ``rdbms``: fixed descriptors.
    * ``stream``: ``kafka`` (adds ``topics``) or ``flume``.
    * ``connector``: ``sfdc`` (adds the queryable Salesforce ``objects``).

    Returns:
        A ``JsonResponse`` wrapping the descriptor with ``status`` set to 0.

    Raises:
        PopupException: the path is not a file, the table lookup failed, the
            table format is unsupported, or the input format / stream /
            connector selection is not recognized.
        KeyError: ``fileFormat`` lacks a key required by the chosen branch
            (including ``inputFormat`` itself).
    """
    file_format = json.loads(request.POST.get('fileFormat', '{}'))
    input_format = file_format['inputFormat']

    if input_format == 'file':
        path = urllib_unquote(file_format["path"])
        indexer = MorphlineIndexer(request.user, request.fs)
        if not request.fs.isfile(path):
            raise PopupException(
                _('Path %(path)s is not a file') % file_format)

        stream = request.fs.open(path)
        format_ = indexer.guess_format(
            {"file": {
                "stream": stream,
                "name": path
            }})
        # Return value is ignored, so this presumably adjusts format_ in
        # place -- confirm against _convert_format.
        _convert_format(format_)
    elif input_format == 'table':
        db = dbms.get(request.user)
        try:
            table_metadata = db.get_table(database=file_format['databaseName'],
                                          table_name=file_format['tableName'])
        except Exception as e:
            raise PopupException(
                e.message if hasattr(e, 'message') and e.message else e)

        # Collect storage properties: entries shaped "key=value" are split,
        # any other non-empty data_type is mapped to its comment.
        storage = {}
        for delim in table_metadata.storage_details:
            if delim['data_type']:
                if '=' in delim['data_type']:
                    key, val = delim['data_type'].split('=', 1)
                    storage[key] = val
                else:
                    storage[delim['data_type']] = delim['comment']

        table_format = table_metadata.details['properties']['format']
        if table_format == 'text':
            format_ = {
                "quoteChar": "\"",
                "recordSeparator": '\\n',
                "type": "csv",
                "hasHeader": False,
                "fieldSeparator": storage.get('field.delim', ',')
            }
        elif table_format == 'parquet':
            format_ = {
                "type": "parquet",
                "hasHeader": False,
            }
        else:
            raise PopupException(
                'Hive table format %s is not supported.' % table_format)
    elif input_format == 'query':
        format_ = {
            "quoteChar": "\"",
            "recordSeparator": "\\n",
            "type": "csv",
            "hasHeader": False,
            "fieldSeparator": "\u0001"
        }
    elif input_format == 'rdbms':
        format_ = {"type": "csv"}
    elif input_format == 'stream':
        if file_format['streamSelection'] == 'kafka':
            format_ = {
                "type": "json",
                'topics': get_topics(request.user)
            }
        elif file_format['streamSelection'] == 'flume':
            format_ = {
                "type": "csv",
                "fieldSeparator": ",",
                "hasHeader": True,
                "quoteChar": "\"",
                "recordSeparator": "\\n"
            }
        else:
            # Without this branch format_ stays unbound and the function
            # fails below with UnboundLocalError instead of a clear message.
            raise PopupException(
                _('Input format %(inputFormat)s stream not recognized: %(streamSelection)s'
                  ) % file_format)
    elif input_format == 'connector':
        if file_format['connectorSelection'] == 'sfdc':
            sf = Salesforce(username=file_format['streamUsername'],
                            password=file_format['streamPassword'],
                            security_token=file_format['streamToken'])
            format_ = {
                "type": "csv",
                "fieldSeparator": ",",
                "hasHeader": True,
                "quoteChar": "\"",
                "recordSeparator": "\\n",
                'objects': [
                    sobject['name']
                    for sobject in sf.restful('sobjects/')['sobjects']
                    if sobject['queryable']
                ]
            }
        else:
            # The placeholder was written "$(connectorSelection)s", which
            # %-formatting leaves untouched; "%(...)s" interpolates it.
            raise PopupException(
                _('Input format %(inputFormat)s connector not recognized: %(connectorSelection)s'
                  ) % file_format)
    else:
        raise PopupException(
            _('Input format not recognized: %(inputFormat)s') % file_format)

    format_['status'] = 0
    return JsonResponse(format_)
Esempio n. 3
0
          storage[key] = val
        else:
          storage[delim['data_type']] = delim['comment']
    if table_metadata.details['properties']['format'] == 'text':
      format_ = {"quoteChar": "\"", "recordSeparator": '\\n', "type": "csv", "hasHeader": False, "fieldSeparator": storage.get('field.delim', ',')}
    elif table_metadata.details['properties']['format'] == 'parquet':
      format_ = {"type": "parquet", "hasHeader": False,}
    else:
      raise PopupException('Hive table format %s is not supported.' % table_metadata.details['properties']['format'])
  elif file_format['inputFormat'] == 'query':
    format_ = {"quoteChar": "\"", "recordSeparator": "\\n", "type": "csv", "hasHeader": False, "fieldSeparator": "\u0001"}
  elif file_format['inputFormat'] == 'rdbms':
    format_ = RdbmsIndexer(request.user, file_format['rdbmsType']).guess_format()
  elif file_format['inputFormat'] == 'stream':
    if file_format['streamSelection'] == 'kafka':
      format_ = {"type": "csv", "fieldSeparator": ",", "hasHeader": True, "quoteChar": "\"", "recordSeparator": "\\n", 'topics': get_topics()}
    elif file_format['streamSelection'] == 'sfdc':
      sf = Salesforce(
          username=file_format['streamUsername'],
          password=file_format['streamPassword'],
          security_token=file_format['streamToken']
      )
      format_ = {"type": "csv", "fieldSeparator": ",", "hasHeader": True, "quoteChar": "\"", "recordSeparator": "\\n", 'objects': [sobject['name'] for sobject in sf.restful('sobjects/')['sobjects'] if sobject['queryable']]}

  format_['status'] = 0
  return JsonResponse(format_)


def guess_field_types(request):
  file_format = json.loads(request.POST.get('fileFormat', '{}'))
Esempio n. 4
0
         "type": "csv",
         "hasHeader": False,
         "fieldSeparator": "\u0001"
     }
 elif file_format['inputFormat'] == 'rdbms':
     format_ = RdbmsIndexer(request.user,
                            file_format['rdbmsType']).guess_format()
 elif file_format['inputFormat'] == 'stream':
     if file_format['streamSelection'] == 'kafka':
         format_ = {
             "type": "csv",
             "fieldSeparator": ",",
             "hasHeader": True,
             "quoteChar": "\"",
             "recordSeparator": "\\n",
             'topics': get_topics()
         }
     elif file_format['streamSelection'] == 'sfdc':
         sf = Salesforce(username=file_format['streamUsername'],
                         password=file_format['streamPassword'],
                         security_token=file_format['streamToken'])
         format_ = {
             "type":
             "csv",
             "fieldSeparator":
             ",",
             "hasHeader":
             True,
             "quoteChar":
             "\"",
             "recordSeparator":
Esempio n. 5
0
File: api3.py Progetto: mapr/hue
def guess_format(request):
    """Guess the format descriptor of the import source named in the request.

    The JSON-encoded ``fileFormat`` POST field is decoded; ``file_type`` and
    ``path`` are read from it unconditionally, then ``inputFormat`` selects
    how the descriptor is built:

    * ``localfile``: fixed Excel or CSV descriptor depending on ``file_type``.
    * ``file``: a path ending in ``xls``/``xlsx`` is first converted to CSV
      (written back through ``request.fs`` under the name given by
      ``excel_to_csv_file_name_change``); the resulting file is handed to
      ``MorphlineIndexer.guess_format``. For Excel sources the guessed
      descriptor is then replaced by a fixed ``excel`` descriptor.
    * ``table``: table metadata is fetched through ``dbms``; only the
      ``text`` and ``parquet`` table formats are handled.
    * ``query``, ``rdbms``: fixed descriptors.
    * ``stream``: ``kafka`` (adds ``topics``) or ``flume``.
    * ``connector``: ``sfdc`` (adds the queryable Salesforce ``objects``).

    Returns:
        A ``JsonResponse`` wrapping the descriptor with ``status`` set to 0,
        or ``status`` -1 and a ``message`` when an Excel import is requested
        while running on Python 2.

    Raises:
        PopupException: the path is not a file, the table lookup failed, the
            table format is unsupported, or the input format / stream /
            connector selection is not recognized.
        KeyError: ``fileFormat`` lacks ``file_type``, ``path``,
            ``inputFormat`` or a key required by the chosen branch.
    """
    excel_suffixes = ('xls', 'xlsx')

    file_format = json.loads(request.POST.get('fileFormat', '{}'))
    file_type = file_format['file_type']
    path = urllib_unquote(file_format["path"])

    if sys.version_info[0] < 3 and (file_type == 'excel'
                                    or path.endswith(excel_suffixes)):
        return JsonResponse({
            'status': -1,
            'message': 'Python2 based Hue does not support Excel file importer'
        })

    input_format = file_format['inputFormat']

    if input_format == 'localfile':
        if file_type == 'excel':
            format_ = {"type": "excel", "hasHeader": True}
        else:
            format_ = {
                "quoteChar": "\"",
                "recordSeparator": '\\n',
                "type": "csv",
                "hasHeader": True,
                "fieldSeparator": ","
            }

    elif input_format == 'file':
        if path.endswith(excel_suffixes):
            # Local import: only this branch needs it.
            import io

            file_obj = request.fs.open(path)
            # At most 1 GiB of the workbook is read.
            raw = file_obj.read(1024 * 1024 * 1024)
            # 'xlsx' does not end with 'xls', so this cleanly separates the
            # legacy format (xlrd) from the OOXML one (openpyxl).
            engine = 'xlrd' if path.endswith('xls') else 'openpyxl'
            # Wrap the bytes in a file-like object: handing raw bytes to
            # read_excel is deprecated in recent pandas releases.
            # Assumes file_obj.read() returns bytes -- TODO confirm.
            df = pd.read_excel(io.BytesIO(raw), engine=engine)
            _csv_data = df.to_csv(index=False)

            # From here on `path` designates the generated CSV file.
            path = excel_to_csv_file_name_change(path)
            request.fs.create(path, overwrite=True, data=_csv_data)

        indexer = MorphlineIndexer(request.user, request.fs)
        if not request.fs.isfile(path):
            raise PopupException(
                _('Path %(path)s is not a file') % file_format)

        stream = request.fs.open(path)
        format_ = indexer.guess_format(
            {"file": {
                "stream": stream,
                "name": path
            }})
        # Return value is ignored, so this presumably adjusts format_ in
        # place -- confirm against _convert_format.
        _convert_format(format_)

        # Checked against the original (still quoted) request path because
        # `path` may have been rewritten to the CSV name above.
        if file_format["path"].endswith(excel_suffixes):
            format_ = {
                "quoteChar": "\"",
                "recordSeparator": '\\n',
                "type": "excel",
                "hasHeader": True,
                "fieldSeparator": ","
            }

    elif input_format == 'table':
        db = dbms.get(request.user)
        try:
            table_metadata = db.get_table(database=file_format['databaseName'],
                                          table_name=file_format['tableName'])
        except Exception as e:
            raise PopupException(
                e.message if hasattr(e, 'message') and e.message else e)

        # Collect storage properties: entries shaped "key=value" are split,
        # any other non-empty data_type is mapped to its comment.
        storage = {}
        for delim in table_metadata.storage_details:
            if delim['data_type']:
                if '=' in delim['data_type']:
                    key, val = delim['data_type'].split('=', 1)
                    storage[key] = val
                else:
                    storage[delim['data_type']] = delim['comment']

        table_format = table_metadata.details['properties']['format']
        if table_format == 'text':
            format_ = {
                "quoteChar": "\"",
                "recordSeparator": '\\n',
                "type": "csv",
                "hasHeader": False,
                "fieldSeparator": storage.get('field.delim', ',')
            }
        elif table_format == 'parquet':
            format_ = {
                "type": "parquet",
                "hasHeader": False,
            }
        else:
            raise PopupException(
                'Hive table format %s is not supported.' % table_format)
    elif input_format == 'query':
        format_ = {
            "quoteChar": "\"",
            "recordSeparator": "\\n",
            "type": "csv",
            "hasHeader": False,
            "fieldSeparator": "\u0001"
        }
    elif input_format == 'rdbms':
        format_ = {"type": "csv"}
    elif input_format == 'stream':
        if file_format['streamSelection'] == 'kafka':
            format_ = {
                "type": "json",
                'topics': get_topics(request.user)
            }
        elif file_format['streamSelection'] == 'flume':
            format_ = {
                "type": "csv",
                "fieldSeparator": ",",
                "hasHeader": True,
                "quoteChar": "\"",
                "recordSeparator": "\\n"
            }
        else:
            # Without this branch format_ stays unbound and the function
            # fails below with UnboundLocalError instead of a clear message.
            raise PopupException(
                _('Input format %(inputFormat)s stream not recognized: %(streamSelection)s'
                  ) % file_format)
    elif input_format == 'connector':
        if file_format['connectorSelection'] == 'sfdc':
            sf = Salesforce(username=file_format['streamUsername'],
                            password=file_format['streamPassword'],
                            security_token=file_format['streamToken'])
            format_ = {
                "type": "csv",
                "fieldSeparator": ",",
                "hasHeader": True,
                "quoteChar": "\"",
                "recordSeparator": "\\n",
                'objects': [
                    sobject['name']
                    for sobject in sf.restful('sobjects/')['sobjects']
                    if sobject['queryable']
                ]
            }
        else:
            # The placeholder was written "$(connectorSelection)s", which
            # %-formatting leaves untouched; "%(...)s" interpolates it.
            raise PopupException(
                _('Input format %(inputFormat)s connector not recognized: %(connectorSelection)s'
                  ) % file_format)
    else:
        raise PopupException(
            _('Input format not recognized: %(inputFormat)s') % file_format)

    format_['status'] = 0
    return JsonResponse(format_)