Example #1
  def process_exception(self, request, exception):
    import traceback
    tb = traceback.format_exc()
    logging.info("Processing exception: %s: %s" % (i18n.smart_unicode(exception),
                                                   i18n.smart_unicode(tb)))

    if hasattr(exception, "response"):
      return exception.response(request)

    if hasattr(exception, "response_data"):
      if request.ajax:
        response = render_json(exception.response_data)
        response[MIDDLEWARE_HEADER] = 'EXCEPTION'
        return response
      else:
        return render("error.mako", request,
                      dict(error=exception.response_data.get("message")))

    # We didn't handle it as a special exception, but if we're ajax we still
    # need to do some kind of nicer handling than the built-in page
    # Note that exception may actually be an Http404 or similar.
    if request.ajax:
      err = _("An error occurred: %(error)s") % {'error': exception}
      logging.exception("Middleware caught an exception")
      return PopupException(err, detail=None).response(request)

    return None
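
For context, the middleware above only needs the exception to expose a response callable or a response_data dict. A minimal sketch of such an exception follows; the class name and fields are illustrative, not Hue's actual classes.

# Hypothetical sketch: an exception carrying structured data that the
# middleware above would serialize for AJAX callers (names are illustrative).
class DataError(Exception):
  def __init__(self, message, code=500):
    super(DataError, self).__init__(message)
    self.response_data = {'message': message, 'code': code}

# raise DataError('Table not found', code=404)
# An AJAX request then gets render_json(exception.response_data) with
# MIDDLEWARE_HEADER set to 'EXCEPTION'; a plain request gets error.mako.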
Example #2
  def _invoke(self, method, relpath=None, params=None, data=None, headers=None, files=None, allow_redirects=False, clear_cookies=False, log_response=True):
    """
    Invoke an API method.
    @return: Raw body or JSON dictionary (if response content type is JSON).
    """
    path = self._join_uri(relpath)
    start_time = time.time()
    resp = self._client.execute(method,
                                path,
                                params=params,
                                data=data,
                                headers=headers,
                                files=files,
                                allow_redirects=allow_redirects,
                                urlencode=self._urlencode,
                                clear_cookies=clear_cookies)

    if log_response:
      # REST_RESPONSE_SIZE == -1 means "no limit": log_length is then False, and the [:log_length or None] slice below keeps the whole body
      log_length = conf.REST_RESPONSE_SIZE.get() != -1 and conf.REST_RESPONSE_SIZE.get()
      duration = time.time() - start_time
      message = "%s %s Got response%s: %s%s" % (
          method,
          smart_unicode(path, errors='ignore'),
          ' in %dms' % (duration * 1000),
          smart_unicode(resp.content[:log_length or None], errors='replace'),
          log_length and len(resp.content) > log_length and "..." or ""
      )
      self._client.logger.disabled = 0
      log_if_slow_call(duration=duration, message=message, logger=self._client.logger)

    return resp
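
The log_if_slow_call helper itself is not shown in these examples. A minimal sketch of what such a helper could look like; the threshold value and log levels here are assumptions, not Hue's actual implementation.

import logging

SLOW_CALL_THRESHOLD = 2.0  # seconds; assumed cutoff, not Hue's actual value

def log_if_slow_call(duration, message, logger):
  # Escalate slow calls so they stand out; everything else stays at DEBUG.
  if duration >= SLOW_CALL_THRESHOLD:
    logger.warning('SLOW: %.2fs %s' % (duration, message))
  else:
    logger.debug(message)

# log_if_slow_call(duration=3.1, message='GET /api', logger=logging.getLogger(__name__))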
Example #3
    def _init_attributes(self):
        self.queueName = i18n.smart_unicode(self.job.profile.queueName)
        self.jobName = i18n.smart_unicode(self.job.profile.name)
        self.user = i18n.smart_unicode(self.job.profile.user)
        self.mapProgress = self.job.status.mapProgress
        self.reduceProgress = self.job.status.reduceProgress
        self.setupProgress = self.job.status.setupProgress
        self.cleanupProgress = self.job.status.cleanupProgress

        if self.job.desiredMaps == 0:
            maps_percent_complete = 0
        else:
            maps_percent_complete = int(
                round(
                    float(self.job.finishedMaps) / self.job.desiredMaps * 100))

        self.desiredMaps = self.job.desiredMaps

        if self.job.desiredReduces == 0:
            reduces_percent_complete = 0
        else:
            reduces_percent_complete = int(
                round(
                    float(self.job.finishedReduces) / self.job.desiredReduces *
                    100))

        self.desiredReduces = self.job.desiredReduces
        self.maps_percent_complete = maps_percent_complete
        self.finishedMaps = self.job.finishedMaps
        self.finishedReduces = self.job.finishedReduces
        self.reduces_percent_complete = reduces_percent_complete
        self.startTimeMs = self.job.startTime
        self.startTimeFormatted = format_unixtime_ms(self.job.startTime)
        self.launchTimeMs = self.job.launchTime
        self.launchTimeFormatted = format_unixtime_ms(self.job.launchTime)

        self.finishTimeMs = self.job.finishTime
        self.finishTimeFormatted = format_unixtime_ms(self.job.finishTime)
        self.status = self.job.status.runStateAsString
        self.priority = self.job.priorityAsString
        self.jobFile = self.job.profile.jobFile

        finishTime = self.job.finishTime
        if finishTime == 0:
            finishTime = datetime.datetime.now()
        else:
            finishTime = datetime.datetime.fromtimestamp(finishTime / 1000)
        self.duration = finishTime - datetime.datetime.fromtimestamp(
            self.job.startTime / 1000)

        diff = int(finishTime.strftime("%s")) * 1000 - self.startTimeMs
        self.durationFormatted = format_duration_in_millis(diff)
        self.durationInMillis = diff
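
One caveat in the duration computation above: strftime("%s") is a glibc extension and is not portable (it fails on Windows, for instance). A portable equivalent of the same millisecond difference, under the same local-time semantics:

import time

def duration_ms(start_time_ms, finish_time):
    # Portable stand-in for int(finish_time.strftime("%s")) * 1000 - start_time_ms
    return int(time.mktime(finish_time.timetuple())) * 1000 - start_time_ms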
Example #4
  def invoke(self, method, relpath=None, params=None, data=None, headers=None, files=None, allow_redirects=False,
             clear_cookies=False):
    """
    Invoke an API method.
    @return: Raw body or JSON dictionary (if response content type is JSON).
    """
    path = self._join_uri(relpath)
    resp = self._client.execute(method,
                                path,
                                params=params,
                                data=data,
                                headers=headers,
                                files=files,
                                allow_redirects=allow_redirects,
                                urlencode=self._urlencode,
                                clear_cookies=clear_cookies)

    if self._client.logger.isEnabledFor(logging.DEBUG):
      self._client.logger.debug(
          "%s Got response: %s%s" %
          (method,
           smart_unicode(resp.content[:1000], errors='replace'),
           len(resp.content) > 1000 and "..." or ""))

    return self._format_response(resp)
Example #5
 def decorator(*args, **kwargs):
   response = {}
   try:
     return view_fn(*args, **kwargs)
   except Exception as e:
     LOG.exception('Error running %s' % view_fn)
     response['status'] = -1
     response['message'] = smart_unicode(e)
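
This snippet is truncated: response is populated in the except branch but never returned. Completed along the same pattern as Examples #13 and #15, the full decorator would look roughly like this (LOG and smart_unicode come from the surrounding module, as in the snippet; the factory name is illustrative):

from django.http import JsonResponse

def error_handler(view_fn):  # illustrative name for the enclosing decorator factory
  def decorator(*args, **kwargs):
    response = {}
    try:
      return view_fn(*args, **kwargs)
    except Exception as e:
      LOG.exception('Error running %s' % view_fn)
      response['status'] = -1
      response['message'] = smart_unicode(e)
    return JsonResponse(response)
  return decorator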
Example #6
def render_to_string_normal(template_name, django_context):
  data_dict = dict()
  if isinstance(django_context, django.template.Context):
    for d in reversed(django_context.dicts):
      data_dict.update(d)
  else:
    data_dict = django_context

  template = lookup.get_template(template_name)
  result = template.render(**data_dict)
  return i18n.smart_unicode(result)
Example #7
  def process_exception(self, request, exception):
    import traceback
    tb = traceback.format_exc()
    logging.info("Processing exception: %s: %s" % (i18n.smart_unicode(exception),
                                                   i18n.smart_unicode(tb)))

    if isinstance(exception, PopupException):
      return exception.response(request)

    if isinstance(exception, StructuredException):
      if request.ajax:
        response = render_json(exception.response_data)
        response[MIDDLEWARE_HEADER] = 'EXCEPTION'
        response.status_code = getattr(exception, 'error_code', 500)
        return response
      else:
        response = render("error.mako", request,
                      dict(error=exception.response_data.get("message")))
        response.status_code = getattr(exception, 'error_code', 500)
        return response

    return None
Example #8
def render_to_string_normal(template_name, django_context):
  data_dict = dict()
  if isinstance(django_context, django.template.context.Context):
    for d in reversed(django_context.dicts):
      if d:
        data_dict.update(d)
    data_dict.update({'request': django_context.request})
  else:
    data_dict = django_context

  template = lookup.get_template(template_name)
  # Python 2: **kwargs keys must be str, not unicode, so coerce the keys before rendering
  data_dict = dict(map(lambda k: (str(k), data_dict.get(k)), data_dict.keys()))
  result = template.render(**data_dict)
  return i18n.smart_unicode(result)
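
A minimal usage sketch, assuming a Mako TemplateLookup bound to `lookup` as in the function above; the template name is illustrative.

# With a plain dict (a django RequestContext would also work: its stacked
# dicts are flattened and its .request is injected, as above).
html = render_to_string_normal('index.mako', {'title': u'Home'})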
Example #9
def rerun_oozie_coordinator(request, job_id, app_path=None):
  oozie_coordinator = check_job_access_permission(request, job_id)
  check_job_edition_permission(oozie_coordinator, request.user)
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  if app_path is None:
    app_path = oozie_coordinator.coordJobPath
  return_json = request.GET.get('format') == 'json'

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)
    rerun_form = RerunCoordForm(request.POST, oozie_coordinator=oozie_coordinator)

    if sum([rerun_form.is_valid(), params_form.is_valid()]) == 2:
      args = {}
      args['deployment_dir'] = app_path

      params = {
        'type': 'action',
        'scope': ','.join(oozie_coordinator.aggreate(rerun_form.cleaned_data['actions'])),
        'refresh': rerun_form.cleaned_data['refresh'],
        'nocleanup': rerun_form.cleaned_data['nocleanup'],
      }

      properties = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      _rerun_coordinator(request, job_id, args, params, properties)

      if rerun_form.cleaned_data['return_json']:
        return JsonResponse({'status': 0, 'job_id': job_id}, safe=False)
      else:
        request.info(_('Coordinator re-running.'))
        return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s') % smart_unicode(rerun_form.errors))
      return list_oozie_coordinator(request, job_id)
  else:
    rerun_form = RerunCoordForm(oozie_coordinator=oozie_coordinator, return_json=return_json)
    initial_params = ParameterForm.get_initial_params(oozie_coordinator.conf_dict)
    params_form = ParametersFormSet(initial=initial_params)

  popup = render('dashboard/rerun_coord_popup.mako', request, {
                   'rerun_form': rerun_form,
                   'params_form': params_form,
                   'action': reverse('oozie:rerun_oozie_coord', kwargs={'job_id': job_id, 'app_path': app_path}),
                   'return_json': return_json,
                   'is_mini': request.GET.get('is_mini', False),
                 }, force_template=True).content

  return JsonResponse(popup, safe=False)
Example #10
def rerun_oozie_coordinator(request, job_id, app_path):
    oozie_coordinator = check_job_access_permission(request, job_id)
    check_job_edition_permission(oozie_coordinator, request.user)
    ParametersFormSet = formset_factory(ParameterForm, extra=0)

    if request.method == "POST":
        params_form = ParametersFormSet(request.POST)
        rerun_form = RerunCoordForm(request.POST, oozie_coordinator=oozie_coordinator)

        if sum([rerun_form.is_valid(), params_form.is_valid()]) == 2:
            args = {}
            args["deployment_dir"] = app_path

            params = {
                "type": "action",
                "scope": ",".join(oozie_coordinator.aggreate(rerun_form.cleaned_data["actions"])),
                "refresh": rerun_form.cleaned_data["refresh"],
                "nocleanup": rerun_form.cleaned_data["nocleanup"],
            }

            properties = dict([(param["name"], param["value"]) for param in params_form.cleaned_data])

            _rerun_coordinator(request, job_id, args, params, properties)

            request.info(_("Coordinator re-running."))
            return redirect(reverse("oozie:list_oozie_coordinator", kwargs={"job_id": job_id}))
        else:
            request.error(_("Invalid submission form: %s") % smart_unicode(rerun_form.errors))
            return list_oozie_coordinator(request, job_id)
    else:
        rerun_form = RerunCoordForm(oozie_coordinator=oozie_coordinator)
        initial_params = ParameterForm.get_initial_params(oozie_coordinator.conf_dict)
        params_form = ParametersFormSet(initial=initial_params)

    popup = render(
        "dashboard/rerun_coord_popup.mako",
        request,
        {
            "rerun_form": rerun_form,
            "params_form": params_form,
            "action": reverse("oozie:rerun_oozie_coord", kwargs={"job_id": job_id, "app_path": app_path}),
        },
        force_template=True,
    ).content

    return JsonResponse(popup, safe=False)
Example #11
def escape_rows(rows):
  data = []

  for row in rows:
    escaped_row = []
    for field in row:
      if isinstance(field, numbers.Number):
        if math.isnan(field) or math.isinf(field):
          escaped_field = json.dumps(field)
        else:
          escaped_field = field
      elif field is None:
        escaped_field = 'NULL'
      else:
        field = smart_unicode(field, errors='replace') # Prevent errors with non-UTF-8 results, e.g. charset=iso-8859-1
        escaped_field = escape(field).replace(' ', '&nbsp;')
      escaped_row.append(escaped_field)
    data.append(escaped_row)

  return data
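
For illustration, a row mixing a number, NaN, NULL and markup (this relies on the same json, math, numbers, escape and smart_unicode imports the function itself uses):

rows = [[1.5, float('nan'), None, '<b>a & b</b>']]
escape_rows(rows)
# -> [[1.5, 'NaN', 'NULL', '&lt;b&gt;a&nbsp;&amp;&nbsp;b&lt;/b&gt;']]
# NaN/Infinity go through json.dumps because they are not valid JSON literals
# and would otherwise break the client-side JSON parse.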
Example #12
 def __unicode__(self):
   return smart_unicode(self.message)
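
The point of smart_unicode over a bare unicode() call is that byte strings in non-UTF-8 charsets don't raise UnicodeDecodeError. A rough pure-Python 2 equivalent of its core behavior; the real i18n.smart_unicode also consults Hue's configured site encoding:

def smart_unicode_sketch(value, encoding='utf-8', errors='strict'):
  if isinstance(value, unicode):
    return value
  if isinstance(value, str):  # byte string: decode instead of blindly calling unicode()
    return value.decode(encoding, errors)
  return unicode(value)  # numbers, exceptions, etc.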
Example #13
      response['status'] = -3
    except AuthenticationRequired as e:
      response['status'] = 401
    except ValidationError as e:
      LOG.exception('Error validating %s' % func)
      response['status'] = -1
      response['message'] = e.message
    except OperationTimeout as e:
      response['status'] = -4
    except FilesystemException as e:
      response['status'] = 2
      response['message'] = e.message
    except QueryError as e:
      LOG.exception('Error running %s' % func)
      response['status'] = 1
      response['message'] = smart_unicode(e)
      if e.handle:
        response['handle'] = e.handle
      if e.extra:
        response.update(e.extra)
    except OperationNotSupported as e:
      response['status'] = 5
      response['message'] = e.message
    except Exception as e:
      LOG.exception('Error running %s' % func)
      response['status'] = -1
      response['message'] = smart_unicode(e)
    finally:
      if response:
        return JsonResponse(response)
Example #14
def augment_response(collection, query, response):
    # HTML escaping
    if not query.get('download'):
        id_field = collection.get('idField', '')

        for doc in response['response']['docs']:
            link = None
            if 'link-meta' in doc:
                meta = json.loads(doc['link-meta'])
                link = get_data_link(meta)
            elif 'link' in doc:
                meta = {'type': 'link', 'link': doc['link']}
                link = get_data_link(meta)

            for field, value in doc.iteritems():
                if isinstance(value, numbers.Number):
                    escaped_value = value
                elif field == '_childDocuments_':  # Nested documents
                    escaped_value = value
                elif isinstance(value, list):  # Multivalue field
                    escaped_value = [
                        smart_unicode(escape(val), errors='replace')
                        for val in value
                    ]
                else:
                    value = smart_unicode(value, errors='replace')
                    escaped_value = escape(value)
                doc[field] = escaped_value

            doc['externalLink'] = link
            doc['details'] = []
            doc['hueId'] = smart_unicode(doc.get(id_field, ''))
            if 'moreLikeThis' in response and response['moreLikeThis'][doc['hueId']].get('numFound'):
                _doc = response['moreLikeThis'][doc['hueId']]
                doc['_childDocuments_'] = _doc['docs']
                doc['numFound'] = _doc['numFound']
                del response['moreLikeThis'][doc['hueId']]

    highlighted_fields = response.get('highlighting', {}).keys()
    if highlighted_fields and not query.get('download'):
        id_field = collection.get('idField')
        if id_field:
            for doc in response['response']['docs']:
                if id_field in doc and smart_unicode(doc[id_field]) in highlighted_fields:
                    highlighting = response['highlighting'][smart_unicode(doc[id_field])]

                    if highlighting:
                        escaped_highlighting = {}
                        for field, hls in highlighting.iteritems():
                            _hls = [
                                escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>')
                                for hl in hls
                            ]
                            escaped_highlighting[field] = _hls[0] if len(_hls) == 1 else _hls

                        doc.update(escaped_highlighting)
        else:
            response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")
Example #15
    def wrapper(*args, **kwargs):
        response = {}

        try:
            return f(*args, **kwargs)
        except SessionExpired as e:
            response['status'] = -2
        except QueryExpired as e:
            response['status'] = -3
            if e.message and isinstance(e.message, basestring):
                response['message'] = e.message
        except AuthenticationRequired as e:
            response['status'] = 401
            if e.message and isinstance(e.message, basestring):
                response['message'] = e.message
        except ValidationError as e:
            LOG.exception('Error validating %s' % f)
            response['status'] = -1
            response['message'] = e.message
        except OperationTimeout as e:
            response['status'] = -4
        except FilesystemException as e:
            response['status'] = 2
            response['message'] = e.message or 'Query history not found'
        except QueryError as e:
            LOG.exception('Error running %s' % f.__name__)
            response['status'] = 1
            response['message'] = smart_unicode(e)
            if 'max_row_size' in response['message']:  # str.index() here would raise ValueError when absent
                size = re.search(r"(\d+.?\d*) (.B)", response['message'])
                if size and size.group(1):
                    response['help'] = {
                        'setting': {
                            'name': 'max_row_size',
                            'value': str(int(_closest_power_of_2(_to_size_in_bytes(size.group(1), size.group(2)))))
                        }
                    }
            if e.handle:
                response['handle'] = e.handle
            if e.extra:
                response.update(e.extra)
        except OperationNotSupported as e:
            response['status'] = 5
            response['message'] = e.message
        except RestException as e:
            message = extract_solr_exception_message(e)
            response['status'] = 1
            response['message'] = message.get('error')
        except Exception as e:
            LOG.exception('Error running %s' % f.__name__)
            response['status'] = -1
            response['message'] = smart_unicode(e)
        finally:
            if response:
                return JsonResponse(response)
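
A subtlety in Examples #13 and #15: the `finally: if response: return JsonResponse(response)` idiom works because on success the dict is still empty (falsy), so the try block's own return stands; only when an except branch has populated it does the finally return take over. A tiny self-contained demonstration:

def demo(fail):
    response = {}
    try:
        if fail:
            raise ValueError('boom')
        return {'status': 0}
    except ValueError as e:
        response['status'] = -1
    finally:
        if response:
            return response  # only taken on the error path

assert demo(False) == {'status': 0}
assert demo(True) == {'status': -1}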
Example #16
File: models.py Project: Jfeng3/hue
def augment_solr_response(response, collection, query):
  augmented = response
  augmented['normalized_facets'] = []

  normalized_facets = []

  selected_values = dict([((fq['id'], fq['field'], fq['type']), fq['filter']) for fq in query['fqs']])

  if response and response.get('facet_counts'):
    # e.g. [{u'field': u'sun', u'type': u'query', u'id': u'67b43a63-ed22-747b-47e8-b31aad1431ea', u'label': u'sun'}
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = facet['field']
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(name, selected_values.get((facet['id'], name, category), []), response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': name,
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          # add total result count?
        }
        normalized_facets.append(facet)
      elif category == 'range' and response['facet_counts']['facet_ranges']:
        name = facet['field']
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(name, selected_values.get((facet['id'], name, 'range'), []), counts, end)
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': name,
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].iteritems():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'count': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = ','.join([facet['field']] + [f['field'] for f in facet['properties']['facets']])
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'count': count,
        }
        normalized_facets.append(facet)

  # HTML escaping
  for doc in response['response']['docs']:
    for field, value in doc.iteritems():
      if isinstance(value, numbers.Number):
        escaped_value = value
      else:
        value = smart_unicode(value, errors='replace')
        escaped_value = escape(value)
      doc[field] = escaped_value

    if not query['download']:
      doc['showDetails'] = False
      doc['details'] = []

  highlighted_fields = response.get('highlighting', {}).keys()
  if highlighted_fields and not query.get('download'):
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field in doc and str(doc[id_field]) in highlighted_fields:
          highlighting = response['highlighting'][str(doc[id_field])]

          if highlighting:
            escaped_highlighting = {}
            for field, hls in highlighting.iteritems():
              _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
              escaped_highlighting[field] = _hls
  
            doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")


  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
Example #17
def augment_solr_response(response, collection, query):
    augmented = response
    augmented["normalized_facets"] = []
    NAME = "%(field)s-%(id)s"
    normalized_facets = []

    selected_values = dict([(fq["id"], fq["filter"]) for fq in query["fqs"]])

    if response and response.get("facet_counts"):
        for facet in collection["facets"]:
            category = facet["type"]

            if category == "field" and response["facet_counts"]["facet_fields"]:
                name = NAME % facet
                collection_facet = get_facet_field(category, name, collection["facets"])
                counts = pairwise2(
                    facet["field"], selected_values.get(facet["id"], []), response["facet_counts"]["facet_fields"][name]
                )
                if collection_facet["properties"]["sort"] == "asc":
                    counts.reverse()
                facet = {
                    "id": collection_facet["id"],
                    "field": facet["field"],
                    "type": category,
                    "label": collection_facet["label"],
                    "counts": counts,
                }
                normalized_facets.append(facet)
            elif (category == "range" or category == "range-up") and response["facet_counts"]["facet_ranges"]:
                name = NAME % facet
                collection_facet = get_facet_field(category, name, collection["facets"])
                counts = response["facet_counts"]["facet_ranges"][name]["counts"]
                end = response["facet_counts"]["facet_ranges"][name]["end"]
                counts = range_pair(
                    facet["field"], name, selected_values.get(facet["id"], []), counts, end, collection_facet
                )
                facet = {
                    "id": collection_facet["id"],
                    "field": facet["field"],
                    "type": category,
                    "label": collection_facet["label"],
                    "counts": counts,
                    "extraSeries": [],
                }
                normalized_facets.append(facet)
            elif category == "query" and response["facet_counts"]["facet_queries"]:
                for name, value in response["facet_counts"]["facet_queries"].iteritems():
                    collection_facet = get_facet_field(category, name, collection["facets"])
                    facet = {
                        "id": collection_facet["id"],
                        "query": name,
                        "type": category,
                        "label": name,
                        "counts": value,
                    }
                    normalized_facets.append(facet)
            elif category == "pivot":
                name = NAME % facet
                if "facet_pivot" in response["facet_counts"] and name in response["facet_counts"]["facet_pivot"]:
                    if facet["properties"]["scope"] == "stack":
                        count = _augment_pivot_2d(
                            name, facet["id"], response["facet_counts"]["facet_pivot"][name], selected_values
                        )
                    else:
                        count = response["facet_counts"]["facet_pivot"][name]
                        _augment_pivot_nd(facet["id"], count, selected_values)
                else:
                    count = []
                facet = {"id": facet["id"], "field": name, "type": category, "label": name, "counts": count}
                normalized_facets.append(facet)

    if response and response.get("facets"):
        for facet in collection["facets"]:
            category = facet["type"]
            name = facet["id"]  # Nested facets can only have one name

            if category == "function" and name in response["facets"]:
                value = response["facets"][name]
                collection_facet = get_facet_field(category, name, collection["facets"])
                facet = {"id": collection_facet["id"], "query": name, "type": category, "label": name, "counts": value}
                normalized_facets.append(facet)
            elif category == "nested" and name in response["facets"]:
                value = response["facets"][name]
                collection_facet = get_facet_field(category, name, collection["facets"])
                extraSeries = []
                counts = response["facets"][name]["buckets"]

                # Date range
                if collection_facet["properties"]["isDate"]:
                    dimension = 3
                    # Single dimension or dimension 2 with analytics
                    if not collection_facet["properties"]["facets"] or collection_facet["properties"]["facets"][0][
                        "aggregate"
                    ] not in ("count", "unique"):
                        counts = [_v for _f in counts for _v in (_f["val"], _f["d2"] if "d2" in _f else _f["count"])]
                        counts = range_pair(
                            facet["field"], name, selected_values.get(facet["id"], []), counts, 1, collection_facet
                        )
                    else:
                        # Dimension 1 with counts and 2 with analytics
                        _series = collections.defaultdict(list)
                        for f in counts:
                            for bucket in f["d2"]["buckets"] if "d2" in f else []:
                                _series[bucket["val"]].append(f["val"])
                                _series[bucket["val"]].append(bucket["d2"] if "d2" in bucket else bucket["count"])
                        for name, val in _series.iteritems():
                            _c = range_pair(
                                facet["field"], name, selected_values.get(facet["id"], []), val, 1, collection_facet
                            )
                            extraSeries.append({"counts": _c, "label": name})
                        counts = []
                elif not collection_facet["properties"]["facets"] or collection_facet["properties"]["facets"][0][
                    "aggregate"
                ] not in ("count", "unique"):
                    # Single dimension or dimension 2 with analytics
                    dimension = 1
                    counts = [_v for _f in counts for _v in (_f["val"], _f["d2"] if "d2" in _f else _f["count"])]
                    counts = pairwise2(facet["field"], selected_values.get(facet["id"], []), counts)
                else:
                    # Dimension 1 with counts and 2 with analytics
                    dimension = 2
                    counts = _augment_stats_2d(name, facet, counts, selected_values)

                if collection_facet["properties"]["sort"] == "asc":
                    counts.reverse()

                facet = {
                    "id": collection_facet["id"],
                    "field": facet["field"],
                    "type": category,
                    "label": collection_facet["label"],
                    "counts": counts,
                    "extraSeries": extraSeries,
                    "dimension": dimension,
                }

                normalized_facets.append(facet)

        # Remove unnecessary facet data
        if response:
            response.pop("facet_counts")
            response.pop("facets")

    # HTML escaping
    for doc in response["response"]["docs"]:
        for field, value in doc.iteritems():
            if isinstance(value, numbers.Number):
                escaped_value = value
            elif isinstance(value, list):  # Multivalue field
                escaped_value = [smart_unicode(val, errors="replace") for val in value]
            else:
                value = smart_unicode(value, errors="replace")
                escaped_value = escape(value)
            doc[field] = escaped_value

        if not query.get("download"):
            link = None
            if "link-meta" in doc:
                meta = json.loads(doc["link-meta"])
                link = get_data_link(meta)

            doc["externalLink"] = link
            doc["details"] = []

    highlighted_fields = response.get("highlighting", {}).keys()
    if highlighted_fields and not query.get("download"):
        id_field = collection.get("idField")
        if id_field:
            for doc in response["response"]["docs"]:
                if id_field in doc and smart_unicode(doc[id_field]) in highlighted_fields:
                    highlighting = response["highlighting"][smart_unicode(doc[id_field])]

                    if highlighting:
                        escaped_highlighting = {}
                        for field, hls in highlighting.iteritems():
                            _hls = [
                                escape(smart_unicode(hl, errors="replace"))
                                .replace("&lt;em&gt;", "<em>")
                                .replace("&lt;/em&gt;", "</em>")
                                for hl in hls
                            ]
                            escaped_highlighting[field] = _hls

                        doc.update(escaped_highlighting)
        else:
            response["warning"] = _("The Solr schema requires an id field for performing the result highlighting")

    if normalized_facets:
        augmented["normalized_facets"].extend(normalized_facets)

    return augmented
Example #18
File: views.py Project: onimsha/hue
def view_results(request, id, first_row=0):
    """
  Returns the view for the results of the QueryHistory with the given id.

  The query results MUST be ready.
  To display query results, one should always go through the execute_query view.
  If the result set has has_result_set=False, display an empty result.

  If ``first_row`` is 0, restarts (if necessary) the query read.  Otherwise, just
  spits out a warning if first_row doesn't match the servers conception.
  Multiple readers will produce a confusing interaction here, and that's known.

  It understands the ``context`` GET parameter. (See execute_query().)
  """
    first_row = long(first_row)
    start_over = (first_row == 0)
    results = type('Result', (object, ), {
        'rows': 0,
        'columns': [],
        'has_more': False,
        'start_row': 0,
    })
    data = []
    fetch_error = False
    error_message = ''
    log = ''
    columns = []
    app_name = get_app_name(request)

    query_history = authorized_get_query_history(request, id, must_exist=True)
    query_server = query_history.get_query_server_config()
    db = dbms.get(request.user, query_server)

    handle, state = _get_query_handle_and_state(query_history)
    context_param = request.GET.get('context', '')
    query_context = parse_query_context(context_param)

    # Update the status as expired should not be accessible
    # Impala does not support startover for now
    expired = state == models.QueryHistory.STATE.expired

    # Retrieve query results or use empty result if no result set
    try:
        if query_server['server_name'] == 'impala' and not handle.has_result_set:
            downloadable = False
        else:
            results = db.fetch(handle, start_over, 100)
            data = []

            # Materialize and HTML escape results
            # TODO: use Number + list comprehension
            for row in results.rows():
                escaped_row = []
                for field in row:
                    if isinstance(field, (int, long, float, complex, bool)):
                        escaped_field = field
                    elif field is None:
                        escaped_field = 'NULL'
                    else:
                        # Prevent errors when the result is not UTF-8, e.g. charset=iso-8859-1
                        field = smart_unicode(field, errors='replace')
                        escaped_field = escape(field).replace(' ', '&nbsp;')
                    escaped_row.append(escaped_field)
                data.append(escaped_row)

            # We display the "Download" button only when we know that there are results:
            downloadable = first_row > 0 or data
            log = db.get_log(handle)
            columns = results.data_table.cols()

    except Exception as ex:
        fetch_error = True
        error_message, log = expand_exception(ex, db, handle)
Example #19
def augment_solr_response(response, collection, query):
  augmented = response
  augmented['normalized_facets'] = []
  NAME = '%(field)s-%(id)s'
  normalized_facets = []

  selected_values = dict([(fq['id'], fq['filter']) for fq in query['fqs']])

  if response and response.get('facet_counts'):
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif (category == 'range' or category == 'range-up') and response['facet_counts']['facet_ranges']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].iteritems():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'counts': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = NAME % facet
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(name, facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'field': name,
          'type': category,
          'label': name,
          'counts': count,
        }
        normalized_facets.append(facet)

  if response and response.get('facets'):
    for facet in collection['facets']:
      category = facet['type']
      name = facet['id'] # Nested facets can only have one name

      if category == 'function' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        facet = {
          'id': collection_facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'counts': value,
        }
        normalized_facets.append(facet)
      elif category == 'nested' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        extraSeries = []
        counts = response['facets'][name]['buckets']

        cols = ['%(field)s' % facet, 'count(%(field)s)' % facet]
        last_x_col = 0
        last_xx_col = 0
        for i, f in enumerate(facet['properties']['facets']):
          if f['aggregate']['function'] == 'count':
            cols.append(f['field'])
            last_xx_col = last_x_col
            last_x_col = i + 2
          cols.append(SolrApi._get_aggregate_function(f))
        rows = []

        # For dim in dimensions

        # Number or Date range
        if collection_facet['properties']['canRange'] and not facet['properties'].get('type') == 'field':
          dimension = 3 if collection_facet['properties']['isDate'] else 1
          # Single dimension or dimension 2 with analytics
          if not collection_facet['properties']['facets'] or collection_facet['properties']['facets'][0]['aggregate']['function'] != 'count' and len(collection_facet['properties']['facets']) == 1:
            column = 'count'
            if len(collection_facet['properties']['facets']) == 1:
              agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
              legend = agg_keys[0].split(':', 2)[1]
              column = agg_keys[0]
            else:
              legend = facet['field'] # 'count(%s)' % legend
              agg_keys = [column]

            _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
            counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet)
          else:
            # Dimension 1 with counts and 2 with analytics
            agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
            agg_keys.sort(key=lambda a: a[4:])

            if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
              agg_keys.insert(0, 'count')
            counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            _series = collections.defaultdict(list)

            for row in rows:
              for i, cell in enumerate(row):
                if i > last_x_col:
                  legend = cols[i]
                  if last_xx_col != last_x_col:
                    legend = '%s %s' % (cols[i], row[last_x_col])
                  _series[legend].append(row[last_xx_col])
                  _series[legend].append(cell)

            for name, val in _series.iteritems():
              _c = range_pair(facet['field'], name, selected_values.get(facet['id'], []), val, 1, collection_facet)
              extraSeries.append({'counts': _c, 'label': name})
            counts = []
        elif collection_facet['properties'].get('isOldPivot'):
          facet_fields = [collection_facet['field']] + [f['field'] for f in collection_facet['properties'].get('facets', []) if f['aggregate']['function'] == 'count']

          column = 'count'
          agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
          agg_keys.sort(key=lambda a: a[4:])

          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          #_convert_nested_to_augmented_pivot_nd(facet_fields, facet['id'], count, selected_values, dimension=2)
          dimension = len(facet_fields)
        elif not collection_facet['properties']['facets'] or (collection_facet['properties']['facets'][0]['aggregate']['function'] != 'count' and len(collection_facet['properties']['facets']) == 1):
          # Dimension 1 with 1 count or agg
          dimension = 1

          column = 'count'
          if len(collection_facet['properties']['facets']) == 1:
            agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
            legend = agg_keys[0].split(':', 2)[1]
            column = agg_keys[0]
          else:
            legend = facet['field']
            agg_keys = [column]

          _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
          counts = pairwise2(legend, selected_values.get(facet['id'], []), counts)
        else:
          # Dimension 2 with analytics or 1 with N aggregates
          dimension = 2
          agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
          agg_keys.sort(key=lambda a: a[4:])

          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)
          actual_dimension = 1 + sum([_f['aggregate']['function'] == 'count' for _f in collection_facet['properties']['facets']])

          counts = filter(lambda a: len(a['fq_fields']) == actual_dimension, counts)

        num_bucket = response['facets'][name]['numBuckets'] if 'numBuckets' in response['facets'][name] else len(response['facets'][name])
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': extraSeries,
          'dimension': dimension,
          'response': {'response': {'start': 0, 'numFound': num_bucket}}, # Todo * nested buckets + offsets
          'docs': [dict(zip(cols, row)) for row in rows],
          'fieldsAttributes': [Collection2._make_gridlayout_header_field({'name': col, 'type': 'aggr' if '(' in col else 'string'}) for col in cols]
        }

        normalized_facets.append(facet)

    # Remove unnecessary facet data
    if response:
      response.pop('facet_counts')
      response.pop('facets')

  # HTML escaping
  if not query.get('download'):
    id_field = collection.get('idField', '')

    for doc in response['response']['docs']:
      for field, value in doc.iteritems():
        if isinstance(value, numbers.Number):
          escaped_value = value
        elif field == '_childDocuments_': # Nested documents
          escaped_value = value
        elif isinstance(value, list): # Multivalue field
          escaped_value = [smart_unicode(escape(val), errors='replace') for val in value]
        else:
          value = smart_unicode(value, errors='replace')
          escaped_value = escape(value)
        doc[field] = escaped_value

      link = None
      if 'link-meta' in doc:
        meta = json.loads(doc['link-meta'])
        link = get_data_link(meta)

      doc['externalLink'] = link
      doc['details'] = []
      doc['hueId'] = smart_unicode(doc.get(id_field, ''))

  highlighted_fields = response.get('highlighting', {}).keys()
  if highlighted_fields and not query.get('download'):
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field in doc and smart_unicode(doc[id_field]) in highlighted_fields:
          highlighting = response['highlighting'][smart_unicode(doc[id_field])]

          if highlighting:
            escaped_highlighting = {}
            for field, hls in highlighting.iteritems():
              _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
              escaped_highlighting[field] = _hls[0] if len(_hls) == 1 else _hls

            doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")


  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
Example #20
    def _invoke(self,
                method,
                relpath=None,
                params=None,
                data=None,
                headers=None,
                files=None,
                allow_redirects=False,
                clear_cookies=False,
                log_response=True):
        """
    Invoke an API method.
    Print trace even when there is an exception.
    @return: Raw body or JSON dictionary (if response content type is JSON).
    """
        path = self._join_uri(relpath)
        start_time = time.time()
        resp = None

        try:
            resp = self._client.execute(method,
                                        path,
                                        params=params,
                                        data=data,
                                        headers=headers,
                                        files=files,
                                        allow_redirects=allow_redirects,
                                        urlencode=self._urlencode,
                                        clear_cookies=clear_cookies)
        finally:
            # Output duration without content
            log_length = conf.REST_RESPONSE_SIZE.get() != -1 and conf.REST_RESPONSE_SIZE.get() if log_response else 0
            duration = time.time() - start_time
            try:
                req_data = smart_unicode(data, errors='replace')
                resp_content = smart_unicode(resp.content, errors='replace') if resp and resp.content else ''
                message = u'%s %s %s%s%s %s%s returned in %dms %s %s %s%s' % (
                    method,
                    type(self._client._session.auth) if self._client._session and self._client._session.auth else None,
                    self._client._base_url,
                    smart_unicode(path, errors='replace'),
                    iri_to_uri('?' + urlencode(params)) if params else '',
                    req_data[:log_length] if data else '',
                    log_length and len(req_data) > log_length and '...' or '' if data else '',
                    (duration * 1000),
                    resp.status_code if resp else 0,
                    len(resp_content) if resp else 0,
                    resp_content[:log_length] if resp else '',
                    log_length and len(resp_content) > log_length and '...' or '' if resp else ''
                )
            except:
                short_call_name = '%s %s' % (method, self._client._base_url)
                LOG.exception('Error logging return call %s' % short_call_name)
                message = '%s returned in %dms' % (short_call_name, duration)
            self._client.logger.disabled = 0

            log_if_slow_call(duration=duration,
                             message=message,
                             logger=self._client.logger)

        return resp
Example #21
def guess_field_types(request):
    file_format = json.loads(request.POST.get('fileFormat', '{}'))

    if file_format['inputFormat'] == 'file':
        indexer = MorphlineIndexer(request.user, request.fs)
        path = urllib_unquote(file_format["path"])
        stream = request.fs.open(path)
        encoding = check_encoding(stream.read(10000))
        stream.seek(0)
        _convert_format(file_format["format"], inverse=True)

        format_ = indexer.guess_field_types({
            "file": {
                "stream": stream,
                "name": path
            },
            "format": file_format['format']
        })

        # Note: Would also need to set charset to table (only supported in Hive)
        if 'sample' in format_ and format_['sample']:
            format_['sample'] = escape_rows(format_['sample'],
                                            nulls_only=True,
                                            encoding=encoding)
        for col in format_['columns']:
            col['name'] = smart_unicode(col['name'],
                                        errors='replace',
                                        encoding=encoding)

    elif file_format['inputFormat'] == 'table':
        sample = get_api(request, {'type': 'hive'}).get_sample_data(
            {'type': 'hive'}, database=file_format['databaseName'], table=file_format['tableName'])
        db = dbms.get(request.user)
        table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])

        format_ = {
            "sample": sample['rows'][:4],
            "columns": [
                Field(col.name, HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type, 'string')).to_dict()
                for col in table_metadata.cols
            ]
        }
    elif file_format['inputFormat'] == 'query':
        query_id = file_format['query']['id'] if file_format['query'].get('id') else file_format['query']

        notebook = Notebook(document=Document2.objects.document(
            user=request.user, doc_id=query_id)).get_data()
        snippet = notebook['snippets'][0]
        db = get_api(request, snippet)

        if file_format.get('sampleCols'):
            columns = file_format.get('sampleCols')
            sample = file_format.get('sample')
        else:
            snippet['query'] = snippet['statement']
            try:
                sample = db.fetch_result(notebook, snippet, 4, start_over=True)['rows'][:4]
            except Exception as e:
                LOG.warn('Skipping sample data as query handle might be expired: %s' % e)
                sample = [[], [], [], [], []]
            columns = db.autocomplete(snippet=snippet, database='', table='')
            columns = [
                Field(col['name'], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict()
                for col in columns['extended_columns']
            ]
        format_ = {
            "sample": sample,
            "columns": columns,
        }
    elif file_format['inputFormat'] == 'rdbms':
        api = _get_api(request)
        sample = api.get_sample_data(None, database=file_format['rdbmsDatabaseName'], table=file_format['tableName'])

        format_ = {
            "sample": list(sample['rows'])[:4],
            "columns": [
                Field(col['name'], col['type']).to_dict()
                for col in sample['full_headers']
            ]
        }
    elif file_format['inputFormat'] == 'stream':
        if file_format['streamSelection'] == 'kafka':
            if file_format.get('kafkaSelectedTopics') == 'NavigatorAuditEvents':
                kafkaFieldNames = [
                    'id', 'additionalInfo', 'allowed', 'collectionName',
                    'databaseName', 'db', 'DELEGATION_TOKEN_ID', 'dst',
                    'entityId', 'family', 'impersonator', 'ip', 'name',
                    'objectType', 'objType', 'objUsageType', 'operationParams',
                    'operationText', 'op', 'opText', 'path', 'perms',
                    'privilege', 'qualifier', 'QUERY_ID', 'resourcePath',
                    'service', 'SESSION_ID', 'solrVersion', 'src', 'status',
                    'subOperation', 'tableName', 'table', 'time', 'type',
                    'url', 'user'
                ]
                kafkaFieldTypes = ['string'] * len(kafkaFieldNames)
                kafkaFieldNames.append('timeDate')
                kafkaFieldTypes.append('date')
            else:
                # Note: mocked here, should come from SFDC or Kafka API or sampling job
                kafkaFieldNames = file_format.get('kafkaFieldNames', '').split(',')
                kafkaFieldTypes = file_format.get('kafkaFieldTypes', '').split(',')

            data = """%(kafkaFieldNames)s
%(data)s""" % {
                'kafkaFieldNames': ','.join(kafkaFieldNames),
                'data': '\n'.join(
                    [','.join(['...'] * len(kafkaFieldTypes))] * 5)
            }
            stream = string_io()
            stream.write(data)

            _convert_format(file_format["format"], inverse=True)

            indexer = MorphlineIndexer(request.user, request.fs)
            format_ = indexer.guess_field_types({
                "file": {
                    "stream": stream,
                    "name": file_format['path']
                },
                "format": file_format['format']
            })
            type_mapping = dict(list(zip(kafkaFieldNames, kafkaFieldTypes)))

            for col in format_['columns']:
                col['keyType'] = type_mapping[col['name']]
                col['type'] = type_mapping[col['name']]
        elif file_format['streamSelection'] == 'flume':
            if 'hue-httpd/access_log' in file_format['channelSourcePath']:
                columns = [{
                    'name': 'id',
                    'type': 'string',
                    'unique': True
                }, {
                    'name': 'client_ip',
                    'type': 'string'
                }, {
                    'name': 'time',
                    'type': 'date'
                }, {
                    'name': 'request',
                    'type': 'string'
                }, {
                    'name': 'code',
                    'type': 'plong'
                }, {
                    'name': 'bytes',
                    'type': 'plong'
                }, {
                    'name': 'method',
                    'type': 'string'
                }, {
                    'name': 'url',
                    'type': 'string'
                }, {
                    'name': 'protocol',
                    'type': 'string'
                }, {
                    'name': 'app',
                    'type': 'string'
                }, {
                    'name': 'subapp',
                    'type': 'string'
                }]
            else:
                columns = [{'name': 'message', 'type': 'string'}]

            format_ = {
                "sample": [['...'] * len(columns)] * 4,
                "columns": [
                    Field(col['name'],
                          HiveFormat.FIELD_TYPE_TRANSLATE.get(
                              col['type'], 'string'),
                          unique=col.get('unique')).to_dict()
                    for col in columns
                ]
            }
    elif file_format['inputFormat'] == 'connector':
        if file_format['connectorSelection'] == 'sfdc':
            sf = Salesforce(username=file_format['streamUsername'],
                            password=file_format['streamPassword'],
                            security_token=file_format['streamToken'])
            table_metadata = [{
                'name': column['name'],
                'type': column['type']
            } for column in sf.restful('sobjects/%(streamObject)s/describe/' %
                                       file_format)['fields']]
            query = 'SELECT %s FROM %s LIMIT 4' % (', '.join(
                [col['name']
                 for col in table_metadata]), file_format['streamObject'])
            LOG.debug(query)

            try:
                records = sf.query_all(query)
            except SalesforceRefusedRequest as e:
                raise PopupException(message=str(e))

            format_ = {
                "sample":
                [list(row.values())[1:] for row in records['records']],
                "columns": [
                    Field(
                        col['name'],
                        HiveFormat.FIELD_TYPE_TRANSLATE.get(
                            col['type'], 'string')).to_dict()
                    for col in table_metadata
                ]
            }
        else:
            raise PopupException(
                _('Connector format not recognized: %(connectorSelection)s') %
                file_format)
    else:
        raise PopupException(
            _('Input format not recognized: %(inputFormat)s') % file_format)

    return JsonResponse(format_)
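
For reference, a minimal sketch of the 'fileFormat' payload this view reads out of request.POST; every key below appears in the branches above, but the values are hypothetical.

import json

file_format = {
    'inputFormat': 'stream',            # one of: file, table, query, rdbms, stream, connector
    'streamSelection': 'kafka',         # only read when inputFormat == 'stream'
    'kafkaSelectedTopics': 'my_topic',  # hypothetical topic name
    'kafkaFieldNames': 'id,message',    # comma-separated, as the split(',') above expects
    'kafkaFieldTypes': 'string,string',
    'format': {'type': 'csv', 'hasHeader': True},
    'path': 'kafka://my_topic',         # hypothetical; used as the stream "name"
}
payload = {'fileFormat': json.dumps(file_format)}  # mirrors json.loads(request.POST.get('fileFormat', '{}'))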
Ejemplo n.º 22
0
def guess_field_types(request):
    file_format = json.loads(request.POST.get('fileFormat', '{}'))

    if file_format['inputFormat'] == 'file':
        indexer = MorphlineIndexer(request.user, request.fs)
        path = urllib.unquote(file_format["path"])
        stream = request.fs.open(path)
        encoding = chardet.detect(stream.read(10000)).get('encoding')
        stream.seek(0)
        _convert_format(file_format["format"], inverse=True)

        format_ = indexer.guess_field_types({
            "file": {
                "stream": stream,
                "name": path
            },
            "format": file_format['format']
        })

        # Note: Would also need to set charset to table (only supported in Hive)
        if 'sample' in format_:
            format_['sample'] = escape_rows(format_['sample'],
                                            nulls_only=True,
                                            encoding=encoding)
        for col in format_['columns']:
            col['name'] = smart_unicode(col['name'],
                                        errors='replace',
                                        encoding=encoding)

    elif file_format['inputFormat'] == 'table':
        sample = get_api(request, {
            'type': 'hive'
        }).get_sample_data({'type': 'hive'},
                           database=file_format['databaseName'],
                           table=file_format['tableName'])
        db = dbms.get(request.user)
        table_metadata = db.get_table(database=file_format['databaseName'],
                                      table_name=file_format['tableName'])

        format_ = {
            "sample":
            sample['rows'][:4],
            "columns": [
                Field(col.name,
                      HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type,
                                                          'string')).to_dict()
                for col in table_metadata.cols
            ]
        }
    elif file_format['inputFormat'] == 'query':
        query_id = file_format['query']['id'] if file_format['query'].get(
            'id') else file_format['query']

        notebook = Notebook(document=Document2.objects.document(
            user=request.user, doc_id=query_id)).get_data()
        snippet = notebook['snippets'][0]
        db = get_api(request, snippet)

        if file_format.get('sampleCols'):
            columns = file_format.get('sampleCols')
            sample = file_format.get('sample')
        else:
            snippet['query'] = snippet['statement']
            try:
                sample = db.fetch_result(notebook, snippet, 4,
                                         start_over=True)['rows'][:4]
            except Exception as e:
                LOG.warning(
                    'Skipping sample data as query handle might be expired: %s'
                    % e)
                sample = [[], [], [], [], []]
            columns = db.autocomplete(snippet=snippet, database='', table='')
            columns = [
                Field(
                    col['name'],
                    HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'],
                                                        'string')).to_dict()
                for col in columns['extended_columns']
            ]
        format_ = {
            "sample": sample,
            "columns": columns,
        }
Ejemplo n.º 23
0
Archivo: api3.py Proyecto: ranade1/hue
def guess_field_types(request):
    file_format = json.loads(request.POST.get('fileFormat', '{}'))

    if file_format['inputFormat'] == 'localfile':
        path = urllib_unquote(file_format['path'])

        with open(path, 'r') as local_file:

            reader = csv.reader(local_file)
            csv_data = list(reader)

            if file_format['format']['hasHeader']:
                sample = csv_data[1:5]
                column_row = [
                    re.sub('[^0-9a-zA-Z]+', '_', col) for col in csv_data[0]
                ]
            else:
                sample = csv_data[:4]
                column_row = [
                    'field_' + str(count + 1)
                    for count, col in enumerate(sample[0])
                ]

            field_type_guesses = []
            for count, col in enumerate(column_row):
                column_samples = [
                    sample_row[count] for sample_row in sample
                    if len(sample_row) > count
                ]
                field_type_guess = guess_field_type_from_samples(
                    column_samples)
                field_type_guesses.append(field_type_guess)

            columns = [
                Field(column_row[count], field_type_guesses[count]).to_dict()
                for count, col in enumerate(column_row)
            ]

            format_ = {'columns': columns, 'sample': sample}

    elif file_format['inputFormat'] == 'file':
        indexer = MorphlineIndexer(request.user, request.fs)
        path = urllib_unquote(file_format["path"])
        if path[-3:] == 'xls' or path[-4:] == 'xlsx':
            path = excel_to_csv_file_name_change(path)
        stream = request.fs.open(path)
        encoding = check_encoding(stream.read(10000))
        LOG.debug('File %s encoding is %s' % (path, encoding))
        stream.seek(0)
        _convert_format(file_format["format"], inverse=True)

        format_ = indexer.guess_field_types({
            "file": {
                "stream": stream,
                "name": path
            },
            "format": file_format['format']
        })

        # Note: Would also need to set charset to table (only supported in Hive)
        if 'sample' in format_ and format_['sample']:
            format_['sample'] = escape_rows(format_['sample'],
                                            nulls_only=True,
                                            encoding=encoding)
        for col in format_['columns']:
            col['name'] = smart_unicode(col['name'],
                                        errors='replace',
                                        encoding=encoding)

    elif file_format['inputFormat'] == 'table':
        sample = get_api(request, {
            'type': 'hive'
        }).get_sample_data({'type': 'hive'},
                           database=file_format['databaseName'],
                           table=file_format['tableName'])
        db = dbms.get(request.user)
        table_metadata = db.get_table(database=file_format['databaseName'],
                                      table_name=file_format['tableName'])

        format_ = {
            "sample":
            sample['rows'][:4],
            "columns": [
                Field(col.name,
                      HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type,
                                                          'string')).to_dict()
                for col in table_metadata.cols
            ]
        }
    elif file_format['inputFormat'] == 'query':
        query_id = file_format['query']['id'] if file_format['query'].get(
            'id') else file_format['query']

        notebook = Notebook(document=Document2.objects.document(
            user=request.user, doc_id=query_id)).get_data()
        snippet = notebook['snippets'][0]
        db = get_api(request, snippet)

        if file_format.get('sampleCols'):
            columns = file_format.get('sampleCols')
            sample = file_format.get('sample')
        else:
            snippet['query'] = snippet['statement']
            try:
                sample = db.fetch_result(notebook, snippet, 4,
                                         start_over=True)['rows'][:4]
            except Exception as e:
                LOG.warning(
                    'Skipping sample data as query handle might be expired: %s'
                    % e)
                sample = [[], [], [], [], []]
            columns = db.autocomplete(snippet=snippet, database='', table='')
            columns = [
                Field(
                    col['name'],
                    HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'],
                                                        'string')).to_dict()
                for col in columns['extended_columns']
            ]
        format_ = {
            "sample": sample,
            "columns": columns,
        }
    elif file_format['inputFormat'] == 'rdbms':
        api = _get_api(request)
        sample = api.get_sample_data(None,
                                     database=file_format['rdbmsDatabaseName'],
                                     table=file_format['tableName'])

        format_ = {
            "sample":
            list(sample['rows'])[:4],
            "columns": [
                Field(col['name'], col['type']).to_dict()
                for col in sample['full_headers']
            ]
        }
    elif file_format['inputFormat'] == 'stream':
        if file_format['streamSelection'] == 'kafka':
            data = get_topic_data(request.user,
                                  file_format.get('kafkaSelectedTopics'))

            kafkaFieldNames = [col['name'] for col in data['full_headers']]
            kafkaFieldTypes = [col['type'] for col in data['full_headers']]
            topics_data = data['rows']

            format_ = {
                "sample":
                topics_data,
                "columns": [
                    Field(col, 'string', unique=False).to_dict()
                    for col in kafkaFieldNames
                ]
            }
        elif file_format['streamSelection'] == 'flume':
            if 'hue-httpd/access_log' in file_format['channelSourcePath']:
                columns = [{
                    'name': 'id',
                    'type': 'string',
                    'unique': True
                }, {
                    'name': 'client_ip',
                    'type': 'string'
                }, {
                    'name': 'time',
                    'type': 'date'
                }, {
                    'name': 'request',
                    'type': 'string'
                }, {
                    'name': 'code',
                    'type': 'plong'
                }, {
                    'name': 'bytes',
                    'type': 'plong'
                }, {
                    'name': 'method',
                    'type': 'string'
                }, {
                    'name': 'url',
                    'type': 'string'
                }, {
                    'name': 'protocol',
                    'type': 'string'
                }, {
                    'name': 'app',
                    'type': 'string'
                }, {
                    'name': 'subapp',
                    'type': 'string'
                }]
            else:
                columns = [{'name': 'message', 'type': 'string'}]

            format_ = {
                "sample": [['...'] * len(columns)] * 4,
                "columns": [
                    Field(col['name'],
                          HiveFormat.FIELD_TYPE_TRANSLATE.get(
                              col['type'], 'string'),
                          unique=col.get('unique')).to_dict()
                    for col in columns
                ]
            }
    elif file_format['inputFormat'] == 'connector':
        if file_format['connectorSelection'] == 'sfdc':
            sf = Salesforce(username=file_format['streamUsername'],
                            password=file_format['streamPassword'],
                            security_token=file_format['streamToken'])
            table_metadata = [{
                'name': column['name'],
                'type': column['type']
            } for column in sf.restful('sobjects/%(streamObject)s/describe/' %
                                       file_format)['fields']]
            query = 'SELECT %s FROM %s LIMIT 4' % (', '.join(
                [col['name']
                 for col in table_metadata]), file_format['streamObject'])
            LOG.debug(query)

            try:
                records = sf.query_all(query)
            except SalesforceRefusedRequest as e:
                raise PopupException(message=str(e))

            format_ = {
                "sample":
                [list(row.values())[1:] for row in records['records']],
                "columns": [
                    Field(
                        col['name'],
                        HiveFormat.FIELD_TYPE_TRANSLATE.get(
                            col['type'], 'string')).to_dict()
                    for col in table_metadata
                ]
            }
        else:
            raise PopupException(
                _('Connector format not recognized: %(connectorSelection)s') %
                file_format)
    else:
        raise PopupException(
            _('Input format not recognized: %(inputFormat)s') % file_format)

    return JsonResponse(format_)
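
The 'localfile' branch above delegates per-column typing to guess_field_type_from_samples. A self-contained, simplified stand-in (an assumption, not Hue's actual helper) shows the idea: promote a column to the narrowest type that every non-empty sample satisfies.

def guess_field_type_from_samples(samples):
    """Simplified sketch: return 'bigint', 'double' or 'string' for a column."""
    def is_int(value):
        try:
            int(value)
            return True
        except ValueError:
            return False

    def is_float(value):
        try:
            float(value)
            return True
        except ValueError:
            return False

    non_empty = [s for s in samples if s != '']
    if non_empty and all(is_int(s) for s in non_empty):
        return 'bigint'
    if non_empty and all(is_float(s) for s in non_empty):
        return 'double'
    return 'string'

print(guess_field_type_from_samples(['1', '2', '3']))   # bigint
print(guess_field_type_from_samples(['1.5', '2']))      # double
print(guess_field_type_from_samples(['a', '2']))        # string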
Ejemplo n.º 24
0
def augment_solr_response(response, collection, query):
    augmented = response
    augmented['normalized_facets'] = []

    normalized_facets = []

    selected_values = dict([((fq['id'], fq['field'], fq['type']), fq['filter'])
                            for fq in query['fqs']])

    if response and response.get('facet_counts'):
        # e.g. [{u'field': u'sun', u'type': u'query', u'id': u'67b43a63-ed22-747b-47e8-b31aad1431ea', u'label': u'sun'}
        for facet in collection['facets']:
            category = facet['type']

            if category == 'field' and response['facet_counts']['facet_fields']:
                name = facet['field']
                collection_facet = get_facet_field(category, name,
                                                   collection['facets'])
                counts = pairwise2(
                    name, selected_values.get((facet['id'], name, category),
                                              []),
                    response['facet_counts']['facet_fields'][name])
                if collection_facet['properties']['sort'] == 'asc':
                    counts.reverse()
                facet = {
                    'id': collection_facet['id'],
                    'field': name,
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                    # add total result count?
                }
                normalized_facets.append(facet)
            elif category == 'range' and response['facet_counts'][
                    'facet_ranges']:
                name = facet['field']
                collection_facet = get_facet_field(category, name,
                                                   collection['facets'])
                counts = response['facet_counts']['facet_ranges'][name][
                    'counts']
                end = response['facet_counts']['facet_ranges'][name]['end']
                counts = range_pair(
                    name, selected_values.get((facet['id'], name, 'range'),
                                              []), counts, end)
                if collection_facet['properties']['sort'] == 'asc':
                    counts.reverse()
                facet = {
                    'id': collection_facet['id'],
                    'field': name,
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                    'extraSeries': []
                }
                normalized_facets.append(facet)
            elif category == 'query' and response['facet_counts'][
                    'facet_queries']:
                for name, value in response['facet_counts'][
                        'facet_queries'].iteritems():
                    collection_facet = get_facet_field(category, name,
                                                       collection['facets'])
                    facet = {
                        'id': collection_facet['id'],
                        'query': name,
                        'type': category,
                        'label': name,
                        'count': value,
                    }
                    normalized_facets.append(facet)
            # pivot_facet

    # HTML escaping
    for doc in response['response']['docs']:
        for field, value in doc.iteritems():
            if isinstance(value, numbers.Number):
                escaped_value = value
            else:
                value = smart_unicode(value, errors='replace')
                escaped_value = escape(value)
            doc[field] = escaped_value
        doc['showDetails'] = False
        doc['details'] = []

    highlighted_fields = response.get('highlighting', {}).keys()
    if highlighted_fields:
        id_field = collection.get('idField')
        if id_field:
            for doc in response['response']['docs']:
                if id_field in doc and doc[id_field] in highlighted_fields:
                    doc.update(response['highlighting'][doc[id_field]])
        else:
            response['warning'] = _(
                "The Solr schema requires an id field for performing the result highlighting"
            )

    if normalized_facets:
        augmented['normalized_facets'].extend(normalized_facets)

    return augmented
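
pairwise2 consumes Solr's flat facet_fields format, which interleaves values and counts: ['value1', count1, 'value2', count2, ...]. A simplified, assumed sketch of that pairing (the real helper returns richer entries):

def pairwise2(field, selected_values, flat_counts):
    """Pair each facet value with its count and flag already-selected values."""
    return [
        {
            'field': field,
            'value': value,
            'count': count,
            'selected': value in selected_values,
        }
        for value, count in zip(flat_counts[::2], flat_counts[1::2])
    ]

print(pairwise2('user', ['hue'], ['hue', 42, 'admin', 7]))
# [{'field': 'user', 'value': 'hue', 'count': 42, 'selected': True},
#  {'field': 'user', 'value': 'admin', 'count': 7, 'selected': False}]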
Ejemplo n.º 25
0
def api_error_handler(func):
    # Enclosing definition reconstructed so the fragment runs; the function
    # name and the empty `response` dict are assumptions inferred from
    # `return decorator` and the response[...] assignments below.
    def decorator(*args, **kwargs):
        response = {}

        try:
            return func(*args, **kwargs)
        except SessionExpired:
            response['status'] = -2
        except QueryExpired:
            response['status'] = -3
        except AuthenticationRequired:
            response['status'] = 401
        except ValidationError as e:
            LOG.exception('Error validating %s' % func)
            response['status'] = -1
            response['message'] = e.message
        except QueryError as e:
            LOG.exception('Error running %s' % func)
            response['status'] = 1
            response['message'] = smart_unicode(e)
            if e.handle:
                response['handle'] = e.handle
            if e.extra:
                response.update(e.extra)
        except Exception as e:
            LOG.exception('Error running %s' % func)
            response['status'] = -1
            response['message'] = smart_unicode(e)
        finally:
            if response:
                return JsonResponse(response)

    return decorator
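
A hedged usage sketch for the decorator above; the view name and response shape are hypothetical. Any exception raised in the wrapped view is turned into a JSON error payload instead of an HTML 500 page.

@api_error_handler
def execute(request):
    response = {'status': 0}
    # ... run the statement and fill in the response ...
    return JsonResponse(response)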

Ejemplo n.º 26
0
def augment_solr_response(response, collection, query):
  augmented = response
  augmented['normalized_facets'] = []
  NAME = '%(field)s-%(id)s'
  normalized_facets = []

  selected_values = dict([(fq['id'], fq['filter']) for fq in query['fqs']])

  if response and response.get('facet_counts'):
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif (category == 'range' or category == 'range-up') and response['facet_counts']['facet_ranges']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].iteritems():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'counts': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = NAME % facet
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(name, facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'field': name,
          'type': category,
          'label': name,
          'counts': count,
        }
        normalized_facets.append(facet)

  if response and response.get('facets'):
    for facet in collection['facets']:
      category = facet['type']
      name = facet['id'] # Nested facets can only have one name

      if category == 'function' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        facet = {
          'id': collection_facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'counts': value,
        }
        normalized_facets.append(facet)
      elif category == 'nested' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        extraSeries = []
        counts = response['facets'][name]['buckets']

        cols = ['%(field)s' % facet, 'count(%(field)s)' % facet]
        last_x_col = 0
        last_xx_col = 0
        for i, f in enumerate(facet['properties']['facets']):
          if f['aggregate']['function'] == 'count':
            cols.append(f['field'])
            last_xx_col = last_x_col
            last_x_col = i + 2
          cols.append(SolrApi._get_aggregate_function(f))
        rows = []

        # For dim in dimensions

        # Number or Date range
        if collection_facet['properties']['canRange'] and not facet['properties'].get('type') == 'field':
          dimension = 3
          # Single dimension or dimension 2 with analytics
          if not collection_facet['properties']['facets'] or collection_facet['properties']['facets'][0]['aggregate']['function'] != 'count' and len(collection_facet['properties']['facets']) == 1:
            column = 'count'
            if len(collection_facet['properties']['facets']) == 1:
              agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
              legend = agg_keys[0].split(':', 2)[1]
              column = agg_keys[0]
            else:
              legend = facet['field'] # 'count(%s)' % legend
              agg_keys = [column]

            _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
            counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet)
          else:
            # Dimension 1 with counts and 2 with analytics

            agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
            agg_keys.sort(key=lambda a: a[4:])

            if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
              agg_keys.insert(0, 'count')
            counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            _series = collections.defaultdict(list)

            for row in rows:
              for i, cell in enumerate(row):
                if i > last_x_col:
                  legend = cols[i]
                  if last_xx_col != last_x_col:
                    legend = '%s %s' % (cols[i], row[last_x_col])
                  _series[legend].append(row[last_xx_col])
                  _series[legend].append(cell)

            for name, val in _series.iteritems():
              _c = range_pair(facet['field'], name, selected_values.get(facet['id'], []), val, 1, collection_facet)
              extraSeries.append({'counts': _c, 'label': name})
            counts = []
        elif collection_facet['properties'].get('isOldPivot'):
          facet_fields = [collection_facet['field']] + [f['field'] for f in collection_facet['properties'].get('facets', []) if f['aggregate']['function'] == 'count']
 
          column = 'count'
          agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
          agg_keys.sort(key=lambda a: a[4:])

          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)
          dimension = len(facet_fields)
        elif not collection_facet['properties']['facets'] or (collection_facet['properties']['facets'][0]['aggregate']['function'] != 'count' and len(collection_facet['properties']['facets']) == 1):
          # Dimension 1 with 1 count or agg
          dimension = 1

          column = 'count'
          if len(collection_facet['properties']['facets']) == 1:
            agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
            legend = agg_keys[0].split(':', 2)[1]
            column = agg_keys[0]
          else:
            legend = facet['field']
            agg_keys = [column]

          _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
          counts = pairwise2(legend, selected_values.get(facet['id'], []), counts)
        else:
          # Dimension 2 with analytics or 1 with N aggregates
          dimension = 2
          agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
          agg_keys.sort(key=lambda a: a[4:])

          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)
          actual_dimension = 1 + sum([_f['aggregate']['function'] == 'count' for _f in collection_facet['properties']['facets']])

          counts = filter(lambda a: len(a['fq_fields']) == actual_dimension, counts)

        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': extraSeries,
          'dimension': dimension,
          'response': {'response': {'start': 0, 'numFound': response['facets'][name]['numBuckets']}}, # Todo * nested buckets + offsets
          'docs': [dict(zip(cols, row)) for row in rows],
          'fieldsAttributes': [Collection2._make_gridlayout_header_field({'name': col, 'type': 'aggr' if '(' in col else 'string'}) for col in cols]
        }

        normalized_facets.append(facet)

    # Remove unnecessary facet data
    if response:
      response.pop('facet_counts')
      response.pop('facets')

  # HTML escaping
  if not query.get('download'):
    id_field = collection.get('idField', '')

    for doc in response['response']['docs']:
      for field, value in doc.iteritems():
        if isinstance(value, numbers.Number):
          escaped_value = value
        elif field == '_childDocuments_': # Nested documents
          escaped_value = value
        elif isinstance(value, list): # Multivalue field
          escaped_value = [smart_unicode(escape(val), errors='replace') for val in value]
        else:
          value = smart_unicode(value, errors='replace')
          escaped_value = escape(value)
        doc[field] = escaped_value

      link = None
      if 'link-meta' in doc:
        meta = json.loads(doc['link-meta'])
        link = get_data_link(meta)

      doc['externalLink'] = link
      doc['details'] = []
      doc['hueId'] = smart_unicode(doc.get(id_field, ''))

  highlighted_fields = response.get('highlighting', {}).keys()
  if highlighted_fields and not query.get('download'):
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field in doc and smart_unicode(doc[id_field]) in highlighted_fields:
          highlighting = response['highlighting'][smart_unicode(doc[id_field])]

          if highlighting:
            escaped_highlighting = {}
            for field, hls in highlighting.iteritems():
              _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
              escaped_highlighting[field] = _hls[0] if len(_hls) == 1 else _hls

            doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")


  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
Ejemplo n.º 27
0
 def __unicode__(self):
     return smart_unicode(self.message)
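
Context for the one-liner above: a __unicode__ like this typically sits on an exception class so that rendering the error never crashes on non-ASCII message bytes. The class shape and the stand-in import below are assumptions.

from django.utils.encoding import smart_text as smart_unicode  # stand-in for the project's smart_unicode

class MessageException(Exception):  # hypothetical minimal host class
    def __init__(self, message):
        self.message = message

    def __unicode__(self):
        return smart_unicode(self.message)

    __str__ = __unicode__  # keeps str(exc) consistent on Python 3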
Ejemplo n.º 28
0
def augment_solr_response(response, collection, query):
  augmented = response
  augmented['normalized_facets'] = []
  NAME = '%(field)s-%(id)s'
  normalized_facets = []

  selected_values = dict([(fq['id'], fq['filter']) for fq in query['fqs']])

  if response and response.get('facet_counts'):
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif (category == 'range' or category == 'range-up') and response['facet_counts']['facet_ranges']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].iteritems():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'counts': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = NAME % facet
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(name, facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'field': name,
          'type': category,
          'label': name,
          'counts': count,
        }
        normalized_facets.append(facet)

  if response and response.get('facets'):
    for facet in collection['facets']:
      category = facet['type']
      name = facet['id'] # Nested facets can only have one name

      if category == 'function' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        facet = {
          'id': collection_facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'counts': value,
        }
        normalized_facets.append(facet)
      elif category == 'nested' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])

        counts = _augment_stats_2d(name, facet, response['facets'][name]['buckets'], selected_values)

        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()

        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }

        normalized_facets.append(facet)

    # Remove unnecessary facet data
    if response:
      response.pop('facet_counts')
      response.pop('facets')

  # HTML escaping
  for doc in response['response']['docs']:
    for field, value in doc.iteritems():
      if isinstance(value, numbers.Number):
        escaped_value = value
      else:
        value = smart_unicode(value, errors='replace')
        escaped_value = escape(value)
      doc[field] = escaped_value

    if not query.get('download'):
      doc['showDetails'] = False
      doc['details'] = []

  highlighted_fields = response.get('highlighting', {}).keys()
  if highlighted_fields and not query.get('download'):
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field in doc and str(doc[id_field]) in highlighted_fields:
          highlighting = response['highlighting'][str(doc[id_field])]

          if highlighting:
            escaped_highlighting = {}
            for field, hls in highlighting.iteritems():
              _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
              escaped_highlighting[field] = _hls

            doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")


  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
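
The highlighting step above can be isolated into a tiny helper: each snippet is fully HTML-escaped for safety, then only Solr's <em>/</em> highlight markers are restored. Here html.escape stands in for django.utils.html.escape (an assumption).

from html import escape

def escape_highlight(hl):
    return escape(hl).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>')

print(escape_highlight('<script>x</script> <em>match</em>'))
# -> &lt;script&gt;x&lt;/script&gt; <em>match</em>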
Ejemplo n.º 29
0
def rerun_oozie_coordinator(request, job_id, app_path):
    oozie_coordinator = check_job_access_permission(request, job_id)
    check_job_edition_permission(oozie_coordinator, request.user)
    ParametersFormSet = formset_factory(ParameterForm, extra=0)

    if request.method == 'POST':
        params_form = ParametersFormSet(request.POST)
        rerun_form = RerunCoordForm(request.POST,
                                    oozie_coordinator=oozie_coordinator)

        if sum([rerun_form.is_valid(), params_form.is_valid()]) == 2:
            args = {}
            args['deployment_dir'] = app_path

            params = {
                'type':
                'action',
                'scope':
                ','.join(
                    oozie_coordinator.aggreate(
                        rerun_form.cleaned_data['actions'])),
                'refresh':
                rerun_form.cleaned_data['refresh'],
                'nocleanup':
                rerun_form.cleaned_data['nocleanup'],
            }

            properties = dict([(param['name'], param['value'])
                               for param in params_form.cleaned_data])

            _rerun_coordinator(request, job_id, args, params, properties)

            request.info(_('Coordinator re-running.'))
            return redirect(
                reverse('oozie:list_oozie_coordinator',
                        kwargs={'job_id': job_id}))
        else:
            request.error(
                _('Invalid submission form: %s') %
                smart_unicode(rerun_form.errors))
            return list_oozie_coordinator(request, job_id)
    else:
        rerun_form = RerunCoordForm(oozie_coordinator=oozie_coordinator)
        initial_params = ParameterForm.get_initial_params(
            oozie_coordinator.conf_dict)
        params_form = ParametersFormSet(initial=initial_params)

    popup = render('dashboard/rerun_coord_popup.mako',
                   request, {
                       'rerun_form':
                       rerun_form,
                       'params_form':
                       params_form,
                       'action':
                       reverse('oozie:rerun_oozie_coord',
                               kwargs={
                                   'job_id': job_id,
                                   'app_path': app_path
                               }),
                   },
                   force_template=True).content

    return HttpResponse(json.dumps(popup), mimetype="application/json")
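
A hedged sketch of the ParametersFormSet round-trip used above; ParameterForm here is a hypothetical minimal version, while formset_factory(..., extra=0) is Django's real API for building a formset class with no blank extra forms.

from django import forms
from django.forms import formset_factory

class ParameterForm(forms.Form):  # hypothetical minimal version
    name = forms.CharField()
    value = forms.CharField(required=False)

ParametersFormSet = formset_factory(ParameterForm, extra=0)

# Rendering: seed one form per existing job property.
params_form = ParametersFormSet(initial=[{'name': 'oozie.use.system.libpath', 'value': 'true'}])

# Submission: bind to request.POST and read the cleaned values back.
# params_form = ParametersFormSet(request.POST)
# if params_form.is_valid():
#     properties = dict((p['name'], p['value']) for p in params_form.cleaned_data)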
Ejemplo n.º 30
0
def view_results(request, id, first_row=0):
  """
  Returns the view for the results of the QueryHistory with the given id.

  The query results MUST be ready.
  To display query results, one should always go through the execute_query view.
  If the query has has_result_set=False, display an empty result.

  If ``first_row`` is 0, restarts (if necessary) the query read.  Otherwise, just
  spits out a warning if first_row doesn't match the server's conception.
  Multiple readers will produce a confusing interaction here, and that's known.

  It understands the ``context`` GET parameter. (See execute_query().)
  """
  first_row = long(first_row)
  start_over = (first_row == 0)
  results = type('Result', (object,), {
                'rows': 0,
                'columns': [],
                'has_more': False,
                'start_row': 0,
            })
  data = []
  fetch_error = False
  error_message = ''
  log = ''
  columns = []
  app_name = get_app_name(request)

  query_history = authorized_get_query_history(request, id, must_exist=True)
  query_server = query_history.get_query_server_config()
  db = dbms.get(request.user, query_server)

  handle, state = _get_query_handle_and_state(query_history)
  context_param = request.GET.get('context', '')
  query_context = parse_query_context(context_param)

  # Update the status as expired should not be accessible
  expired = state == models.QueryHistory.STATE.expired

  # Retrieve query results or use empty result if no result set
  try:
    if query_server['server_name'] == 'impala' and not handle.has_result_set:
      downloadable = False
    else:
      results = db.fetch(handle, start_over, 100)
      data = []

      # Materialize and HTML escape results
      # TODO: use Number + list comprehension
      for row in results.rows():
        escaped_row = []
        for field in row:
          if isinstance(field, (int, long, float, complex, bool)):
            if math.isnan(field) or math.isinf(field):
              escaped_field = json.dumps(field)
            else:
              escaped_field = field
          elif field is None:
            escaped_field = 'NULL'
          else:
            field = smart_unicode(field, errors='replace')  # Prevent errors when the result contains a non-UTF-8 charset such as iso-8859-1
            escaped_field = escape(field).replace(' ', '&nbsp;')
          escaped_row.append(escaped_field)
        data.append(escaped_row)

      # We display the "Download" button only when we know that there are results:
      downloadable = first_row > 0 or data
      log = db.get_log(handle)
      columns = results.data_table.cols()

  except Exception as ex:
    fetch_error = True
    error_message, log = expand_exception(ex, db, handle)
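
The escaping loop above can be condensed into a self-contained helper (a simplification: the complex case is dropped): numbers pass through, NaN/Infinity are serialised via json.dumps, None becomes 'NULL', and everything else is HTML-escaped with spaces preserved.

import json
import math
from html import escape  # stand-in for django.utils.html.escape (an assumption)

def escape_field(field):
    if isinstance(field, bool) or isinstance(field, int):
        return field
    if isinstance(field, float):
        return json.dumps(field) if math.isnan(field) or math.isinf(field) else field
    if field is None:
        return 'NULL'
    return escape(str(field)).replace(' ', '&nbsp;')

print([escape_field(f) for f in [1, float('nan'), None, '<b>hi</b>']])
# [1, 'NaN', 'NULL', '&lt;b&gt;hi&lt;/b&gt;']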
Ejemplo n.º 31
0
def augment_solr_response(response, collection, query):
    augmented = response
    augmented['normalized_facets'] = []
    NAME = '%(field)s-%(id)s'
    normalized_facets = []

    selected_values = dict([(fq['id'], fq['filter']) for fq in query['fqs']])

    if response and response.get('facet_counts'):
        for facet in collection['facets']:
            category = facet['type']

            if category == 'field' and response['facet_counts']['facet_fields']:
                name = NAME % facet
                collection_facet = get_facet_field(category, name,
                                                   collection['facets'])
                counts = pairwise2(
                    facet['field'], selected_values.get(facet['id'], []),
                    response['facet_counts']['facet_fields'][name])
                if collection_facet['properties']['sort'] == 'asc':
                    counts.reverse()
                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                }
                normalized_facets.append(facet)
            elif (category == 'range' or category
                  == 'range-up') and response['facet_counts']['facet_ranges']:
                name = NAME % facet
                collection_facet = get_facet_field(category, name,
                                                   collection['facets'])
                counts = response['facet_counts']['facet_ranges'][name][
                    'counts']
                end = response['facet_counts']['facet_ranges'][name]['end']
                counts = range_pair(facet['field'], name,
                                    selected_values.get(facet['id'], []),
                                    counts, end, collection_facet)
                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                    'extraSeries': []
                }
                normalized_facets.append(facet)
            elif category == 'query' and response['facet_counts'][
                    'facet_queries']:
                for name, value in response['facet_counts'][
                        'facet_queries'].iteritems():
                    collection_facet = get_facet_field(category, name,
                                                       collection['facets'])
                    facet = {
                        'id': collection_facet['id'],
                        'query': name,
                        'type': category,
                        'label': name,
                        'counts': value,
                    }
                    normalized_facets.append(facet)
            elif category == 'pivot':
                name = NAME % facet
                if 'facet_pivot' in response[
                        'facet_counts'] and name in response['facet_counts'][
                            'facet_pivot']:
                    if facet['properties']['scope'] == 'stack':
                        count = _augment_pivot_2d(
                            name, facet['id'],
                            response['facet_counts']['facet_pivot'][name],
                            selected_values)
                    else:
                        count = response['facet_counts']['facet_pivot'][name]
                        _augment_pivot_nd(facet['id'], count, selected_values)
                else:
                    count = []
                facet = {
                    'id': facet['id'],
                    'field': name,
                    'type': category,
                    'label': name,
                    'counts': count,
                }
                normalized_facets.append(facet)

        # Remove unnecessary facet data
        if response:
            response.pop('facet_counts')

    # HTML escaping
    for doc in response['response']['docs']:
        for field, value in doc.iteritems():
            if isinstance(value, numbers.Number):
                escaped_value = value
            else:
                value = smart_unicode(value, errors='replace')
                escaped_value = escape(value)
            doc[field] = escaped_value

        if not query.get('download'):
            doc['showDetails'] = False
            doc['details'] = []

    highlighted_fields = response.get('highlighting', {}).keys()
    if highlighted_fields and not query.get('download'):
        id_field = collection.get('idField')
        if id_field:
            for doc in response['response']['docs']:
                if id_field in doc and str(
                        doc[id_field]) in highlighted_fields:
                    highlighting = response['highlighting'][str(doc[id_field])]

                    if highlighting:
                        escaped_highlighting = {}
                        for field, hls in highlighting.iteritems():
                            _hls = [
                                escape(smart_unicode(
                                    hl, errors='replace')).replace(
                                        '&lt;em&gt;', '<em>').replace(
                                            '&lt;/em&gt;', '</em>')
                                for hl in hls
                            ]
                            escaped_highlighting[field] = _hls

                        doc.update(escaped_highlighting)
        else:
            response['warning'] = _(
                "The Solr schema requires an id field for performing the result highlighting"
            )

    if normalized_facets:
        augmented['normalized_facets'].extend(normalized_facets)

    return augmented
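
An assumed sketch of get_facet_field, which the variants above all rely on: given the widget name built from NAME = '%(field)s-%(id)s' (or a bare facet id), return the matching facet definition from the dashboard collection. The real helper may differ.

def get_facet_field(category, field, facets):
    for facet in facets:
        name = '%(field)s-%(id)s' % facet if 'field' in facet else facet.get('id')
        if field in (name, facet.get('id')):
            return facet
    return None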
Ejemplo n.º 32
0
def augment_solr_response(response, collection, query):
  augmented = response
  augmented['normalized_facets'] = []
  NAME = '%(field)s-%(id)s'
  normalized_facets = []

  selected_values = dict([(fq['id'], fq['filter']) for fq in query['fqs']])

  if response and response.get('facet_counts'):
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif (category == 'range' or category == 'range-up') and response['facet_counts']['facet_ranges']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].iteritems():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'counts': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = NAME % facet
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(name, facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'field': name,
          'type': category,
          'label': name,
          'counts': count,
        }
        normalized_facets.append(facet)

  if response and response.get('facets'):
    for facet in collection['facets']:
      category = facet['type']
      name = facet['id'] # Nested facets can only have one name

      if category == 'function' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        facet = {
          'id': collection_facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'counts': value,
        }
        normalized_facets.append(facet)
      elif category == 'nested' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        extraSeries = []
        counts = response['facets'][name]['buckets']

        # Date range
        if collection_facet['properties']['isDate']:
          dimension = 3
          # Single dimension or dimension 2 with analytics
          if not collection_facet['properties']['facets'] or collection_facet['properties']['facets'][0]['aggregate'] not in ('count', 'unique'):
            counts = [_v for _f in counts for _v in (_f['val'], _f['d2'] if 'd2' in _f else _f['count'])]
            counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet)
          else:
            # Dimension 1 with counts and 2 with analytics
            _series = collections.defaultdict(list)
            for f in counts:
              for bucket in (f['d2']['buckets'] if 'd2' in f else []):
                _series[bucket['val']].append(f['val'])
                _series[bucket['val']].append(bucket['d2'] if 'd2' in bucket else bucket['count'])
            for name, val in _series.iteritems():
              _c = range_pair(facet['field'], name, selected_values.get(facet['id'], []), val, 1, collection_facet)
              extraSeries.append({'counts': _c, 'label': name})
            counts = []
        elif not collection_facet['properties']['facets'] or collection_facet['properties']['facets'][0]['aggregate'] not in ('count', 'unique'):
          # Single dimension or dimension 2 with analytics
          dimension = 1
          counts = [_v for _f in counts for _v in (_f['val'], _f['d2'] if 'd2' in _f else _f['count'])]
          counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), counts)
        else:
          # Dimension 1 with counts and 2 with analytics
          dimension = 2
          counts = _augment_stats_2d(name, facet, counts, selected_values)

        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()

        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': extraSeries,
          'dimension': dimension
        }

        normalized_facets.append(facet)

    # Remove unnecessary facet data
    if response:
      response.pop('facet_counts', None)
      response.pop('facets', None)

  # HTML escaping
  for doc in response['response']['docs']:
    for field, value in doc.iteritems():
      if isinstance(value, numbers.Number):
        escaped_value = value
      else:
        value = smart_unicode(value, errors='replace')
        escaped_value = escape(value)
      doc[field] = escaped_value

    if not query.get('download'):
      link = None
      if 'link-meta' in doc:
        meta = json.loads(doc['link-meta'])
        link = get_data_link(meta)

      doc['externalLink'] = link
      doc['details'] = []

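  # Solr highlighting is keyed by document id; merge the escaped <em> snippets back into each doc.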
  highlighted_fields = response.get('highlighting', {}).keys()
  if highlighted_fields and not query.get('download'):
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field in doc and smart_unicode(doc[id_field]) in highlighted_fields:
          highlighting = response['highlighting'][smart_unicode(doc[id_field])]

          if highlighting:
            escaped_highlighting = {}
            for field, hls in highlighting.iteritems():
              _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
              escaped_highlighting[field] = _hls

            doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")

  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
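For reference, a minimal sketch of the pairing step both versions of this function lean on: Solr's classic facet_fields payload is a flat [value, count, value, count, ...] list, and a helper like pairwise2 zips it into the dicts the dashboard widgets consume. This is an assumed reconstruction, not the module's actual pairwise2; the dict keys and the filter shape are inferred from how the calling code uses the result.

# Hedged sketch of a pairwise2-like helper (assumed; the real helper lives
# elsewhere in the module). 'selected' is assumed to be a list of dicts with
# a 'value' key, as selected_values.get(facet['id'], []) suggests.
def pairwise2_sketch(field, selected, flat_counts):
  selected_values = set(f['value'] for f in selected)
  pairs = []
  # Walk the flat list two items at a time: value, then its count.
  for i in range(0, len(flat_counts), 2):
    value, count = flat_counts[i], flat_counts[i + 1]
    pairs.append({
      'cat': field,
      'value': value,
      'count': count,
      'selected': value in selected_values,
    })
  return pairs

# pairwise2_sketch('type', [], ['image', 12, 'video', 3])
# -> [{'cat': 'type', 'value': 'image', 'count': 12, 'selected': False},
#     {'cat': 'type', 'value': 'video', 'count': 3, 'selected': False}]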
Example #33
def augment_solr_response(response, collection, query):
    augmented = response
    augmented['normalized_facets'] = []
    NAME = '%(field)s-%(id)s'
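    # NAME mirrors how facet entries are keyed in the Solr response: '<field>-<id>'.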
    normalized_facets = []

    selected_values = dict([(fq['id'], fq['filter']) for fq in query['fqs']])
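    # selected_values maps each filter-query id to its selected values; every facet below checks against it.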

    if response and response.get('facet_counts'):
        for facet in collection['facets']:
            category = facet['type']

            if category == 'field' and response['facet_counts']['facet_fields']:
                name = NAME % facet
                collection_facet = get_facet_field(category, name,
                                                   collection['facets'])
                counts = pairwise2(
                    facet['field'], selected_values.get(facet['id'], []),
                    response['facet_counts']['facet_fields'][name])
                if collection_facet['properties']['sort'] == 'asc':
                    counts.reverse()
                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                }
                normalized_facets.append(facet)
            elif category in ('range', 'range-up') and response['facet_counts']['facet_ranges']:
                name = NAME % facet
                collection_facet = get_facet_field(category, name,
                                                   collection['facets'])
                counts = response['facet_counts']['facet_ranges'][name]['counts']
                end = response['facet_counts']['facet_ranges'][name]['end']
                counts = range_pair(facet['field'], name,
                                    selected_values.get(facet['id'], []),
                                    counts, end, collection_facet)
                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                    'extraSeries': []
                }
                normalized_facets.append(facet)
            elif category == 'query' and response['facet_counts']['facet_queries']:
                for name, value in response['facet_counts']['facet_queries'].iteritems():
                    collection_facet = get_facet_field(category, name,
                                                       collection['facets'])
                    facet = {
                        'id': collection_facet['id'],
                        'query': name,
                        'type': category,
                        'label': name,
                        'counts': value,
                    }
                    normalized_facets.append(facet)
            elif category == 'pivot':
                name = NAME % facet
                if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
                    if facet['properties']['scope'] == 'stack':
                        count = _augment_pivot_2d(
                            name, facet['id'],
                            response['facet_counts']['facet_pivot'][name],
                            selected_values)
                    else:
                        count = response['facet_counts']['facet_pivot'][name]
                        _augment_pivot_nd(facet['id'], count, selected_values)
                else:
                    count = []
                facet = {
                    'id': facet['id'],
                    'field': name,
                    'type': category,
                    'label': name,
                    'counts': count,
                }
                normalized_facets.append(facet)

    if response and response.get('facets'):
        for facet in collection['facets']:
            category = facet['type']
            name = facet['id']  # Nested facets can only have one name

            if category == 'function' and name in response['facets']:
                value = response['facets'][name]
                collection_facet = get_facet_field(category, name,
                                                   collection['facets'])
                facet = {
                    'id': collection_facet['id'],
                    'query': name,
                    'type': category,
                    'label': name,
                    'counts': value,
                }
                normalized_facets.append(facet)
            elif category == 'nested' and name in response['facets']:
                value = response['facets'][name]
                collection_facet = get_facet_field(category, name,
                                                   collection['facets'])
                extraSeries = []
                counts = response['facets'][name]['buckets']

                # Date range
                if collection_facet['properties']['isDate']:
                    dimension = 3
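                    # 'dimension' travels with the normalized facet so the frontend can tell the series shapes apart.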
                    # Single dimension or dimension 2 with analytics
                    if (not collection_facet['properties']['facets'] or
                            collection_facet['properties']['facets'][0]['aggregate'] not in ('count', 'unique')):
                        counts = [
                            _v for _f in counts
                            for _v in (_f['val'],
                                       _f['d2'] if 'd2' in _f else _f['count'])
                        ]
                        counts = range_pair(
                            facet['field'], name,
                            selected_values.get(facet['id'], []), counts, 1,
                            collection_facet)
                    else:
                        # Dimension 1 with counts and 2 with analytics
                        _series = collections.defaultdict(list)
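                        # Group dimension-2 buckets into one flat [x, y, x, y, ...] series per bucket value.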
                        for f in counts:
                            for bucket in (f['d2']['buckets'] if 'd2' in f else []):
                                _series[bucket['val']].append(f['val'])
                                _series[bucket['val']].append(
                                    bucket['d2'] if 'd2' in bucket else bucket['count'])
                        for serie_name, val in _series.iteritems():  # renamed to avoid shadowing 'name' above
                            _c = range_pair(
                                facet['field'], serie_name,
                                selected_values.get(facet['id'], []), val, 1,
                                collection_facet)
                            extraSeries.append({'counts': _c, 'label': serie_name})
                        counts = []
                elif (not collection_facet['properties']['facets'] or
                        collection_facet['properties']['facets'][0]['aggregate'] not in ('count', 'unique')):
                    # Single dimension or dimension 2 with analytics
                    dimension = 1
                    counts = [
                        _v for _f in counts
                        for _v in (_f['val'],
                                   _f['d2'] if 'd2' in _f else _f['count'])
                    ]
                    counts = pairwise2(facet['field'],
                                       selected_values.get(facet['id'], []),
                                       counts)
                else:
                    # Dimension 1 with counts and 2 with analytics
                    dimension = 2
                    counts = _augment_stats_2d(name, facet, counts,
                                               selected_values)

                if collection_facet['properties']['sort'] == 'asc':
                    counts.reverse()

                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                    'extraSeries': extraSeries,
                    'dimension': dimension
                }

                normalized_facets.append(facet)

        # Remove unnecessary facet data
        if response:
            response.pop('facet_counts', None)
            response.pop('facets', None)

    # HTML escaping
    if not query.get('download'):
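        # Downloads keep raw field values; only responses rendered in the UI get escaped.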
        for doc in response['response']['docs']:
            for field, value in doc.iteritems():
                if isinstance(value, numbers.Number):
                    escaped_value = value
                elif isinstance(value, list):  # Multivalue field
                    escaped_value = [
                        escape(smart_unicode(val, errors='replace'))  # escape each item too, matching the scalar branch
                        for val in value
                    ]
                else:
                    value = smart_unicode(value, errors='replace')
                    escaped_value = escape(value)
                doc[field] = escaped_value

            link = None
            if 'link-meta' in doc:
                meta = json.loads(doc['link-meta'])
                link = get_data_link(meta)

            doc['externalLink'] = link
            doc['details'] = []

    highlighted_fields = response.get('highlighting', {}).keys()
    if highlighted_fields and not query.get('download'):
        id_field = collection.get('idField')
        if id_field:
            for doc in response['response']['docs']:
                if id_field in doc and smart_unicode(doc[id_field]) in highlighted_fields:
                    highlighting = response['highlighting'][smart_unicode(doc[id_field])]

                    if highlighting:
                        escaped_highlighting = {}
                        for field, hls in highlighting.iteritems():
                            _hls = [
                                escape(smart_unicode(hl, errors='replace'))
                                .replace('&lt;em&gt;', '<em>')
                                .replace('&lt;/em&gt;', '</em>')
                                for hl in hls
                            ]
                            escaped_highlighting[field] = _hls

                        doc.update(escaped_highlighting)
        else:
            response['warning'] = _(
                "The Solr schema requires an id field for performing the result highlighting")

    if normalized_facets:
        augmented['normalized_facets'].extend(normalized_facets)

    return augmented
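The bucket-flattening comprehension used for nested JSON facets is dense enough to deserve a toy illustration. The bucket dicts below are made-up sample data in the shape the code assumes: each bucket contributes its 'val' followed by either its 'd2' analytic, when present, or its plain 'count', yielding the flat list that range_pair and pairwise2 consume.

# Toy input in the assumed JSON Facet API bucket shape (sample data only).
buckets = [
    {'val': '2024-01-01T00:00:00Z', 'count': 7},
    {'val': '2024-01-02T00:00:00Z', 'count': 4, 'd2': 2.5},  # has an analytic
]

# The same comprehension as in the function body above.
flat = [_v for _f in buckets
        for _v in (_f['val'], _f['d2'] if 'd2' in _f else _f['count'])]

print(flat)  # ['2024-01-01T00:00:00Z', 7, '2024-01-02T00:00:00Z', 2.5]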