def process_exception(self, request, exception):
  import traceback
  tb = traceback.format_exc()
  logging.info("Processing exception: %s: %s" % (i18n.smart_unicode(exception), i18n.smart_unicode(tb)))

  if hasattr(exception, "response"):
    return exception.response(request)

  if hasattr(exception, "response_data"):
    if request.ajax:
      response = render_json(exception.response_data)
      response[MIDDLEWARE_HEADER] = 'EXCEPTION'
      return response
    else:
      return render("error.mako", request, dict(error=exception.response_data.get("message")))

  # We didn't handle it as a special exception, but if we're ajax we still
  # need to do some kind of nicer handling than the built-in page.
  # Note that the exception may actually be an Http404 or similar.
  if request.ajax:
    err = _("An error occurred: %(error)s") % {'error': exception}
    logging.exception("Middleware caught an exception")
    return PopupException(err, detail=None).response(request)

  return None
def _invoke(self, method, relpath=None, params=None, data=None, headers=None, files=None,
            allow_redirects=False, clear_cookies=False, log_response=True):
  """
  Invoke an API method.

  @return: Raw body or JSON dictionary (if response content type is JSON).
  """
  path = self._join_uri(relpath)
  start_time = time.time()

  resp = self._client.execute(method, path, params=params, data=data, headers=headers, files=files,
                              allow_redirects=allow_redirects, urlencode=self._urlencode, clear_cookies=clear_cookies)

  if log_response:
    log_length = conf.REST_RESPONSE_SIZE.get() != -1 and conf.REST_RESPONSE_SIZE.get()
    duration = time.time() - start_time
    message = "%s %s Got response%s: %s%s" % (
        method,
        smart_unicode(path, errors='ignore'),
        ' in %dms' % (duration * 1000),
        smart_unicode(resp.content[:log_length or None], errors='replace'),
        log_length and len(resp.content) > log_length and "..." or ""
    )
    self._client.logger.disabled = 0
    log_if_slow_call(duration=duration, message=message, logger=self._client.logger)

  return resp
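# A minimal standalone sketch (not Hue code) of the truncation idiom used above:
# `value != -1 and value` evaluates to either False or the value itself, and
# slicing with `[:False or None]` degrades gracefully to "no limit". The
# function name below is hypothetical.
def truncate_for_log(content, configured_size):
  log_length = configured_size != -1 and configured_size  # False, or the size limit
  shown = content[:log_length or None]                     # [:None] keeps the whole string
  suffix = log_length and len(content) > log_length and "..." or ""
  return shown + suffix

assert truncate_for_log("abcdef", -1) == "abcdef"   # -1 means unlimited
assert truncate_for_log("abcdef", 3) == "abc..."    # truncated with ellipsis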
def _init_attributes(self):
  self.queueName = i18n.smart_unicode(self.job.profile.queueName)
  self.jobName = i18n.smart_unicode(self.job.profile.name)
  self.user = i18n.smart_unicode(self.job.profile.user)
  self.mapProgress = self.job.status.mapProgress
  self.reduceProgress = self.job.status.reduceProgress
  self.setupProgress = self.job.status.setupProgress
  self.cleanupProgress = self.job.status.cleanupProgress

  if self.job.desiredMaps == 0:
    maps_percent_complete = 0
  else:
    maps_percent_complete = int(round(float(self.job.finishedMaps) / self.job.desiredMaps * 100))

  self.desiredMaps = self.job.desiredMaps

  if self.job.desiredReduces == 0:
    reduces_percent_complete = 0
  else:
    reduces_percent_complete = int(round(float(self.job.finishedReduces) / self.job.desiredReduces * 100))

  self.desiredReduces = self.job.desiredReduces
  self.maps_percent_complete = maps_percent_complete
  self.finishedMaps = self.job.finishedMaps
  self.finishedReduces = self.job.finishedReduces
  self.reduces_percent_complete = reduces_percent_complete
  self.startTimeMs = self.job.startTime
  self.startTimeFormatted = format_unixtime_ms(self.job.startTime)
  self.launchTimeMs = self.job.launchTime
  self.launchTimeFormatted = format_unixtime_ms(self.job.launchTime)
  self.finishTimeMs = self.job.finishTime
  self.finishTimeFormatted = format_unixtime_ms(self.job.finishTime)
  self.status = self.job.status.runStateAsString
  self.priority = self.job.priorityAsString
  self.jobFile = self.job.profile.jobFile

  finishTime = self.job.finishTime
  if finishTime == 0:
    finishTime = datetime.datetime.now()
  else:
    finishTime = datetime.datetime.fromtimestamp(finishTime / 1000)
  self.duration = finishTime - datetime.datetime.fromtimestamp(self.job.startTime / 1000)
  diff = int(finishTime.strftime("%s")) * 1000 - self.startTimeMs
  self.durationFormatted = format_duration_in_millis(diff)
  self.durationInMillis = diff
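# Note that strftime("%s") above is a non-portable glibc extension. A portable
# sketch of the same epoch-milliseconds arithmetic, should this ever need to run
# outside Linux (epoch_millis is a hypothetical helper, not part of the source):
import datetime
import time

def epoch_millis(dt):
  # Portable equivalent of int(dt.strftime("%s")) * 1000 for naive local datetimes
  return int(time.mktime(dt.timetuple())) * 1000

finish = datetime.datetime.now()
start_ms = epoch_millis(finish - datetime.timedelta(minutes=5))
print(epoch_millis(finish) - start_ms)  # 300000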
def invoke(self, method, relpath=None, params=None, data=None, headers=None, files=None,
           allow_redirects=False, clear_cookies=False):
  """
  Invoke an API method.

  @return: Raw body or JSON dictionary (if response content type is JSON).
  """
  path = self._join_uri(relpath)
  resp = self._client.execute(method, path, params=params, data=data, headers=headers, files=files,
                              allow_redirects=allow_redirects, urlencode=self._urlencode, clear_cookies=clear_cookies)

  if self._client.logger.isEnabledFor(logging.DEBUG):
    self._client.logger.debug(
        "%s Got response: %s%s" %
        (method, smart_unicode(resp.content[:1000], errors='replace'), len(resp.content) > 1000 and "..." or ""))

  return self._format_response(resp)
def decorator(*args, **kwargs):
  response = {}

  try:
    return view_fn(*args, **kwargs)
  except Exception, e:
    LOG.exception('Error running %s' % view_fn)
    response['status'] = -1
    response['message'] = smart_unicode(e)
  return JsonResponse(response)  # Assumed fall-through on error, mirroring the sibling handlers in this section
def render_to_string_normal(template_name, django_context):
  data_dict = dict()
  if isinstance(django_context, django.template.Context):
    for d in reversed(django_context.dicts):
      data_dict.update(d)
  else:
    data_dict = django_context

  template = lookup.get_template(template_name)
  result = template.render(**data_dict)
  return i18n.smart_unicode(result)
def process_exception(self, request, exception):
  import traceback
  tb = traceback.format_exc()
  logging.info("Processing exception: %s: %s" % (i18n.smart_unicode(exception), i18n.smart_unicode(tb)))

  if isinstance(exception, PopupException):
    return exception.response(request)

  if isinstance(exception, StructuredException):
    if request.ajax:
      response = render_json(exception.response_data)
      response[MIDDLEWARE_HEADER] = 'EXCEPTION'
      response.status_code = getattr(exception, 'error_code', 500)
      return response
    else:
      response = render("error.mako", request, dict(error=exception.response_data.get("message")))
      response.status_code = getattr(exception, 'error_code', 500)
      return response

  return None
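# For context, a minimal sketch of the kind of exception this middleware expects.
# The real PopupException/StructuredException classes live in Hue's desktop
# library; only the attribute names read above (response_data, error_code) are
# taken from the source — the class below is a hypothetical stand-in.
class StructuredExceptionSketch(Exception):
  def __init__(self, code, message, error_code=500):
    super(StructuredExceptionSketch, self).__init__(message)
    self.error_code = error_code                             # read via getattr(exception, 'error_code', 500)
    self.response_data = {'code': code, 'message': message}  # rendered as JSON for AJAX requests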
def render_to_string_normal(template_name, django_context):
  data_dict = dict()
  if isinstance(django_context, django.template.context.Context):
    for d in reversed(django_context.dicts):
      if d:
        data_dict.update(d)
    data_dict.update({'request': django_context.request})
  else:
    data_dict = django_context

  template = lookup.get_template(template_name)
  data_dict = dict(map(lambda k: (str(k), data_dict.get(k)), data_dict.keys()))
  result = template.render(**data_dict)
  return i18n.smart_unicode(result)
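# The str(k) pass above matters on Python 2: keyword expansion with ** rejects
# unicode keyword names, so a Django context holding u'request'-style keys must
# be re-keyed before being splatted into Mako's template.render(**data_dict).
# A standalone illustration (stringify_keys is a hypothetical helper):
def stringify_keys(data_dict):
  # Coerce every key to str so the dict can be expanded as **kwargs
  return dict((str(k), v) for k, v in data_dict.items())

print(stringify_keys({u'user': 'hue', u'request': None}))  # {'user': 'hue', 'request': None}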
def rerun_oozie_coordinator(request, job_id, app_path=None):
  oozie_coordinator = check_job_access_permission(request, job_id)
  check_job_edition_permission(oozie_coordinator, request.user)
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  if app_path is None:
    app_path = oozie_coordinator.coordJobPath
  return_json = request.GET.get('format') == 'json'

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)
    rerun_form = RerunCoordForm(request.POST, oozie_coordinator=oozie_coordinator)

    if sum([rerun_form.is_valid(), params_form.is_valid()]) == 2:
      args = {}
      args['deployment_dir'] = app_path

      params = {
        'type': 'action',
        'scope': ','.join(oozie_coordinator.aggreate(rerun_form.cleaned_data['actions'])),
        'refresh': rerun_form.cleaned_data['refresh'],
        'nocleanup': rerun_form.cleaned_data['nocleanup'],
      }

      properties = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      _rerun_coordinator(request, job_id, args, params, properties)

      if rerun_form.cleaned_data['return_json']:
        return JsonResponse({'status': 0, 'job_id': job_id}, safe=False)
      else:
        request.info(_('Coordinator re-running.'))
        return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s') % smart_unicode(rerun_form.errors))
      return list_oozie_coordinator(request, job_id)
  else:
    rerun_form = RerunCoordForm(oozie_coordinator=oozie_coordinator, return_json=return_json)
    initial_params = ParameterForm.get_initial_params(oozie_coordinator.conf_dict)
    params_form = ParametersFormSet(initial=initial_params)

    popup = render('dashboard/rerun_coord_popup.mako', request, {
        'rerun_form': rerun_form,
        'params_form': params_form,
        'action': reverse('oozie:rerun_oozie_coord', kwargs={'job_id': job_id, 'app_path': app_path}),
        'return_json': return_json,
        'is_mini': request.GET.get('is_mini', False),
      }, force_template=True).content
    return JsonResponse(popup, safe=False)
def rerun_oozie_coordinator(request, job_id, app_path):
  oozie_coordinator = check_job_access_permission(request, job_id)
  check_job_edition_permission(oozie_coordinator, request.user)
  ParametersFormSet = formset_factory(ParameterForm, extra=0)

  if request.method == "POST":
    params_form = ParametersFormSet(request.POST)
    rerun_form = RerunCoordForm(request.POST, oozie_coordinator=oozie_coordinator)

    if sum([rerun_form.is_valid(), params_form.is_valid()]) == 2:
      args = {}
      args["deployment_dir"] = app_path

      params = {
        "type": "action",
        "scope": ",".join(oozie_coordinator.aggreate(rerun_form.cleaned_data["actions"])),
        "refresh": rerun_form.cleaned_data["refresh"],
        "nocleanup": rerun_form.cleaned_data["nocleanup"],
      }

      properties = dict([(param["name"], param["value"]) for param in params_form.cleaned_data])

      _rerun_coordinator(request, job_id, args, params, properties)
      request.info(_("Coordinator re-running."))
      return redirect(reverse("oozie:list_oozie_coordinator", kwargs={"job_id": job_id}))
    else:
      request.error(_("Invalid submission form: %s") % smart_unicode(rerun_form.errors))
      return list_oozie_coordinator(request, job_id)
  else:
    rerun_form = RerunCoordForm(oozie_coordinator=oozie_coordinator)
    initial_params = ParameterForm.get_initial_params(oozie_coordinator.conf_dict)
    params_form = ParametersFormSet(initial=initial_params)

    popup = render(
      "dashboard/rerun_coord_popup.mako", request, {
        "rerun_form": rerun_form,
        "params_form": params_form,
        "action": reverse("oozie:rerun_oozie_coord", kwargs={"job_id": job_id, "app_path": app_path}),
      },
      force_template=True,
    ).content
    return JsonResponse(popup, safe=False)
def escape_rows(rows):
  data = []

  for row in rows:
    escaped_row = []
    for field in row:
      if isinstance(field, numbers.Number):
        if math.isnan(field) or math.isinf(field):
          escaped_field = json.dumps(field)
        else:
          escaped_field = field
      elif field is None:
        escaped_field = 'NULL'
      else:
        field = smart_unicode(field, errors='replace')  # Prevent error when getting back non utf8 like charset=iso-8859-1
        escaped_field = escape(field).replace(' ', '&nbsp;')
      escaped_row.append(escaped_field)
    data.append(escaped_row)

  return data
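# A quick illustration of the escaping behavior, assuming escape_rows and its
# Django helpers are importable as above: NaN becomes a JSON literal, None
# becomes the string 'NULL', markup is entity-escaped and spaces become &nbsp;.
rows = [[float('nan'), None, '<b>bold & spaced</b>']]
print(escape_rows(rows))
# Roughly: [['NaN', 'NULL', u'&lt;b&gt;bold&nbsp;&amp;&nbsp;spaced&lt;/b&gt;']]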
def __unicode__(self):
  return smart_unicode(self.message)
    response['status'] = -3
  except AuthenticationRequired, e:
    response['status'] = 401
  except ValidationError, e:
    LOG.exception('Error validation %s' % func)
    response['status'] = -1
    response['message'] = e.message
  except OperationTimeout, e:
    response['status'] = -4
  except FilesystemException, e:
    response['status'] = 2
    response['message'] = e.message
  except QueryError, e:
    LOG.exception('Error running %s' % func)
    response['status'] = 1
    response['message'] = smart_unicode(e)
    if e.handle:
      response['handle'] = e.handle
    if e.extra:
      response.update(e.extra)
  except OperationNotSupported, e:
    response['status'] = 5
    response['message'] = e.message
  except Exception, e:
    LOG.exception('Error running %s' % func)
    response['status'] = -1
    response['message'] = smart_unicode(e)
  finally:
    if response:
      return JsonResponse(response)
def augment_response(collection, query, response):
  # HTML escaping
  if not query.get('download'):
    id_field = collection.get('idField', '')

    for doc in response['response']['docs']:
      link = None
      if 'link-meta' in doc:
        meta = json.loads(doc['link-meta'])
        link = get_data_link(meta)
      elif 'link' in doc:
        meta = {'type': 'link', 'link': doc['link']}
        link = get_data_link(meta)

      for field, value in doc.iteritems():
        if isinstance(value, numbers.Number):
          escaped_value = value
        elif field == '_childDocuments_':  # Nested documents
          escaped_value = value
        elif isinstance(value, list):  # Multivalue field
          escaped_value = [smart_unicode(escape(val), errors='replace') for val in value]
        else:
          value = smart_unicode(value, errors='replace')
          escaped_value = escape(value)
        doc[field] = escaped_value

      doc['externalLink'] = link
      doc['details'] = []
      doc['hueId'] = smart_unicode(doc.get(id_field, ''))

      if 'moreLikeThis' in response and response['moreLikeThis'][doc['hueId']].get('numFound'):
        _doc = response['moreLikeThis'][doc['hueId']]
        doc['_childDocuments_'] = _doc['docs']
        doc['numFound'] = _doc['numFound']
        del response['moreLikeThis'][doc['hueId']]

  highlighted_fields = response.get('highlighting', {}).keys()
  if highlighted_fields and not query.get('download'):
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field in doc and smart_unicode(doc[id_field]) in highlighted_fields:
          highlighting = response['highlighting'][smart_unicode(doc[id_field])]

          if highlighting:
            escaped_highlighting = {}
            for field, hls in highlighting.iteritems():
              _hls = [
                escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>')
                for hl in hls
              ]
              escaped_highlighting[field] = _hls[0] if len(_hls) == 1 else _hls

            doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")
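# The replace calls above first entity-escape the whole highlight snippet, then
# deliberately restore Solr's <em>/</em> highlight markers so they render as
# real tags while everything else stays escaped. In isolation (assuming
# Django's escape, as used throughout these snippets):
from django.utils.html import escape

hl = '<script>x</script> found <em>sun</em>'
safe = escape(hl).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>')
print(safe)  # &lt;script&gt;x&lt;/script&gt; found <em>sun</em>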
def wrapper(*args, **kwargs):
  response = {}

  try:
    return f(*args, **kwargs)
  except SessionExpired as e:
    response['status'] = -2
  except QueryExpired as e:
    response['status'] = -3
    if e.message and isinstance(e.message, basestring):
      response['message'] = e.message
  except AuthenticationRequired as e:
    response['status'] = 401
    if e.message and isinstance(e.message, basestring):
      response['message'] = e.message
  except ValidationError as e:
    LOG.exception('Error validation %s' % f)
    response['status'] = -1
    response['message'] = e.message
  except OperationTimeout as e:
    response['status'] = -4
  except FilesystemException as e:
    response['status'] = 2
    response['message'] = e.message or 'Query history not found'
  except QueryError as e:
    LOG.exception('Error running %s' % f.__name__)
    response['status'] = 1
    response['message'] = smart_unicode(e)
    if 'max_row_size' in response['message']:  # str.index() would raise ValueError when the substring is absent
      size = re.search(r"(\d+.?\d*) (.B)", response['message'])
      if size and size.group(1):
        response['help'] = {
          'setting': {
            'name': 'max_row_size',
            'value': str(int(_closest_power_of_2(_to_size_in_bytes(size.group(1), size.group(2)))))
          }
        }
    if e.handle:
      response['handle'] = e.handle
    if e.extra:
      response.update(e.extra)
  except OperationNotSupported as e:
    response['status'] = 5
    response['message'] = e.message
  except RestException as e:
    message = extract_solr_exception_message(e)
    response['status'] = 1
    response['message'] = message.get('error')
  except Exception as e:
    LOG.exception('Error running %s' % f.__name__)
    response['status'] = -1
    response['message'] = smart_unicode(e)
  finally:
    if response:
      return JsonResponse(response)
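# The max_row_size hint above parses a size such as '2.5 MB' out of the error
# text and rounds it up to a power of two. A self-contained sketch of plausible
# implementations for the two helpers it calls — the real _to_size_in_bytes and
# _closest_power_of_2 live elsewhere in the module, so these bodies are assumptions:
import math
import re

def _to_size_in_bytes(value, unit):
  # 'KB' -> 2**10, 'MB' -> 2**20, ... (hypothetical re-implementation)
  return float(value) * 1024 ** ('BKMGT'.index(unit[0].upper()))

def _closest_power_of_2(n):
  return 2 ** int(math.ceil(math.log(n, 2)))

message = 'Row of size 2.5 MB could not be materialized; increase max_row_size'
size = re.search(r"(\d+.?\d*) (.B)", message)
if size:
  print(_closest_power_of_2(_to_size_in_bytes(size.group(1), size.group(2))))  # 4194304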
def augment_solr_response(response, collection, query):
  augmented = response
  augmented['normalized_facets'] = []

  normalized_facets = []
  selected_values = dict([((fq['id'], fq['field'], fq['type']), fq['filter']) for fq in query['fqs']])

  if response and response.get('facet_counts'):
    # e.g. [{u'field': u'sun', u'type': u'query', u'id': u'67b43a63-ed22-747b-47e8-b31aad1431ea', u'label': u'sun'}]
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = facet['field']
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(name, selected_values.get((facet['id'], name, category), []),
                           response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': name,
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          # add total result count?
        }
        normalized_facets.append(facet)
      elif category == 'range' and response['facet_counts']['facet_ranges']:
        name = facet['field']
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(name, selected_values.get((facet['id'], name, 'range'), []), counts, end)
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': name,
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].iteritems():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'count': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = ','.join([facet['field']] + [f['field'] for f in facet['properties']['facets']])
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'count': count,
        }
        normalized_facets.append(facet)

  # HTML escaping
  for doc in response['response']['docs']:
    for field, value in doc.iteritems():
      if isinstance(value, numbers.Number):
        escaped_value = value
      else:
        value = smart_unicode(value, errors='replace')
        escaped_value = escape(value)
      doc[field] = escaped_value

    if not query['download']:
      doc['showDetails'] = False
      doc['details'] = []

  highlighted_fields = response.get('highlighting', {}).keys()
  if highlighted_fields and not query.get('download'):
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field in doc and str(doc[id_field]) in highlighted_fields:
          highlighting = response['highlighting'][str(doc[id_field])]
          if highlighting:
            escaped_highlighting = {}
            for field, hls in highlighting.iteritems():
              _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
              escaped_highlighting[field] = _hls
            doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")

  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
def augment_solr_response(response, collection, query):
  augmented = response
  augmented["normalized_facets"] = []
  NAME = "%(field)s-%(id)s"
  normalized_facets = []

  selected_values = dict([(fq["id"], fq["filter"]) for fq in query["fqs"]])

  if response and response.get("facet_counts"):
    for facet in collection["facets"]:
      category = facet["type"]

      if category == "field" and response["facet_counts"]["facet_fields"]:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection["facets"])
        counts = pairwise2(
            facet["field"], selected_values.get(facet["id"], []),
            response["facet_counts"]["facet_fields"][name]
        )
        if collection_facet["properties"]["sort"] == "asc":
          counts.reverse()
        facet = {
          "id": collection_facet["id"],
          "field": facet["field"],
          "type": category,
          "label": collection_facet["label"],
          "counts": counts,
        }
        normalized_facets.append(facet)
      elif (category == "range" or category == "range-up") and response["facet_counts"]["facet_ranges"]:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection["facets"])
        counts = response["facet_counts"]["facet_ranges"][name]["counts"]
        end = response["facet_counts"]["facet_ranges"][name]["end"]
        counts = range_pair(
            facet["field"], name, selected_values.get(facet["id"], []), counts, end, collection_facet
        )
        facet = {
          "id": collection_facet["id"],
          "field": facet["field"],
          "type": category,
          "label": collection_facet["label"],
          "counts": counts,
          "extraSeries": [],
        }
        normalized_facets.append(facet)
      elif category == "query" and response["facet_counts"]["facet_queries"]:
        for name, value in response["facet_counts"]["facet_queries"].iteritems():
          collection_facet = get_facet_field(category, name, collection["facets"])
          facet = {
            "id": collection_facet["id"],
            "query": name,
            "type": category,
            "label": name,
            "counts": value,
          }
          normalized_facets.append(facet)
      elif category == "pivot":
        name = NAME % facet
        if "facet_pivot" in response["facet_counts"] and name in response["facet_counts"]["facet_pivot"]:
          if facet["properties"]["scope"] == "stack":
            count = _augment_pivot_2d(
                name, facet["id"], response["facet_counts"]["facet_pivot"][name], selected_values
            )
          else:
            count = response["facet_counts"]["facet_pivot"][name]
            _augment_pivot_nd(facet["id"], count, selected_values)
        else:
          count = []
        facet = {"id": facet["id"], "field": name, "type": category, "label": name, "counts": count}
        normalized_facets.append(facet)

  if response and response.get("facets"):
    for facet in collection["facets"]:
      category = facet["type"]
      name = facet["id"]  # Nested facets can only have one name

      if category == "function" and name in response["facets"]:
        value = response["facets"][name]
        collection_facet = get_facet_field(category, name, collection["facets"])
        facet = {"id": collection_facet["id"], "query": name, "type": category, "label": name, "counts": value}
        normalized_facets.append(facet)
      elif category == "nested" and name in response["facets"]:
        value = response["facets"][name]
        collection_facet = get_facet_field(category, name, collection["facets"])
        extraSeries = []
        counts = response["facets"][name]["buckets"]

        # Date range
        if collection_facet["properties"]["isDate"]:
          dimension = 3
          # Single dimension or dimension 2 with analytics
          if not collection_facet["properties"]["facets"] or collection_facet["properties"]["facets"][0]["aggregate"] not in ("count", "unique"):
            counts = [_v for _f in counts for _v in (_f["val"], _f["d2"] if "d2" in _f else _f["count"])]
            counts = range_pair(facet["field"], name, selected_values.get(facet["id"], []), counts, 1, collection_facet)
          else:
            # Dimension 1 with counts and 2 with analytics
            _series = collections.defaultdict(list)
            for f in counts:
              for bucket in f["d2"]["buckets"] if "d2" in f else []:
                _series[bucket["val"]].append(f["val"])
                _series[bucket["val"]].append(bucket["d2"] if "d2" in bucket else bucket["count"])
            for name, val in _series.iteritems():
              _c = range_pair(facet["field"], name, selected_values.get(facet["id"], []), val, 1, collection_facet)
              extraSeries.append({"counts": _c, "label": name})
            counts = []
        elif not collection_facet["properties"]["facets"] or collection_facet["properties"]["facets"][0]["aggregate"] not in ("count", "unique"):
          # Single dimension or dimension 2 with analytics
          dimension = 1
          counts = [_v for _f in counts for _v in (_f["val"], _f["d2"] if "d2" in _f else _f["count"])]
          counts = pairwise2(facet["field"], selected_values.get(facet["id"], []), counts)
        else:
          # Dimension 1 with counts and 2 with analytics
          dimension = 2
          counts = _augment_stats_2d(name, facet, counts, selected_values)

        if collection_facet["properties"]["sort"] == "asc":
          counts.reverse()

        facet = {
          "id": collection_facet["id"],
          "field": facet["field"],
          "type": category,
          "label": collection_facet["label"],
          "counts": counts,
          "extraSeries": extraSeries,
          "dimension": dimension,
        }
        normalized_facets.append(facet)

  # Remove unnecessary facet data
  if response:
    response.pop("facet_counts")
    response.pop("facets")

  # HTML escaping
  for doc in response["response"]["docs"]:
    for field, value in doc.iteritems():
      if isinstance(value, numbers.Number):
        escaped_value = value
      elif isinstance(value, list):  # Multivalue field
        escaped_value = [smart_unicode(val, errors="replace") for val in value]
      else:
        value = smart_unicode(value, errors="replace")
        escaped_value = escape(value)
      doc[field] = escaped_value

    if not query.get("download"):
      link = None
      if "link-meta" in doc:
        meta = json.loads(doc["link-meta"])
        link = get_data_link(meta)
      doc["externalLink"] = link
      doc["details"] = []

  highlighted_fields = response.get("highlighting", {}).keys()
  if highlighted_fields and not query.get("download"):
    id_field = collection.get("idField")
    if id_field:
      for doc in response["response"]["docs"]:
        if id_field in doc and smart_unicode(doc[id_field]) in highlighted_fields:
          highlighting = response["highlighting"][smart_unicode(doc[id_field])]
          if highlighting:
            escaped_highlighting = {}
            for field, hls in highlighting.iteritems():
              _hls = [
                escape(smart_unicode(hl, errors="replace")).replace("&lt;em&gt;", "<em>").replace("&lt;/em&gt;", "</em>")
                for hl in hls
              ]
              escaped_highlighting[field] = _hls
            doc.update(escaped_highlighting)
    else:
      response["warning"] = _("The Solr schema requires an id field for performing the result highlighting")

  if normalized_facets:
    augmented["normalized_facets"].extend(normalized_facets)

  return augmented
def view_results(request, id, first_row=0):
  """
  Returns the view for the results of the QueryHistory with the given id.

  The query results MUST be ready.
  To display query results, one should always go through the execute_query view.
  If the result set has has_result_set=False, display an empty result.

  If ``first_row`` is 0, restarts (if necessary) the query read. Otherwise, just
  spits out a warning if first_row doesn't match the server's conception.
  Multiple readers will produce a confusing interaction here, and that's known.

  It understands the ``context`` GET parameter. (See execute_query().)
  """
  first_row = long(first_row)
  start_over = (first_row == 0)
  results = type('Result', (object,), {
      'rows': 0,
      'columns': [],
      'has_more': False,
      'start_row': 0,
  })
  data = []
  fetch_error = False
  error_message = ''
  log = ''
  columns = []
  app_name = get_app_name(request)

  query_history = authorized_get_query_history(request, id, must_exist=True)
  query_server = query_history.get_query_server_config()
  db = dbms.get(request.user, query_server)

  handle, state = _get_query_handle_and_state(query_history)
  context_param = request.GET.get('context', '')
  query_context = parse_query_context(context_param)

  # Update the status as expired should not be accessible
  # Impala does not support startover for now
  expired = state == models.QueryHistory.STATE.expired

  # Retrieve query results or use empty result if no result set
  try:
    if query_server['server_name'] == 'impala' and not handle.has_result_set:
      downloadable = False
    else:
      results = db.fetch(handle, start_over, 100)
      data = []

      # Materialize and HTML escape results
      # TODO: use Number + list comprehension
      for row in results.rows():
        escaped_row = []
        for field in row:
          if isinstance(field, (int, long, float, complex, bool)):
            escaped_field = field
          elif field is None:
            escaped_field = 'NULL'
          else:
            field = smart_unicode(field, errors='replace')  # Prevent error when getting back non utf8 like charset=iso-8859-1
            escaped_field = escape(field).replace(' ', '&nbsp;')
          escaped_row.append(escaped_field)
        data.append(escaped_row)

      # We display the "Download" button only when we know that there are results:
      downloadable = first_row > 0 or data
      log = db.get_log(handle)
      columns = results.data_table.cols()
  except Exception, ex:
    fetch_error = True
    error_message, log = expand_exception(ex, db, handle)
def augment_solr_response(response, collection, query):
  augmented = response
  augmented['normalized_facets'] = []
  NAME = '%(field)s-%(id)s'
  normalized_facets = []

  selected_values = dict([(fq['id'], fq['filter']) for fq in query['fqs']])

  if response and response.get('facet_counts'):
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(facet['field'], selected_values.get(facet['id'], []),
                           response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif (category == 'range' or category == 'range-up') and response['facet_counts']['facet_ranges']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].iteritems():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'counts': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = NAME % facet
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(name, facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'field': name,
          'type': category,
          'label': name,
          'counts': count,
        }
        normalized_facets.append(facet)

  if response and response.get('facets'):
    for facet in collection['facets']:
      category = facet['type']
      name = facet['id']  # Nested facets can only have one name

      if category == 'function' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        facet = {
          'id': collection_facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'counts': value,
        }
        normalized_facets.append(facet)
      elif category == 'nested' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        extraSeries = []
        counts = response['facets'][name]['buckets']
        cols = ['%(field)s' % facet, 'count(%(field)s)' % facet]
        last_x_col = 0
        last_xx_col = 0
        for i, f in enumerate(facet['properties']['facets']):
          if f['aggregate']['function'] == 'count':
            cols.append(f['field'])
            last_xx_col = last_x_col
            last_x_col = i + 2
          cols.append(SolrApi._get_aggregate_function(f))
        rows = []

        # For dim in dimensions

        # Number or Date range
        if collection_facet['properties']['canRange'] and not facet['properties'].get('type') == 'field':
          dimension = 3 if collection_facet['properties']['isDate'] else 1

          # Single dimension or dimension 2 with analytics
          if not collection_facet['properties']['facets'] or collection_facet['properties']['facets'][0]['aggregate']['function'] != 'count' and len(collection_facet['properties']['facets']) == 1:
            column = 'count'
            if len(collection_facet['properties']['facets']) == 1:
              agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
              legend = agg_keys[0].split(':', 2)[1]
              column = agg_keys[0]
            else:
              legend = facet['field']  # 'count(%s)' % legend
              agg_keys = [column]

            _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
            counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet)
          else:
            # Dimension 1 with counts and 2 with analytics
            agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
            agg_keys.sort(key=lambda a: a[4:])
            if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
              agg_keys.insert(0, 'count')
            counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            _series = collections.defaultdict(list)

            for row in rows:
              for i, cell in enumerate(row):
                if i > last_x_col:
                  legend = cols[i]
                  if last_xx_col != last_x_col:
                    legend = '%s %s' % (cols[i], row[last_x_col])
                  _series[legend].append(row[last_xx_col])
                  _series[legend].append(cell)

            for name, val in _series.iteritems():
              _c = range_pair(facet['field'], name, selected_values.get(facet['id'], []), val, 1, collection_facet)
              extraSeries.append({'counts': _c, 'label': name})
            counts = []
        elif collection_facet['properties'].get('isOldPivot'):
          facet_fields = [collection_facet['field']] + [f['field'] for f in collection_facet['properties'].get('facets', []) if f['aggregate']['function'] == 'count']

          column = 'count'
          agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
          agg_keys.sort(key=lambda a: a[4:])
          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          # _convert_nested_to_augmented_pivot_nd(facet_fields, facet['id'], count, selected_values, dimension=2)
          dimension = len(facet_fields)
        elif not collection_facet['properties']['facets'] or (collection_facet['properties']['facets'][0]['aggregate']['function'] != 'count' and len(collection_facet['properties']['facets']) == 1):
          # Dimension 1 with 1 count or agg
          dimension = 1

          column = 'count'
          if len(collection_facet['properties']['facets']) == 1:
            agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
            legend = agg_keys[0].split(':', 2)[1]
            column = agg_keys[0]
          else:
            legend = facet['field']
            agg_keys = [column]

          _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
          counts = pairwise2(legend, selected_values.get(facet['id'], []), counts)
        else:
          # Dimension 2 with analytics or 1 with N aggregates
          dimension = 2
          agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
          agg_keys.sort(key=lambda a: a[4:])
          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          actual_dimension = 1 + sum([_f['aggregate']['function'] == 'count' for _f in collection_facet['properties']['facets']])
          counts = filter(lambda a: len(a['fq_fields']) == actual_dimension, counts)

        num_bucket = response['facets'][name]['numBuckets'] if 'numBuckets' in response['facets'][name] else len(response['facets'][name])
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': extraSeries,
          'dimension': dimension,
          'response': {'response': {'start': 0, 'numFound': num_bucket}},  # Todo * nested buckets + offsets
          'docs': [dict(zip(cols, row)) for row in rows],
          'fieldsAttributes': [Collection2._make_gridlayout_header_field({'name': col, 'type': 'aggr' if '(' in col else 'string'}) for col in cols]
        }
        normalized_facets.append(facet)

  # Remove unnecessary facet data
  if response:
    response.pop('facet_counts')
    response.pop('facets')

  # HTML escaping
  if not query.get('download'):
    id_field = collection.get('idField', '')
    for doc in response['response']['docs']:
      for field, value in doc.iteritems():
        if isinstance(value, numbers.Number):
          escaped_value = value
        elif field == '_childDocuments_':  # Nested documents
          escaped_value = value
        elif isinstance(value, list):  # Multivalue field
          escaped_value = [smart_unicode(escape(val), errors='replace') for val in value]
        else:
          value = smart_unicode(value, errors='replace')
          escaped_value = escape(value)
        doc[field] = escaped_value

      link = None
      if 'link-meta' in doc:
        meta = json.loads(doc['link-meta'])
        link = get_data_link(meta)

      doc['externalLink'] = link
      doc['details'] = []
      doc['hueId'] = smart_unicode(doc.get(id_field, ''))

  highlighted_fields = response.get('highlighting', {}).keys()
  if highlighted_fields and not query.get('download'):
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field in doc and smart_unicode(doc[id_field]) in highlighted_fields:
          highlighting = response['highlighting'][smart_unicode(doc[id_field])]
          if highlighting:
            escaped_highlighting = {}
            for field, hls in highlighting.iteritems():
              _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
              escaped_highlighting[field] = _hls[0] if len(_hls) == 1 else _hls
            doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")

  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
def _invoke(self, method, relpath=None, params=None, data=None, headers=None, files=None,
            allow_redirects=False, clear_cookies=False, log_response=True):
  """
  Invoke an API method.
  Print trace even when there is an exception.

  @return: Raw body or JSON dictionary (if response content type is JSON).
  """
  path = self._join_uri(relpath)
  start_time = time.time()
  resp = None

  try:
    resp = self._client.execute(method, path, params=params, data=data, headers=headers, files=files,
                                allow_redirects=allow_redirects, urlencode=self._urlencode, clear_cookies=clear_cookies)
  finally:
    # Output duration without content
    log_length = conf.REST_RESPONSE_SIZE.get() != -1 and conf.REST_RESPONSE_SIZE.get() if log_response else 0
    duration = time.time() - start_time
    try:
      req_data = smart_unicode(data, errors='replace')
      resp_content = smart_unicode(resp.content, errors='replace') if resp and resp.content else ''
      message = u'%s %s %s%s%s %s%s returned in %dms %s %s %s%s' % (
          method,
          type(self._client._session.auth) if self._client._session and self._client._session.auth else None,
          self._client._base_url,
          smart_unicode(path, errors='replace'),
          iri_to_uri('?' + urlencode(params)) if params else '',
          req_data[:log_length] if data else '',
          log_length and len(req_data) > log_length and '...' or '' if data else '',
          (duration * 1000),
          resp.status_code if resp else 0,
          len(resp_content) if resp else 0,
          resp_content[:log_length] if resp else '',
          log_length and len(resp_content) > log_length and '...' or '' if resp else ''
      )
    except:
      short_call_name = '%s %s' % (method, self._client._base_url)
      LOG.exception('Error logging return call %s' % short_call_name)
      message = '%s returned in %dms' % (short_call_name, duration)

    self._client.logger.disabled = 0
    log_if_slow_call(duration=duration, message=message, logger=self._client.logger)

  return resp
def guess_field_types(request):
  file_format = json.loads(request.POST.get('fileFormat', '{}'))

  if file_format['inputFormat'] == 'file':
    indexer = MorphlineIndexer(request.user, request.fs)
    path = urllib_unquote(file_format["path"])
    stream = request.fs.open(path)
    encoding = check_encoding(stream.read(10000))
    stream.seek(0)
    _convert_format(file_format["format"], inverse=True)

    format_ = indexer.guess_field_types({
      "file": {
        "stream": stream,
        "name": path
      },
      "format": file_format['format']
    })

    # Note: Would also need to set charset to table (only supported in Hive)
    if 'sample' in format_ and format_['sample']:
      format_['sample'] = escape_rows(format_['sample'], nulls_only=True, encoding=encoding)
    for col in format_['columns']:
      col['name'] = smart_unicode(col['name'], errors='replace', encoding=encoding)
  elif file_format['inputFormat'] == 'table':
    sample = get_api(request, {'type': 'hive'}).get_sample_data(
        {'type': 'hive'}, database=file_format['databaseName'], table=file_format['tableName'])
    db = dbms.get(request.user)
    table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])

    format_ = {
      "sample": sample['rows'][:4],
      "columns": [
        Field(col.name, HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type, 'string')).to_dict()
        for col in table_metadata.cols
      ]
    }
  elif file_format['inputFormat'] == 'query':
    query_id = file_format['query']['id'] if file_format['query'].get('id') else file_format['query']

    notebook = Notebook(document=Document2.objects.document(user=request.user, doc_id=query_id)).get_data()
    snippet = notebook['snippets'][0]
    db = get_api(request, snippet)

    if file_format.get('sampleCols'):
      columns = file_format.get('sampleCols')
      sample = file_format.get('sample')
    else:
      snippet['query'] = snippet['statement']
      try:
        sample = db.fetch_result(notebook, snippet, 4, start_over=True)['rows'][:4]
      except Exception as e:
        LOG.warn('Skipping sample data as query handle might be expired: %s' % e)
        sample = [[], [], [], [], []]
      columns = db.autocomplete(snippet=snippet, database='', table='')
      columns = [
        Field(col['name'], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict()
        for col in columns['extended_columns']
      ]

    format_ = {
      "sample": sample,
      "columns": columns,
    }
  elif file_format['inputFormat'] == 'rdbms':
    api = _get_api(request)
    sample = api.get_sample_data(None, database=file_format['rdbmsDatabaseName'], table=file_format['tableName'])

    format_ = {
      "sample": list(sample['rows'])[:4],
      "columns": [
        Field(col['name'], col['type']).to_dict()
        for col in sample['full_headers']
      ]
    }
  elif file_format['inputFormat'] == 'stream':
    if file_format['streamSelection'] == 'kafka':
      if file_format.get('kafkaSelectedTopics') == 'NavigatorAuditEvents':
        kafkaFieldNames = [
          'id', 'additionalInfo', 'allowed', 'collectionName', 'databaseName', 'db',
          'DELEGATION_TOKEN_ID', 'dst', 'entityId', 'family', 'impersonator', 'ip',
          'name', 'objectType', 'objType', 'objUsageType', 'operationParams',
          'operationText', 'op', 'opText', 'path', 'perms', 'privilege', 'qualifier',
          'QUERY_ID', 'resourcePath', 'service', 'SESSION_ID', 'solrVersion', 'src',
          'status', 'subOperation', 'tableName', 'table', 'time', 'type', 'url', 'user'
        ]
        kafkaFieldTypes = ['string'] * len(kafkaFieldNames)
        kafkaFieldNames.append('timeDate')
        kafkaFieldTypes.append('date')
      else:
        # Note: mocked here, should come from SFDC or Kafka API or sampling job
        kafkaFieldNames = file_format.get('kafkaFieldNames', '').split(',')
        kafkaFieldTypes = file_format.get('kafkaFieldTypes', '').split(',')

      data = """%(kafkaFieldNames)s
%(data)s""" % {
        'kafkaFieldNames': ','.join(kafkaFieldNames),
        'data': '\n'.join([','.join(['...'] * len(kafkaFieldTypes))] * 5)
      }
      stream = string_io()
      stream.write(data)

      _convert_format(file_format["format"], inverse=True)

      indexer = MorphlineIndexer(request.user, request.fs)
      format_ = indexer.guess_field_types({
        "file": {
          "stream": stream,
          "name": file_format['path']
        },
        "format": file_format['format']
      })
      type_mapping = dict(list(zip(kafkaFieldNames, kafkaFieldTypes)))

      for col in format_['columns']:
        col['keyType'] = type_mapping[col['name']]
        col['type'] = type_mapping[col['name']]
    elif file_format['streamSelection'] == 'flume':
      if 'hue-httpd/access_log' in file_format['channelSourcePath']:
        columns = [
          {'name': 'id', 'type': 'string', 'unique': True},
          {'name': 'client_ip', 'type': 'string'},
          {'name': 'time', 'type': 'date'},
          {'name': 'request', 'type': 'string'},
          {'name': 'code', 'type': 'plong'},
          {'name': 'bytes', 'type': 'plong'},
          {'name': 'method', 'type': 'string'},
          {'name': 'url', 'type': 'string'},
          {'name': 'protocol', 'type': 'string'},
          {'name': 'app', 'type': 'string'},
          {'name': 'subapp', 'type': 'string'}
        ]
      else:
        columns = [{'name': 'message', 'type': 'string'}]

      format_ = {
        "sample": [['...'] * len(columns)] * 4,
        "columns": [
          Field(col['name'], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string'), unique=col.get('unique')).to_dict()
          for col in columns
        ]
      }
  elif file_format['inputFormat'] == 'connector':
    if file_format['connectorSelection'] == 'sfdc':
      sf = Salesforce(
          username=file_format['streamUsername'],
          password=file_format['streamPassword'],
          security_token=file_format['streamToken']
      )
      table_metadata = [{
        'name': column['name'],
        'type': column['type']
      } for column in sf.restful('sobjects/%(streamObject)s/describe/' % file_format)['fields']]
      query = 'SELECT %s FROM %s LIMIT 4' % (
          ', '.join([col['name'] for col in table_metadata]), file_format['streamObject'])
      print(query)

      try:
        records = sf.query_all(query)
      except SalesforceRefusedRequest as e:
        raise PopupException(message=str(e))

      format_ = {
        "sample": [list(row.values())[1:] for row in records['records']],
        "columns": [
          Field(col['name'], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict()
          for col in table_metadata
        ]
      }
    else:
      raise PopupException(_('Connector format not recognized: %(connectorSelection)s') % file_format)
  else:
    raise PopupException(_('Input format not recognized: %(inputFormat)s') % file_format)

  return JsonResponse(format_)
def guess_field_types(request):
  file_format = json.loads(request.POST.get('fileFormat', '{}'))

  if file_format['inputFormat'] == 'file':
    indexer = MorphlineIndexer(request.user, request.fs)
    path = urllib.unquote(file_format["path"])
    stream = request.fs.open(path)
    encoding = chardet.detect(stream.read(10000)).get('encoding')
    stream.seek(0)
    _convert_format(file_format["format"], inverse=True)

    format_ = indexer.guess_field_types({
      "file": {
        "stream": stream,
        "name": path
      },
      "format": file_format['format']
    })

    # Note: Would also need to set charset to table (only supported in Hive)
    if 'sample' in format_:
      format_['sample'] = escape_rows(format_['sample'], nulls_only=True, encoding=encoding)
    for col in format_['columns']:
      col['name'] = smart_unicode(col['name'], errors='replace', encoding=encoding)
  elif file_format['inputFormat'] == 'table':
    sample = get_api(request, {'type': 'hive'}).get_sample_data(
        {'type': 'hive'}, database=file_format['databaseName'], table=file_format['tableName'])
    db = dbms.get(request.user)
    table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])

    format_ = {
      "sample": sample['rows'][:4],
      "columns": [
        Field(col.name, HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type, 'string')).to_dict()
        for col in table_metadata.cols
      ]
    }
  elif file_format['inputFormat'] == 'query':
    query_id = file_format['query']['id'] if file_format['query'].get('id') else file_format['query']

    notebook = Notebook(document=Document2.objects.document(user=request.user, doc_id=query_id)).get_data()
    snippet = notebook['snippets'][0]
    db = get_api(request, snippet)

    if file_format.get('sampleCols'):
      columns = file_format.get('sampleCols')
      sample = file_format.get('sample')
    else:
      snippet['query'] = snippet['statement']
      try:
        sample = db.fetch_result(notebook, snippet, 4, start_over=True)['rows'][:4]
      except Exception, e:
        LOG.warn('Skipping sample data as query handle might be expired: %s' % e)
        sample = [[], [], [], [], []]
      columns = db.autocomplete(snippet=snippet, database='', table='')
      columns = [
        Field(col['name'], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict()
        for col in columns['extended_columns']
      ]

    format_ = {
      "sample": sample,
      "columns": columns,
    }
def guess_field_types(request):
  file_format = json.loads(request.POST.get('fileFormat', '{}'))

  if file_format['inputFormat'] == 'localfile':
    path = urllib_unquote(file_format['path'])

    with open(path, 'r') as local_file:
      reader = csv.reader(local_file)
      csv_data = list(reader)

      if file_format['format']['hasHeader']:
        sample = csv_data[1:5]
        column_row = [re.sub('[^0-9a-zA-Z]+', '_', col) for col in csv_data[0]]
      else:
        sample = csv_data[:4]
        column_row = ['field_' + str(count + 1) for count, col in enumerate(sample[0])]

      field_type_guesses = []
      for count, col in enumerate(column_row):
        column_samples = [sample_row[count] for sample_row in sample if len(sample_row) > count]
        field_type_guess = guess_field_type_from_samples(column_samples)
        field_type_guesses.append(field_type_guess)

      columns = [
        Field(column_row[count], field_type_guesses[count]).to_dict()
        for count, col in enumerate(column_row)
      ]

      format_ = {'columns': columns, 'sample': sample}
  elif file_format['inputFormat'] == 'file':
    indexer = MorphlineIndexer(request.user, request.fs)
    path = urllib_unquote(file_format["path"])

    if path[-3:] == 'xls' or path[-4:] == 'xlsx':
      path = excel_to_csv_file_name_change(path)

    stream = request.fs.open(path)
    encoding = check_encoding(stream.read(10000))
    LOG.debug('File %s encoding is %s' % (path, encoding))
    stream.seek(0)
    _convert_format(file_format["format"], inverse=True)

    format_ = indexer.guess_field_types({
      "file": {
        "stream": stream,
        "name": path
      },
      "format": file_format['format']
    })

    # Note: Would also need to set charset to table (only supported in Hive)
    if 'sample' in format_ and format_['sample']:
      format_['sample'] = escape_rows(format_['sample'], nulls_only=True, encoding=encoding)
    for col in format_['columns']:
      col['name'] = smart_unicode(col['name'], errors='replace', encoding=encoding)
  elif file_format['inputFormat'] == 'table':
    sample = get_api(request, {'type': 'hive'}).get_sample_data(
        {'type': 'hive'}, database=file_format['databaseName'], table=file_format['tableName'])
    db = dbms.get(request.user)
    table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])

    format_ = {
      "sample": sample['rows'][:4],
      "columns": [
        Field(col.name, HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type, 'string')).to_dict()
        for col in table_metadata.cols
      ]
    }
  elif file_format['inputFormat'] == 'query':
    query_id = file_format['query']['id'] if file_format['query'].get('id') else file_format['query']

    notebook = Notebook(document=Document2.objects.document(user=request.user, doc_id=query_id)).get_data()
    snippet = notebook['snippets'][0]
    db = get_api(request, snippet)

    if file_format.get('sampleCols'):
      columns = file_format.get('sampleCols')
      sample = file_format.get('sample')
    else:
      snippet['query'] = snippet['statement']
      try:
        sample = db.fetch_result(notebook, snippet, 4, start_over=True)['rows'][:4]
      except Exception as e:
        LOG.warning('Skipping sample data as query handle might be expired: %s' % e)
        sample = [[], [], [], [], []]
      columns = db.autocomplete(snippet=snippet, database='', table='')
      columns = [
        Field(col['name'], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict()
        for col in columns['extended_columns']
      ]

    format_ = {
      "sample": sample,
      "columns": columns,
    }
  elif file_format['inputFormat'] == 'rdbms':
    api = _get_api(request)
    sample = api.get_sample_data(None, database=file_format['rdbmsDatabaseName'], table=file_format['tableName'])

    format_ = {
      "sample": list(sample['rows'])[:4],
      "columns": [
        Field(col['name'], col['type']).to_dict()
        for col in sample['full_headers']
      ]
    }
  elif file_format['inputFormat'] == 'stream':
    if file_format['streamSelection'] == 'kafka':
      data = get_topic_data(request.user, file_format.get('kafkaSelectedTopics'))

      kafkaFieldNames = [col['name'] for col in data['full_headers']]
      kafkaFieldTypes = [col['type'] for col in data['full_headers']]
      topics_data = data['rows']

      format_ = {
        "sample": topics_data,
        "columns": [
          Field(col, 'string', unique=False).to_dict()
          for col in kafkaFieldNames
        ]
      }
    elif file_format['streamSelection'] == 'flume':
      if 'hue-httpd/access_log' in file_format['channelSourcePath']:
        columns = [
          {'name': 'id', 'type': 'string', 'unique': True},
          {'name': 'client_ip', 'type': 'string'},
          {'name': 'time', 'type': 'date'},
          {'name': 'request', 'type': 'string'},
          {'name': 'code', 'type': 'plong'},
          {'name': 'bytes', 'type': 'plong'},
          {'name': 'method', 'type': 'string'},
          {'name': 'url', 'type': 'string'},
          {'name': 'protocol', 'type': 'string'},
          {'name': 'app', 'type': 'string'},
          {'name': 'subapp', 'type': 'string'}
        ]
      else:
        columns = [{'name': 'message', 'type': 'string'}]

      format_ = {
        "sample": [['...'] * len(columns)] * 4,
        "columns": [
          Field(col['name'], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string'), unique=col.get('unique')).to_dict()
          for col in columns
        ]
      }
  elif file_format['inputFormat'] == 'connector':
    if file_format['connectorSelection'] == 'sfdc':
      sf = Salesforce(
          username=file_format['streamUsername'],
          password=file_format['streamPassword'],
          security_token=file_format['streamToken']
      )
      table_metadata = [{
        'name': column['name'],
        'type': column['type']
      } for column in sf.restful('sobjects/%(streamObject)s/describe/' % file_format)['fields']]
      query = 'SELECT %s FROM %s LIMIT 4' % (
          ', '.join([col['name'] for col in table_metadata]), file_format['streamObject'])
      print(query)

      try:
        records = sf.query_all(query)
      except SalesforceRefusedRequest as e:
        raise PopupException(message=str(e))

      format_ = {
        "sample": [list(row.values())[1:] for row in records['records']],
        "columns": [
          Field(col['name'], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict()
          for col in table_metadata
        ]
      }
    else:
      raise PopupException(_('Connector format not recognized: %(connectorSelection)s') % file_format)
  else:
    raise PopupException(_('Input format not recognized: %(inputFormat)s') % file_format)

  return JsonResponse(format_)
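# The 'localfile' branch above leans on guess_field_type_from_samples, which is
# defined elsewhere in the module. A hypothetical minimal version, to illustrate
# the kind of per-column inference it performs (the function name suffix and the
# type labels below are illustrative, not Hue's exact ones):
def guess_field_type_from_samples_sketch(samples):
  # Try progressively looser parses; fall back to string.
  def all_parse(parser):
    try:
      for s in samples:
        parser(s)
      return bool(samples)
    except ValueError:
      return False

  if all_parse(int):
    return 'bigint'
  if all_parse(float):
    return 'double'
  return 'string'

print(guess_field_type_from_samples_sketch(['1', '2']))    # bigint
print(guess_field_type_from_samples_sketch(['1.5', '2']))  # double
print(guess_field_type_from_samples_sketch(['a', '2']))    # string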
def augment_solr_response(response, collection, query):
  augmented = response
  augmented['normalized_facets'] = []

  normalized_facets = []
  selected_values = dict([((fq['id'], fq['field'], fq['type']), fq['filter']) for fq in query['fqs']])

  if response and response.get('facet_counts'):
    # e.g. [{u'field': u'sun', u'type': u'query', u'id': u'67b43a63-ed22-747b-47e8-b31aad1431ea', u'label': u'sun'}]
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = facet['field']
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(name, selected_values.get((facet['id'], name, category), []),
                           response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': name,
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          # add total result count?
        }
        normalized_facets.append(facet)
      elif category == 'range' and response['facet_counts']['facet_ranges']:
        name = facet['field']
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(name, selected_values.get((facet['id'], name, 'range'), []), counts, end)
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': name,
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].iteritems():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'count': value,
          }
          normalized_facets.append(facet)
      # pivot_facet

  # HTML escaping
  for doc in response['response']['docs']:
    for field, value in doc.iteritems():
      if isinstance(value, numbers.Number):
        escaped_value = value
      else:
        value = smart_unicode(value, errors='replace')
        escaped_value = escape(value)
      doc[field] = escaped_value

    doc['showDetails'] = False
    doc['details'] = []

  highlighted_fields = response.get('highlighting', {}).keys()
  if highlighted_fields:
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field in doc and doc[id_field] in highlighted_fields:
          doc.update(response['highlighting'][doc[id_field]])
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")

  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
  try:
    return func(*args, **kwargs)
  except SessionExpired, e:
    response['status'] = -2
  except QueryExpired, e:
    response['status'] = -3
  except AuthenticationRequired, e:
    response['status'] = 401
  except ValidationError, e:
    LOG.exception('Error validation %s' % func)
    response['status'] = -1
    response['message'] = e.message
  except QueryError, e:
    LOG.exception('Error running %s' % func)
    response['status'] = 1
    response['message'] = smart_unicode(e)
    if e.handle:
      response['handle'] = e.handle
    if e.extra:
      response.update(e.extra)
  except Exception, e:
    LOG.exception('Error running %s' % func)
    response['status'] = -1
    response['message'] = smart_unicode(e)
  finally:
    if response:
      return JsonResponse(response)

return decorator
def augment_solr_response(response, collection, query):
  augmented = response
  augmented['normalized_facets'] = []
  NAME = '%(field)s-%(id)s'
  normalized_facets = []

  selected_values = dict([(fq['id'], fq['filter']) for fq in query['fqs']])

  if response and response.get('facet_counts'):
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif (category == 'range' or category == 'range-up') and response['facet_counts']['facet_ranges']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].iteritems():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'counts': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = NAME % facet
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(name, facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'field': name,
          'type': category,
          'label': name,
          'counts': count,
        }
        normalized_facets.append(facet)

  if response and response.get('facets'):
    for facet in collection['facets']:
      category = facet['type']
      name = facet['id'] # Nested facets can only have one name

      if category == 'function' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        facet = {
          'id': collection_facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'counts': value,
        }
        normalized_facets.append(facet)
      elif category == 'nested' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        extraSeries = []
        counts = response['facets'][name]['buckets']
        cols = ['%(field)s' % facet, 'count(%(field)s)' % facet]
        last_x_col = 0
        last_xx_col = 0
        for i, f in enumerate(facet['properties']['facets']):
          if f['aggregate']['function'] == 'count':
            cols.append(f['field'])
            last_xx_col = last_x_col
            last_x_col = i + 2
          cols.append(SolrApi._get_aggregate_function(f))
        rows = []

        # For dim in dimensions

        # Number or Date range
        if collection_facet['properties']['canRange'] and not facet['properties'].get('type') == 'field':
          dimension = 3
          # Single dimension or dimension 2 with analytics
          if not collection_facet['properties']['facets'] or (collection_facet['properties']['facets'][0]['aggregate']['function'] != 'count' and len(collection_facet['properties']['facets']) == 1):
            column = 'count'
            if len(collection_facet['properties']['facets']) == 1:
              agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
              legend = agg_keys[0].split(':', 2)[1]
              column = agg_keys[0]
            else:
              legend = facet['field'] # 'count(%s)' % legend
              agg_keys = [column]

            _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
            counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet)
          else:
            # Dimension 1 with counts and 2 with analytics
            agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
            agg_keys.sort(key=lambda a: a[4:])
            if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
              agg_keys.insert(0, 'count')
            counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            _series = collections.defaultdict(list)

            for row in rows:
              for i, cell in enumerate(row):
                if i > last_x_col:
                  legend = cols[i]
                  if last_xx_col != last_x_col:
                    legend = '%s %s' % (cols[i], row[last_x_col])
                  _series[legend].append(row[last_xx_col])
                  _series[legend].append(cell)

            for name, val in _series.iteritems():
              _c = range_pair(facet['field'], name, selected_values.get(facet['id'], []), val, 1, collection_facet)
              extraSeries.append({'counts': _c, 'label': name})
            counts = []
        elif collection_facet['properties'].get('isOldPivot'):
          facet_fields = [collection_facet['field']] + [f['field'] for f in collection_facet['properties'].get('facets', []) if f['aggregate']['function'] == 'count']

          column = 'count'
          agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
          agg_keys.sort(key=lambda a: a[4:])
          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)
          dimension = len(facet_fields)
        elif not collection_facet['properties']['facets'] or (collection_facet['properties']['facets'][0]['aggregate']['function'] != 'count' and len(collection_facet['properties']['facets']) == 1):
          # Dimension 1 with 1 count or agg
          dimension = 1

          column = 'count'
          if len(collection_facet['properties']['facets']) == 1:
            agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
            legend = agg_keys[0].split(':', 2)[1]
            column = agg_keys[0]
          else:
            legend = facet['field']
            agg_keys = [column]

          _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
          counts = pairwise2(legend, selected_values.get(facet['id'], []), counts)
        else:
          # Dimension 2 with analytics or 1 with N aggregates
          dimension = 2
          agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
          agg_keys.sort(key=lambda a: a[4:])
          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)
          actual_dimension = 1 + sum([_f['aggregate']['function'] == 'count' for _f in collection_facet['properties']['facets']])
          counts = filter(lambda a: len(a['fq_fields']) == actual_dimension, counts)

        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': extraSeries,
          'dimension': dimension,
          'response': {'response': {'start': 0, 'numFound': response['facets'][name]['numBuckets']}}, # TODO: nested buckets + offsets
          'docs': [dict(zip(cols, row)) for row in rows],
          'fieldsAttributes': [Collection2._make_gridlayout_header_field({'name': col, 'type': 'aggr' if '(' in col else 'string'}) for col in cols]
        }

        normalized_facets.append(facet)

  # Remove unnecessary facet data
  if response:
    response.pop('facet_counts', None) # default avoids a KeyError when the key is absent
    response.pop('facets', None)

  # HTML escaping
  if not query.get('download'):
    id_field = collection.get('idField', '')

    for doc in response['response']['docs']:
      for field, value in doc.iteritems():
        if isinstance(value, numbers.Number):
          escaped_value = value
        elif field == '_childDocuments_': # Nested documents
          escaped_value = value
        elif isinstance(value, list): # Multivalue field
          escaped_value = [smart_unicode(escape(val), errors='replace') for val in value]
        else:
          value = smart_unicode(value, errors='replace')
          escaped_value = escape(value)
        doc[field] = escaped_value

      link = None
      if 'link-meta' in doc:
        meta = json.loads(doc['link-meta'])
        link = get_data_link(meta)

      doc['externalLink'] = link
      doc['details'] = []
      doc['hueId'] = smart_unicode(doc.get(id_field, ''))

  highlighted_fields = response.get('highlighting', {}).keys()
  if highlighted_fields and not query.get('download'):
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field in doc and smart_unicode(doc[id_field]) in highlighted_fields:
          highlighting = response['highlighting'][smart_unicode(doc[id_field])]

          if highlighting:
            escaped_highlighting = {}
            for field, hls in highlighting.iteritems():
              # Escape everything, then restore only Solr's <em> highlight markers
              _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
              escaped_highlighting[field] = _hls[0] if len(_hls) == 1 else _hls

            doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")

  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
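# The bucket-flattening comprehension above is dense, so here is a minimal,
# self-contained sketch of what it does. The bucket data is made up; the
# 'val'/'count' keys mirror the JSON facet API fields the function reads.
def _flatten_buckets_example():
  buckets = [
    {'val': '2015-01-01T00:00:00Z', 'count': 4},
    {'val': '2015-01-02T00:00:00Z', 'count': 7},
  ]
  column = 'count'
  # Interleaves each bucket's value and metric into the flat
  # [val, count, val, count, ...] layout that range_pair() and pairwise2() consume.
  flat = [_v for _f in buckets for _v in (_f['val'], _f[column])]
  assert flat == ['2015-01-01T00:00:00Z', 4, '2015-01-02T00:00:00Z', 7]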
def augment_solr_response(response, collection, query):
  augmented = response
  augmented['normalized_facets'] = []
  NAME = '%(field)s-%(id)s'
  normalized_facets = []

  selected_values = dict([(fq['id'], fq['filter']) for fq in query['fqs']])

  if response and response.get('facet_counts'):
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif (category == 'range' or category == 'range-up') and response['facet_counts']['facet_ranges']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].iteritems():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'counts': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = NAME % facet
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(name, facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'field': name,
          'type': category,
          'label': name,
          'counts': count,
        }
        normalized_facets.append(facet)

  if response and response.get('facets'):
    for facet in collection['facets']:
      category = facet['type']
      name = facet['id'] # Nested facets can only have one name

      if category == 'function' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        facet = {
          'id': collection_facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'counts': value,
        }
        normalized_facets.append(facet)
      elif category == 'nested' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = _augment_stats_2d(name, facet, response['facets'][name]['buckets'], selected_values)
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)

  # Remove unnecessary facet data
  if response:
    response.pop('facet_counts', None) # default avoids a KeyError when the key is absent
    response.pop('facets', None)

  # HTML escaping
  for doc in response['response']['docs']:
    for field, value in doc.iteritems():
      if isinstance(value, numbers.Number):
        escaped_value = value
      else:
        value = smart_unicode(value, errors='replace')
        escaped_value = escape(value)
      doc[field] = escaped_value

    if not query.get('download'):
      doc['showDetails'] = False
      doc['details'] = []

  highlighted_fields = response.get('highlighting', {}).keys()
  if highlighted_fields and not query.get('download'):
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field in doc and str(doc[id_field]) in highlighted_fields:
          highlighting = response['highlighting'][str(doc[id_field])]
          if highlighting:
            escaped_highlighting = {}
            for field, hls in highlighting.iteritems():
              # Escape everything, then restore only Solr's <em> highlight markers
              _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
              escaped_highlighting[field] = _hls
            doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")

  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
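# pairwise2() is defined elsewhere in the codebase. Judging only from its call
# sites here, it walks a flat [value, count, value, count, ...] list and marks
# which values are currently selected. This is a hypothetical reimplementation
# consistent with that usage, not the real helper.
import itertools

def pairwise2_sketch(field, selected_filters, flat_counts):
  selected = set(f['value'] for f in selected_filters) # assumed filter shape
  it = iter(flat_counts)
  return [{
    'cat': field,
    'value': value,
    'count': count,
    'selected': value in selected,
  } for value, count in itertools.izip(it, it)] # Python 2; use zip() on Python 3

# e.g. pairwise2_sketch('year', [{'value': '2014'}], ['2014', 10, '2015', 3])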
def rerun_oozie_coordinator(request, job_id, app_path):
  oozie_coordinator = check_job_access_permission(request, job_id)
  check_job_edition_permission(oozie_coordinator, request.user)
  ParametersFormSet = formset_factory(ParameterForm, extra=0)

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)
    rerun_form = RerunCoordForm(request.POST, oozie_coordinator=oozie_coordinator)

    if sum([rerun_form.is_valid(), params_form.is_valid()]) == 2:
      args = {}
      args['deployment_dir'] = app_path

      params = {
        'type': 'action',
        # 'aggreate' (sic) keeps the method name as spelled on the coordinator object
        'scope': ','.join(oozie_coordinator.aggreate(rerun_form.cleaned_data['actions'])),
        'refresh': rerun_form.cleaned_data['refresh'],
        'nocleanup': rerun_form.cleaned_data['nocleanup'],
      }

      properties = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      _rerun_coordinator(request, job_id, args, params, properties)

      request.info(_('Coordinator re-running.'))

      return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s') % smart_unicode(rerun_form.errors))
      return list_oozie_coordinator(request, job_id)
  else:
    rerun_form = RerunCoordForm(oozie_coordinator=oozie_coordinator)
    initial_params = ParameterForm.get_initial_params(oozie_coordinator.conf_dict)
    params_form = ParametersFormSet(initial=initial_params)

  popup = render('dashboard/rerun_coord_popup.mako', request, {
      'rerun_form': rerun_form,
      'params_form': params_form,
      'action': reverse('oozie:rerun_oozie_coord', kwargs={'job_id': job_id, 'app_path': app_path}),
    }, force_template=True).content

  return HttpResponse(json.dumps(popup), mimetype="application/json")
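# The sum([... .is_valid() ...]) == 2 guard above validates the rerun form and
# the parameter formset together, so both error sets get populated (a plain
# `and` would short-circuit on the first failure). A minimal sketch of the
# formset_factory() pattern with a made-up stand-in for Hue's ParameterForm;
# only the name/value shape is assumed from the code above.
from django import forms
from django.forms.formsets import formset_factory

class ParameterFormSketch(forms.Form):
  name = forms.CharField()
  value = forms.CharField(required=False)

ParametersFormSetSketch = formset_factory(ParameterFormSketch, extra=0)

# Bound from POST data, then reduced to a {name: value} dict exactly as above:
#   params_form = ParametersFormSetSketch(request.POST)
#   if params_form.is_valid():
#     properties = dict([(p['name'], p['value']) for p in params_form.cleaned_data])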
def view_results(request, id, first_row=0):
  """
  Returns the view for the results of the QueryHistory with the given id.

  The query results MUST be ready.
  To display query results, one should always go through the execute_query view.
  If the result set has has_result_set=False, display an empty result.

  If ``first_row`` is 0, restarts (if necessary) the query read. Otherwise, just
  spits out a warning if first_row doesn't match the server's conception.
  Multiple readers will produce a confusing interaction here, and that's known.

  It understands the ``context`` GET parameter. (See execute_query().)
  """
  first_row = long(first_row)
  start_over = (first_row == 0)
  results = type('Result', (object,), {
      'rows': 0,
      'columns': [],
      'has_more': False,
      'start_row': 0,
    })
  data = []
  fetch_error = False
  error_message = ''
  log = ''
  columns = []
  app_name = get_app_name(request)

  query_history = authorized_get_query_history(request, id, must_exist=True)
  query_server = query_history.get_query_server_config()
  db = dbms.get(request.user, query_server)

  handle, state = _get_query_handle_and_state(query_history)
  context_param = request.GET.get('context', '')
  query_context = parse_query_context(context_param)

  # Update the status as expired should not be accessible
  expired = state == models.QueryHistory.STATE.expired

  # Retrieve query results or use empty result if no result set
  try:
    if query_server['server_name'] == 'impala' and not handle.has_result_set:
      downloadable = False
    else:
      results = db.fetch(handle, start_over, 100)
      data = []

      # Materialize and HTML escape results
      # TODO: use Number + list comprehension
      for row in results.rows():
        escaped_row = []
        for field in row:
          if isinstance(field, (int, long, float, complex, bool)):
            if math.isnan(field) or math.isinf(field):
              escaped_field = json.dumps(field)
            else:
              escaped_field = field
          elif field is None:
            escaped_field = 'NULL'
          else:
            field = smart_unicode(field, errors='replace') # Prevent error when getting back non utf8 like charset=iso-8859-1
            escaped_field = escape(field).replace(' ', '&nbsp;')
          escaped_row.append(escaped_field)
        data.append(escaped_row)

      # We display the "Download" button only when we know that there are results:
      downloadable = first_row > 0 or data
      log = db.get_log(handle)
      columns = results.data_table.cols()

  except Exception, ex:
    fetch_error = True
    error_message, log = expand_exception(ex, db, handle)
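# Why the NaN/Infinity special case above exists: those floats have no JSON
# literal, and json.dumps() renders them as the JavaScript tokens NaN/Infinity,
# which at least display safely as text. A quick demonstration:
import json
import math

for f in (float('nan'), float('inf'), 1.5):
  if math.isnan(f) or math.isinf(f):
    print json.dumps(f) # prints NaN / Infinity (not strict JSON, but displayable)
  else:
    print f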
def augment_solr_response(response, collection, query):
  augmented = response
  augmented['normalized_facets'] = []
  NAME = '%(field)s-%(id)s'
  normalized_facets = []

  selected_values = dict([(fq['id'], fq['filter']) for fq in query['fqs']])

  if response and response.get('facet_counts'):
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif (category == 'range' or category == 'range-up') and response['facet_counts']['facet_ranges']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].iteritems():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'counts': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = NAME % facet
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(name, facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'field': name,
          'type': category,
          'label': name,
          'counts': count,
        }
        normalized_facets.append(facet)

  # Remove unnecessary facet data
  if response:
    response.pop('facet_counts', None) # default avoids a KeyError when the key is absent

  # HTML escaping
  for doc in response['response']['docs']:
    for field, value in doc.iteritems():
      if isinstance(value, numbers.Number):
        escaped_value = value
      else:
        value = smart_unicode(value, errors='replace')
        escaped_value = escape(value)
      doc[field] = escaped_value

    if not query.get('download'):
      doc['showDetails'] = False
      doc['details'] = []

  highlighted_fields = response.get('highlighting', {}).keys()
  if highlighted_fields and not query.get('download'):
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field in doc and str(doc[id_field]) in highlighted_fields:
          highlighting = response['highlighting'][str(doc[id_field])]
          if highlighting:
            escaped_highlighting = {}
            for field, hls in highlighting.iteritems():
              # Escape everything, then restore only Solr's <em> highlight markers
              _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
              escaped_highlighting[field] = _hls
            doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")

  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
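# The highlight post-processing above escapes everything, then re-allows only
# Solr's <em> markers, so highlight markup survives while all other HTML stays
# escaped. A minimal sketch, assuming escape() is django.utils.html.escape as
# elsewhere in this code:
from django.utils.html import escape

def escape_highlight_sketch(hl):
  s = escape(hl) # '<' -> '&lt;' etc. for the whole snippet
  return s.replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>')

assert escape_highlight_sketch('<b>x</b> <em>hit</em>') == '&lt;b&gt;x&lt;/b&gt; <em>hit</em>'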
def augment_solr_response(response, collection, query):
  augmented = response
  augmented['normalized_facets'] = []
  NAME = '%(field)s-%(id)s'
  normalized_facets = []

  selected_values = dict([(fq['id'], fq['filter']) for fq in query['fqs']])

  if response and response.get('facet_counts'):
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif (category == 'range' or category == 'range-up') and response['facet_counts']['facet_ranges']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].iteritems():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'counts': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = NAME % facet
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(name, facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'field': name,
          'type': category,
          'label': name,
          'counts': count,
        }
        normalized_facets.append(facet)

  if response and response.get('facets'):
    for facet in collection['facets']:
      category = facet['type']
      name = facet['id'] # Nested facets can only have one name

      if category == 'function' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        facet = {
          'id': collection_facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'counts': value,
        }
        normalized_facets.append(facet)
      elif category == 'nested' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        extraSeries = []
        counts = response['facets'][name]['buckets']

        # Date range
        if collection_facet['properties']['isDate']:
          dimension = 3
          # Single dimension or dimension 2 with analytics
          if not collection_facet['properties']['facets'] or collection_facet['properties']['facets'][0]['aggregate'] not in ('count', 'unique'):
            counts = [_v for _f in counts for _v in (_f['val'], _f['d2'] if 'd2' in _f else _f['count'])]
            counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet)
          else:
            # Dimension 1 with counts and 2 with analytics
            _series = collections.defaultdict(list)

            for f in counts:
              for bucket in (f['d2']['buckets'] if 'd2' in f else []):
                _series[bucket['val']].append(f['val'])
                _series[bucket['val']].append(bucket['d2'] if 'd2' in bucket else bucket['count'])

            for name, val in _series.iteritems():
              _c = range_pair(facet['field'], name, selected_values.get(facet['id'], []), val, 1, collection_facet)
              extraSeries.append({'counts': _c, 'label': name})
            counts = []
        elif not collection_facet['properties']['facets'] or collection_facet['properties']['facets'][0]['aggregate'] not in ('count', 'unique'):
          # Single dimension or dimension 2 with analytics
          dimension = 1
          counts = [_v for _f in counts for _v in (_f['val'], _f['d2'] if 'd2' in _f else _f['count'])]
          counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), counts)
        else:
          # Dimension 1 with counts and 2 with analytics
          dimension = 2
          counts = _augment_stats_2d(name, facet, counts, selected_values)

        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()

        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': extraSeries,
          'dimension': dimension
        }

        normalized_facets.append(facet)

  # Remove unnecessary facet data
  if response:
    response.pop('facet_counts', None) # default avoids a KeyError when the key is absent
    response.pop('facets', None)

  # HTML escaping
  for doc in response['response']['docs']:
    for field, value in doc.iteritems():
      if isinstance(value, numbers.Number):
        escaped_value = value
      else:
        value = smart_unicode(value, errors='replace')
        escaped_value = escape(value)
      doc[field] = escaped_value

    if not query.get('download'):
      link = None
      if 'link-meta' in doc:
        meta = json.loads(doc['link-meta'])
        link = get_data_link(meta)

      doc['externalLink'] = link
      doc['details'] = []

  highlighted_fields = response.get('highlighting', {}).keys()
  if highlighted_fields and not query.get('download'):
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field in doc and smart_unicode(doc[id_field]) in highlighted_fields:
          highlighting = response['highlighting'][smart_unicode(doc[id_field])]
          if highlighting:
            escaped_highlighting = {}
            for field, hls in highlighting.iteritems():
              # Escape everything, then restore only Solr's <em> highlight markers
              _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
              escaped_highlighting[field] = _hls
            doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")

  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
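# How the defaultdict(list) loop above splits nested buckets into extra chart
# series, shown with made-up data: the outer 'val' is the x-axis, and each
# inner d2 bucket value becomes its own series.
import collections

counts_example = [
  {'val': '2015-01', 'd2': {'buckets': [{'val': 'jobs', 'count': 3}, {'val': 'queries', 'count': 5}]}},
  {'val': '2015-02', 'd2': {'buckets': [{'val': 'jobs', 'count': 1}]}},
]

_series_example = collections.defaultdict(list)
for f in counts_example:
  for bucket in (f['d2']['buckets'] if 'd2' in f else []):
    _series_example[bucket['val']].append(f['val'])
    _series_example[bucket['val']].append(bucket['d2'] if 'd2' in bucket else bucket['count'])

assert dict(_series_example) == {'jobs': ['2015-01', 3, '2015-02', 1], 'queries': ['2015-01', 5]}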
def augment_solr_response(response, collection, query):
  augmented = response
  augmented['normalized_facets'] = []
  NAME = '%(field)s-%(id)s'
  normalized_facets = []

  selected_values = dict([(fq['id'], fq['filter']) for fq in query['fqs']])

  if response and response.get('facet_counts'):
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif (category == 'range' or category == 'range-up') and response['facet_counts']['facet_ranges']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].iteritems():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'counts': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = NAME % facet
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(name, facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'field': name,
          'type': category,
          'label': name,
          'counts': count,
        }
        normalized_facets.append(facet)

  if response and response.get('facets'):
    for facet in collection['facets']:
      category = facet['type']
      name = facet['id'] # Nested facets can only have one name

      if category == 'function' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        facet = {
          'id': collection_facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'counts': value,
        }
        normalized_facets.append(facet)
      elif category == 'nested' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        extraSeries = []
        counts = response['facets'][name]['buckets']

        # Date range
        if collection_facet['properties']['isDate']:
          dimension = 3
          # Single dimension or dimension 2 with analytics
          if not collection_facet['properties']['facets'] or collection_facet['properties']['facets'][0]['aggregate'] not in ('count', 'unique'):
            counts = [_v for _f in counts for _v in (_f['val'], _f['d2'] if 'd2' in _f else _f['count'])]
            counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet)
          else:
            # Dimension 1 with counts and 2 with analytics
            _series = collections.defaultdict(list)

            for f in counts:
              for bucket in (f['d2']['buckets'] if 'd2' in f else []):
                _series[bucket['val']].append(f['val'])
                _series[bucket['val']].append(bucket['d2'] if 'd2' in bucket else bucket['count'])

            for name, val in _series.iteritems():
              _c = range_pair(facet['field'], name, selected_values.get(facet['id'], []), val, 1, collection_facet)
              extraSeries.append({'counts': _c, 'label': name})
            counts = []
        elif not collection_facet['properties']['facets'] or collection_facet['properties']['facets'][0]['aggregate'] not in ('count', 'unique'):
          # Single dimension or dimension 2 with analytics
          dimension = 1
          counts = [_v for _f in counts for _v in (_f['val'], _f['d2'] if 'd2' in _f else _f['count'])]
          counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), counts)
        else:
          # Dimension 1 with counts and 2 with analytics
          dimension = 2
          counts = _augment_stats_2d(name, facet, counts, selected_values)

        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()

        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': extraSeries,
          'dimension': dimension
        }

        normalized_facets.append(facet)

  # Remove unnecessary facet data
  if response:
    response.pop('facet_counts', None) # default avoids a KeyError when the key is absent
    response.pop('facets', None)

  # HTML escaping
  if not query.get('download'):
    for doc in response['response']['docs']:
      for field, value in doc.iteritems():
        if isinstance(value, numbers.Number):
          escaped_value = value
        elif isinstance(value, list): # Multivalue field
          escaped_value = [smart_unicode(escape(val), errors='replace') for val in value] # escape each entry too
        else:
          value = smart_unicode(value, errors='replace')
          escaped_value = escape(value)
        doc[field] = escaped_value

      link = None
      if 'link-meta' in doc:
        meta = json.loads(doc['link-meta'])
        link = get_data_link(meta)

      doc['externalLink'] = link
      doc['details'] = []

  highlighted_fields = response.get('highlighting', {}).keys()
  if highlighted_fields and not query.get('download'):
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field in doc and smart_unicode(doc[id_field]) in highlighted_fields:
          highlighting = response['highlighting'][smart_unicode(doc[id_field])]
          if highlighting:
            escaped_highlighting = {}
            for field, hls in highlighting.iteritems():
              # Escape everything, then restore only Solr's <em> highlight markers
              _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
              escaped_highlighting[field] = _hls
            doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")

  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
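# Why the document id is coerced before the highlighting lookup: Solr keys the
# 'highlighting' map by the id as a (unicode) string, while the stored field
# may come back as another type such as an int. Made-up data:
highlighting_example = {u'42': {u'text': [u'<em>hit</em>']}}
doc_example = {'id': 42}

assert str(doc_example['id']) in highlighting_example # '42' matches u'42'
# smart_unicode(doc['id']) is the more robust coercion once ids can contain
# non-ASCII bytes, which is presumably why some of the variants above use it
# instead of str().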