Exemple #1
0
def augment_response(collection, query, response):
  """Prepare the documents of a search response for display, in place.

  Unless ``query['download']`` is set, every document found in
  ``response['response']['docs']`` gets its field values HTML-escaped and
  receives the extra keys ``externalLink``, ``details`` and ``hueId``. When the
  response carries a ``moreLikeThis`` section, the entry matching a document is
  moved onto that document as ``_childDocuments_`` / ``numFound``. Highlighting
  snippets, when present, then overwrite the corresponding document fields.

  :param collection: dict-like collection definition; only ``idField`` is read.
  :param query: dict-like query; only the ``download`` flag is read.
  :param response: response dict; it is modified in place.
  :return: None.
  """
  is_download = query.get('download')

  # HTML escaping
  if not is_download:
    id_field = collection.get('idField', '')
    more_like_this = response.get('moreLikeThis') or {}

    for doc in response['response']['docs']:
      # Build the link first: the metadata must be read before it is escaped.
      link = None
      if 'link-meta' in doc:
        link = get_data_link(json.loads(doc['link-meta']))
      elif 'link' in doc:
        link = get_data_link({'type': 'link', 'link': doc['link']})

      for field, value in doc.items():
        if isinstance(value, numbers.Number):
          escaped_value = value
        elif field == '_childDocuments_': # Nested documents
          escaped_value = value
        elif isinstance(value, list): # Multivalue field
          escaped_value = [smart_unicode(escape(val), errors='replace') for val in value]
        else:
          value = smart_unicode(value, errors='replace')
          escaped_value = escape(value)
        doc[field] = escaped_value

      doc['externalLink'] = link
      doc['details'] = []
      doc['hueId'] = smart_unicode(doc.get(id_field, ''))

      # A document may have no moreLikeThis entry: look it up instead of
      # indexing, which used to raise KeyError for such documents.
      similar = more_like_this.get(doc['hueId'])
      if similar and similar.get('numFound'):
        doc['_childDocuments_'] = similar['docs']
        doc['numFound'] = similar['numFound']
        del more_like_this[doc['hueId']]

  # Keep the mapping itself: membership tests are then hash lookups rather
  # than scans of a key list repeated for every document.
  highlighting_by_id = response.get('highlighting', {})
  if highlighting_by_id and not is_download:
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field not in doc:
          continue

        highlighting = highlighting_by_id.get(smart_unicode(doc[id_field]))
        if not highlighting:
          continue

        escaped_highlighting = {}
        for field, hls in highlighting.items():
          # Escape everything, then restore only the <em> markers.
          _hls = [
            escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>')
            for hl in hls
          ]
          # A single snippet is stored as a plain string, several as a list.
          escaped_highlighting[field] = _hls[0] if len(_hls) == 1 else _hls

        doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")
Exemple #2
0
def test_get_data_link():
  """Check the link returned by get_data_link() for each metadata sample below."""
  # (expected link, metadata) pairs, checked in order.
  cases = [
    # No metadata and plain links
    (None, {}),
    ('gethue.com', {'type': 'link', 'link': 'gethue.com'}),

    # HBase: row only, row + family, row + family + column
    ('/hbase/#Cluster/document_demo/query/20150527',
     {'type': 'hbase', 'table': 'document_demo', 'row_key': '20150527'}),
    ('/hbase/#Cluster/document_demo/query/20150527[f1]',
     {'type': 'hbase', 'table': 'document_demo', 'row_key': '20150527', 'fam': 'f1'}),
    ('/hbase/#Cluster/document_demo/query/20150527[f1:c1]',
     {'type': 'hbase', 'table': 'document_demo', 'row_key': '20150527', 'fam': 'f1', 'col': 'c1'}),

    # HDFS path and Hive table
    ('/filebrowser/view=/data/hue/1', {'type': 'hdfs', 'path': '/data/hue/1'}),
    ('/metastore/table/default/sample_07', {'type': 'hive', 'database': 'default', 'table': 'sample_07'}),
  ]

  for expected_link, meta in cases:
    assert_equal(expected_link, get_data_link(meta))
Exemple #3
0
def test_get_data_link():
  """Verify get_data_link() output for empty, link, HBase, HDFS and Hive metadata."""

  def check(expected_link, **meta):
    # The keyword arguments form the metadata dict handed to get_data_link().
    assert_equal(expected_link, get_data_link(meta))

  check(None)
  check('gethue.com', type='link', link='gethue.com')

  hbase_base = '/hbase/#Cluster/document_demo/query/20150527'
  check(hbase_base, type='hbase', table='document_demo', row_key='20150527')
  check(hbase_base + '[f1]', type='hbase', table='document_demo', row_key='20150527', fam='f1')
  check(hbase_base + '[f1:c1]', type='hbase', table='document_demo', row_key='20150527', fam='f1', col='c1')

  check('/filebrowser/view=/data/hue/1', type='hdfs', path='/data/hue/1')
  check('/metastore/table/default/sample_07', type='hive', database='default', table='sample_07')
Exemple #4
0
def test_get_data_link():
    """Run get_data_link() over sample metadata and compare with the expected links."""
    # Every HBase sample shares the same table and row key.
    hbase_row = {"type": "hbase", "table": "document_demo", "row_key": "20150527"}

    expectations = (
        (None, {}),
        ("gethue.com", {"type": "link", "link": "gethue.com"}),
        ("/hbase/#Cluster/document_demo/query/20150527", dict(hbase_row)),
        ("/hbase/#Cluster/document_demo/query/20150527[f1]", dict(hbase_row, fam="f1")),
        ("/hbase/#Cluster/document_demo/query/20150527[f1:c1]", dict(hbase_row, fam="f1", col="c1")),
        ("/filebrowser/view=/data/hue/1", {"type": "hdfs", "path": "/data/hue/1"}),
        ("/metastore/table/default/sample_07", {"type": "hive", "database": "default", "table": "sample_07"}),
    )

    for wanted, metadata in expectations:
        assert_equal(wanted, get_data_link(metadata))
Exemple #5
0
def augment_solr_response(response, collection, query):
  """Normalize the facets of a Solr response and prepare its documents for display.

  The response is modified in place and returned:

  - ``facet_counts`` (field / range / query / pivot facets) and ``facets``
    (function / nested facets) are converted, for the facets listed in
    ``collection['facets']``, into dicts stored under ``normalized_facets``.
  - When ``facets`` is present, the raw ``facet_counts`` and ``facets`` entries
    are removed from the response.
  - Document field values are HTML-escaped (numbers are left untouched).
  - Unless ``query['download']`` is set, each document gets ``externalLink``
    and ``details``, and highlighting snippets overwrite the matching fields.

  :param response: Solr response dict; must contain ``response['docs']``.
  :param collection: collection definition; ``facets`` and ``idField`` are read.
  :param query: query dict; ``fqs`` and the ``download`` flag are read.
  :return: the same ``response`` object, augmented.
  """
  augmented = response
  augmented['normalized_facets'] = []
  NAME = '%(field)s-%(id)s'
  normalized_facets = []

  selected_values = {fq['id']: fq['filter'] for fq in query['fqs']}

  if response and response.get('facet_counts'):
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif (category == 'range' or category == 'range-up') and response['facet_counts']['facet_ranges']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].items():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'counts': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = NAME % facet
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(name, facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'field': name,
          'type': category,
          'label': name,
          'counts': count,
        }
        normalized_facets.append(facet)

  if response and response.get('facets'):
    for facet in collection['facets']:
      category = facet['type']
      name = facet['id'] # Nested facets can only have one name

      if category == 'function' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        facet = {
          'id': collection_facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'counts': value,
        }
        normalized_facets.append(facet)
      elif category == 'nested' and name in response['facets']:
        collection_facet = get_facet_field(category, name, collection['facets'])
        extraSeries = []
        counts = response['facets'][name]['buckets']

        # Date range
        if collection_facet['properties']['isDate']:
          dimension = 3
          # Single dimension or dimension 2 with analytics
          if not collection_facet['properties']['facets'] or collection_facet['properties']['facets'][0]['aggregate'] not in ('count', 'unique'):
            counts = [_v for _f in counts for _v in (_f['val'], _f['d2'] if 'd2' in _f else _f['count'])]
            counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet)
          else:
            # Dimension 1 with counts and 2 with analytics
            _series = collections.defaultdict(list)
            for f in counts:
              for bucket in (f['d2']['buckets'] if 'd2' in f else []):
                _series[bucket['val']].append(f['val'])
                _series[bucket['val']].append(bucket['d2'] if 'd2' in bucket else bucket['count'])
            # Use a dedicated loop variable so the facet `name` is not clobbered.
            for series_label, val in _series.items():
              _c = range_pair(facet['field'], series_label, selected_values.get(facet['id'], []), val, 1, collection_facet)
              extraSeries.append({'counts': _c, 'label': series_label})
            counts = []
        elif not collection_facet['properties']['facets'] or collection_facet['properties']['facets'][0]['aggregate'] not in ('count', 'unique'):
          # Single dimension or dimension 2 with analytics
          dimension = 1
          counts = [_v for _f in counts for _v in (_f['val'], _f['d2'] if 'd2' in _f else _f['count'])]
          counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), counts)
        else:
          # Dimension 1 with counts and 2 with analytics
          dimension = 2
          counts = _augment_stats_2d(name, facet, counts, selected_values)

        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()

        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': extraSeries,
          'dimension': dimension
        }

        normalized_facets.append(facet)

    # Remove unnecessary facet data. 'facet_counts' may be absent when only
    # JSON facets were returned, hence the default.
    response.pop('facet_counts', None)
    response.pop('facets', None)

  is_download = query.get('download')

  # HTML escaping
  for doc in response['response']['docs']:
    # Parse the link metadata before escaping: escaping rewrites the JSON
    # quotes, which would make json.loads() fail afterwards.
    link = None
    if not is_download and 'link-meta' in doc:
      link = get_data_link(json.loads(doc['link-meta']))

    for field, value in doc.items():
      if isinstance(value, numbers.Number):
        escaped_value = value
      else:
        value = smart_unicode(value, errors='replace')
        escaped_value = escape(value)
      doc[field] = escaped_value

    if not is_download:
      doc['externalLink'] = link
      doc['details'] = []

  # Keep the mapping itself so membership tests are hash lookups.
  highlighting_by_id = response.get('highlighting', {})
  if highlighting_by_id and not is_download:
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field not in doc:
          continue

        highlighting = highlighting_by_id.get(smart_unicode(doc[id_field]))
        if not highlighting:
          continue

        escaped_highlighting = {}
        for field, hls in highlighting.items():
          # Escape everything, then restore only the <em> markers.
          _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
          escaped_highlighting[field] = _hls

        doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")


  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
Exemple #6
0
def augment_solr_response(response, collection, query):
  """Normalize the facets of a Solr response and prepare its documents for display.

  The response is modified in place and returned:

  - ``facet_counts`` (field / range / query / pivot facets) and ``facets``
    (function / nested facets) are converted, for the facets listed in
    ``collection['facets']``, into dicts stored under ``normalized_facets``.
    Nested facets additionally expose a tabular view (``docs`` and
    ``fieldsAttributes``) built from the rows filled by ``_augment_stats_2d``.
  - When ``facets`` is present, the raw ``facet_counts`` and ``facets`` entries
    are removed from the response.
  - Unless ``query['download']`` is set, document field values are
    HTML-escaped, each document gets ``externalLink``, ``details`` and
    ``hueId``, and highlighting snippets overwrite the matching fields.

  :param response: Solr response dict; must contain ``response['docs']``.
  :param collection: collection definition; ``facets`` and ``idField`` are read.
  :param query: query dict; ``fqs`` and the ``download`` flag are read.
  :return: the same ``response`` object, augmented.
  """
  augmented = response
  augmented['normalized_facets'] = []
  NAME = '%(field)s-%(id)s'
  normalized_facets = []

  selected_values = {fq['id']: fq['filter'] for fq in query['fqs']}

  if response and response.get('facet_counts'):
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif (category == 'range' or category == 'range-up') and response['facet_counts']['facet_ranges']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].items():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'counts': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = NAME % facet
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(name, facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'field': name,
          'type': category,
          'label': name,
          'counts': count,
        }
        normalized_facets.append(facet)

  if response and response.get('facets'):
    for facet in collection['facets']:
      category = facet['type']
      name = facet['id'] # Nested facets can only have one name

      if category == 'function' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        facet = {
          'id': collection_facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'counts': value,
        }
        normalized_facets.append(facet)
      elif category == 'nested' and name in response['facets']:
        collection_facet = get_facet_field(category, name, collection['facets'])
        extraSeries = []
        counts = response['facets'][name]['buckets']

        # Column headers of the tabular view. last_x_col / last_xx_col track
        # the positions of the last two 'count' (dimension) columns.
        cols = ['%(field)s' % facet, 'count(%(field)s)' % facet]
        last_x_col = 0
        last_xx_col = 0
        for i, f in enumerate(facet['properties']['facets']):
          if f['aggregate']['function'] == 'count':
            cols.append(f['field'])
            last_xx_col = last_x_col
            last_x_col = i + 2
          cols.append(SolrApi._get_aggregate_function(f))
        rows = []

        # For dim in dimensions

        # Number or Date range
        if collection_facet['properties']['canRange'] and not facet['properties'].get('type') == 'field':
          dimension = 3
          # Single dimension or dimension 2 with analytics
          if not collection_facet['properties']['facets'] or (collection_facet['properties']['facets'][0]['aggregate']['function'] != 'count' and len(collection_facet['properties']['facets']) == 1):
            column = 'count'
            if len(collection_facet['properties']['facets']) == 1:
              agg_keys = [key for key in counts[0] if key.lower().startswith('agg_')]
              column = agg_keys[0]
            else:
              agg_keys = [column]

            _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
            counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet)
          else:
            # Dimension 1 with counts and 2 with analytics

            agg_keys = [key for key in counts[0] if key.lower().startswith(('agg_', 'dim_'))]
            agg_keys.sort(key=lambda a: a[4:])

            if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
              agg_keys.insert(0, 'count')
            counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            _series = collections.defaultdict(list)

            for row in rows:
              for i, cell in enumerate(row):
                if i > last_x_col:
                  legend = cols[i]
                  if last_xx_col != last_x_col:
                    legend = '%s %s' % (cols[i], row[last_x_col])
                  _series[legend].append(row[last_xx_col])
                  _series[legend].append(cell)

            # Use a dedicated loop variable: `name` is still needed below to
            # read this facet's 'numBuckets' from the response.
            for series_label, val in _series.items():
              _c = range_pair(facet['field'], series_label, selected_values.get(facet['id'], []), val, 1, collection_facet)
              extraSeries.append({'counts': _c, 'label': series_label})
            counts = []
        elif collection_facet['properties'].get('isOldPivot'):
          facet_fields = [collection_facet['field']] + [f['field'] for f in collection_facet['properties'].get('facets', []) if f['aggregate']['function'] == 'count']

          agg_keys = [key for key in counts[0] if key.lower().startswith(('agg_', 'dim_'))]
          agg_keys.sort(key=lambda a: a[4:])

          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          dimension = len(facet_fields)
        elif not collection_facet['properties']['facets'] or (collection_facet['properties']['facets'][0]['aggregate']['function'] != 'count' and len(collection_facet['properties']['facets']) == 1):
          # Dimension 1 with 1 count or agg
          dimension = 1

          column = 'count'
          if len(collection_facet['properties']['facets']) == 1:
            agg_keys = [key for key in counts[0] if key.lower().startswith('agg_')]
            legend = agg_keys[0].split(':', 2)[1]
            column = agg_keys[0]
          else:
            legend = facet['field']
            agg_keys = [column]

          _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
          counts = pairwise2(legend, selected_values.get(facet['id'], []), counts)
        else:
          # Dimension 2 with analytics or 1 with N aggregates
          dimension = 2
          agg_keys = [key for key in counts[0] if key.lower().startswith(('agg_', 'dim_'))]
          agg_keys.sort(key=lambda a: a[4:])

          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)
          actual_dimension = 1 + sum([_f['aggregate']['function'] == 'count' for _f in collection_facet['properties']['facets']])

          # Build a real list: filter() is lazy on Python 3.
          counts = [a for a in counts if len(a['fq_fields']) == actual_dimension]

        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': extraSeries,
          'dimension': dimension,
          'response': {'response': {'start': 0, 'numFound': response['facets'][name]['numBuckets']}}, # Todo * nested buckets + offsets
          'docs': [dict(zip(cols, row)) for row in rows],
          'fieldsAttributes': [Collection2._make_gridlayout_header_field({'name': col, 'type': 'aggr' if '(' in col else 'string'}) for col in cols]
        }

        normalized_facets.append(facet)

    # Remove unnecessary facet data. 'facet_counts' may be absent when only
    # JSON facets were returned, hence the default.
    response.pop('facet_counts', None)
    response.pop('facets', None)

  is_download = query.get('download')

  # HTML escaping
  if not is_download:
    id_field = collection.get('idField', '')

    for doc in response['response']['docs']:
      # Parse the link metadata before escaping: escaping rewrites the JSON
      # quotes, which would make json.loads() fail afterwards.
      link = None
      if 'link-meta' in doc:
        link = get_data_link(json.loads(doc['link-meta']))

      for field, value in doc.items():
        if isinstance(value, numbers.Number):
          escaped_value = value
        elif field == '_childDocuments_': # Nested documents
          escaped_value = value
        elif isinstance(value, list): # Multivalue field
          escaped_value = [smart_unicode(escape(val), errors='replace') for val in value]
        else:
          value = smart_unicode(value, errors='replace')
          escaped_value = escape(value)
        doc[field] = escaped_value

      doc['externalLink'] = link
      doc['details'] = []
      doc['hueId'] = smart_unicode(doc.get(id_field, ''))

  # Keep the mapping itself so membership tests are hash lookups.
  highlighting_by_id = response.get('highlighting', {})
  if highlighting_by_id and not is_download:
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field not in doc:
          continue

        highlighting = highlighting_by_id.get(smart_unicode(doc[id_field]))
        if not highlighting:
          continue

        escaped_highlighting = {}
        for field, hls in highlighting.items():
          # Escape everything, then restore only the <em> markers.
          _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
          # A single snippet is stored as a plain string, several as a list.
          escaped_highlighting[field] = _hls[0] if len(_hls) == 1 else _hls

        doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")


  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented
Exemple #7
0
def augment_solr_response(response, collection, query):
    """Normalize the facets of a Solr response and prepare its documents for display.

    The response is modified in place and returned:

    - ``facet_counts`` (field / range / query / pivot facets) and ``facets``
      (function / nested facets) are converted, for the facets listed in
      ``collection['facets']``, into dicts stored under ``normalized_facets``.
    - When ``facets`` is present, the raw ``facet_counts`` and ``facets``
      entries are removed from the response.
    - Unless ``query['download']`` is set, document field values are
      HTML-escaped (including every item of multi-valued fields), each
      document gets ``externalLink`` and ``details``, and highlighting
      snippets overwrite the matching fields.

    :param response: Solr response dict; must contain ``response['docs']``.
    :param collection: collection definition; ``facets`` and ``idField`` are read.
    :param query: query dict; ``fqs`` and the ``download`` flag are read.
    :return: the same ``response`` object, augmented.
    """
    augmented = response
    augmented['normalized_facets'] = []
    NAME = '%(field)s-%(id)s'
    normalized_facets = []

    selected_values = {fq['id']: fq['filter'] for fq in query['fqs']}

    if response and response.get('facet_counts'):
        facet_counts = response['facet_counts']

        for facet in collection['facets']:
            category = facet['type']

            if category == 'field' and facet_counts['facet_fields']:
                name = NAME % facet
                collection_facet = get_facet_field(category, name, collection['facets'])
                counts = pairwise2(
                    facet['field'],
                    selected_values.get(facet['id'], []),
                    facet_counts['facet_fields'][name])
                if collection_facet['properties']['sort'] == 'asc':
                    counts.reverse()
                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                }
                normalized_facets.append(facet)
            elif category in ('range', 'range-up') and facet_counts['facet_ranges']:
                name = NAME % facet
                collection_facet = get_facet_field(category, name, collection['facets'])
                counts = facet_counts['facet_ranges'][name]['counts']
                end = facet_counts['facet_ranges'][name]['end']
                counts = range_pair(
                    facet['field'], name,
                    selected_values.get(facet['id'], []),
                    counts, end, collection_facet)
                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                    'extraSeries': []
                }
                normalized_facets.append(facet)
            elif category == 'query' and facet_counts['facet_queries']:
                for name, value in facet_counts['facet_queries'].items():
                    collection_facet = get_facet_field(category, name, collection['facets'])
                    facet = {
                        'id': collection_facet['id'],
                        'query': name,
                        'type': category,
                        'label': name,
                        'counts': value,
                    }
                    normalized_facets.append(facet)
            elif category == 'pivot':
                name = NAME % facet
                if 'facet_pivot' in facet_counts and name in facet_counts['facet_pivot']:
                    if facet['properties']['scope'] == 'stack':
                        count = _augment_pivot_2d(
                            name, facet['id'],
                            facet_counts['facet_pivot'][name],
                            selected_values)
                    else:
                        count = facet_counts['facet_pivot'][name]
                        _augment_pivot_nd(facet['id'], count, selected_values)
                else:
                    count = []
                facet = {
                    'id': facet['id'],
                    'field': name,
                    'type': category,
                    'label': name,
                    'counts': count,
                }
                normalized_facets.append(facet)

    if response and response.get('facets'):
        for facet in collection['facets']:
            category = facet['type']
            name = facet['id']  # Nested facets can only have one name

            if category == 'function' and name in response['facets']:
                value = response['facets'][name]
                collection_facet = get_facet_field(category, name, collection['facets'])
                facet = {
                    'id': collection_facet['id'],
                    'query': name,
                    'type': category,
                    'label': name,
                    'counts': value,
                }
                normalized_facets.append(facet)
            elif category == 'nested' and name in response['facets']:
                collection_facet = get_facet_field(category, name, collection['facets'])
                extraSeries = []
                counts = response['facets'][name]['buckets']
                sub_facets = collection_facet['properties']['facets']

                # Date range
                if collection_facet['properties']['isDate']:
                    dimension = 3
                    # Single dimension or dimension 2 with analytics
                    if not sub_facets or sub_facets[0]['aggregate'] not in ('count', 'unique'):
                        counts = [
                            _v for _f in counts
                            for _v in (_f['val'], _f['d2'] if 'd2' in _f else _f['count'])
                        ]
                        counts = range_pair(
                            facet['field'], name,
                            selected_values.get(facet['id'], []),
                            counts, 1, collection_facet)
                    else:
                        # Dimension 1 with counts and 2 with analytics
                        _series = collections.defaultdict(list)
                        for f in counts:
                            for bucket in (f['d2']['buckets'] if 'd2' in f else []):
                                _series[bucket['val']].append(f['val'])
                                _series[bucket['val']].append(
                                    bucket['d2'] if 'd2' in bucket else bucket['count'])
                        # Use a dedicated loop variable so the facet `name`
                        # is not clobbered.
                        for series_label, val in _series.items():
                            _c = range_pair(
                                facet['field'], series_label,
                                selected_values.get(facet['id'], []),
                                val, 1, collection_facet)
                            extraSeries.append({'counts': _c, 'label': series_label})
                        counts = []
                elif not sub_facets or sub_facets[0]['aggregate'] not in ('count', 'unique'):
                    # Single dimension or dimension 2 with analytics
                    dimension = 1
                    counts = [
                        _v for _f in counts
                        for _v in (_f['val'], _f['d2'] if 'd2' in _f else _f['count'])
                    ]
                    counts = pairwise2(
                        facet['field'],
                        selected_values.get(facet['id'], []),
                        counts)
                else:
                    # Dimension 1 with counts and 2 with analytics
                    dimension = 2
                    counts = _augment_stats_2d(name, facet, counts, selected_values)

                if collection_facet['properties']['sort'] == 'asc':
                    counts.reverse()

                facet = {
                    'id': collection_facet['id'],
                    'field': facet['field'],
                    'type': category,
                    'label': collection_facet['label'],
                    'counts': counts,
                    'extraSeries': extraSeries,
                    'dimension': dimension
                }

                normalized_facets.append(facet)

        # Remove unnecessary facet data. 'facet_counts' may be absent when
        # only JSON facets were returned, hence the default.
        response.pop('facet_counts', None)
        response.pop('facets', None)

    is_download = query.get('download')

    # HTML escaping
    if not is_download:
        for doc in response['response']['docs']:
            # Parse the link metadata before escaping: escaping rewrites the
            # JSON quotes, which would make json.loads() fail afterwards.
            link = None
            if 'link-meta' in doc:
                link = get_data_link(json.loads(doc['link-meta']))

            for field, value in doc.items():
                if isinstance(value, numbers.Number):
                    escaped_value = value
                elif isinstance(value, list):  # Multivalue field
                    # Each item must be escaped too, like single values.
                    escaped_value = [
                        escape(smart_unicode(val, errors='replace')) for val in value
                    ]
                else:
                    value = smart_unicode(value, errors='replace')
                    escaped_value = escape(value)
                doc[field] = escaped_value

            doc['externalLink'] = link
            doc['details'] = []

    # Keep the mapping itself so membership tests are hash lookups.
    highlighting_by_id = response.get('highlighting', {})
    if highlighting_by_id and not is_download:
        id_field = collection.get('idField')
        if id_field:
            for doc in response['response']['docs']:
                if id_field not in doc:
                    continue

                highlighting = highlighting_by_id.get(smart_unicode(doc[id_field]))
                if not highlighting:
                    continue

                escaped_highlighting = {}
                for field, hls in highlighting.items():
                    # Escape everything, then restore only the <em> markers.
                    escaped_highlighting[field] = [
                        escape(smart_unicode(hl, errors='replace'))
                        .replace('&lt;em&gt;', '<em>')
                        .replace('&lt;/em&gt;', '</em>')
                        for hl in hls
                    ]

                doc.update(escaped_highlighting)
        else:
            response['warning'] = _(
                "The Solr schema requires an id field for performing the result highlighting"
            )

    if normalized_facets:
        augmented['normalized_facets'].extend(normalized_facets)

    return augmented
Exemple #8
0
def augment_solr_response(response, collection, query):
    """Augment a Solr response dict in place for display and return it.

    The function:

    * builds ``response["normalized_facets"]`` from ``facet_counts``
      (``field``, ``range``/``range-up``, ``query`` and ``pivot`` facets) and
      from ``facets`` (``function`` and ``nested`` facets);
    * HTML-escapes every non-numeric document value (including each value of
      a multi-valued field);
    * unless ``query["download"]`` is set, adds ``externalLink`` and
      ``details`` to each document and merges escaped highlighting snippets
      into the matching documents.

    Args:
        response: Solr response dict; must contain ``response["docs"]``.
            It is mutated and is also the return value.
        collection: dict with a ``"facets"`` list and an optional
            ``"idField"`` used to match highlighting to documents.
        query: dict with a ``"fqs"`` list (each item has ``"id"`` and
            ``"filter"``) and an optional ``"download"`` flag.

    Returns:
        The same ``response`` object, augmented.
    """
    augmented = response
    augmented["normalized_facets"] = []
    NAME = "%(field)s-%(id)s"
    normalized_facets = []

    # Filter values currently selected in the query, keyed by facet id.
    selected_values = {fq["id"]: fq["filter"] for fq in query["fqs"]}

    if response and response.get("facet_counts"):
        for facet in collection["facets"]:
            category = facet["type"]

            if category == "field" and response["facet_counts"]["facet_fields"]:
                name = NAME % facet
                collection_facet = get_facet_field(category, name, collection["facets"])
                counts = pairwise2(
                    facet["field"], selected_values.get(facet["id"], []), response["facet_counts"]["facet_fields"][name]
                )
                if collection_facet["properties"]["sort"] == "asc":
                    counts.reverse()
                facet = {
                    "id": collection_facet["id"],
                    "field": facet["field"],
                    "type": category,
                    "label": collection_facet["label"],
                    "counts": counts,
                }
                normalized_facets.append(facet)
            elif (category == "range" or category == "range-up") and response["facet_counts"]["facet_ranges"]:
                name = NAME % facet
                collection_facet = get_facet_field(category, name, collection["facets"])
                counts = response["facet_counts"]["facet_ranges"][name]["counts"]
                end = response["facet_counts"]["facet_ranges"][name]["end"]
                counts = range_pair(
                    facet["field"], name, selected_values.get(facet["id"], []), counts, end, collection_facet
                )
                facet = {
                    "id": collection_facet["id"],
                    "field": facet["field"],
                    "type": category,
                    "label": collection_facet["label"],
                    "counts": counts,
                    "extraSeries": [],
                }
                normalized_facets.append(facet)
            elif category == "query" and response["facet_counts"]["facet_queries"]:
                for name, value in response["facet_counts"]["facet_queries"].items():
                    collection_facet = get_facet_field(category, name, collection["facets"])
                    facet = {
                        "id": collection_facet["id"],
                        "query": name,
                        "type": category,
                        "label": name,
                        "counts": value,
                    }
                    normalized_facets.append(facet)
            elif category == "pivot":
                name = NAME % facet
                if "facet_pivot" in response["facet_counts"] and name in response["facet_counts"]["facet_pivot"]:
                    if facet["properties"]["scope"] == "stack":
                        count = _augment_pivot_2d(
                            name, facet["id"], response["facet_counts"]["facet_pivot"][name], selected_values
                        )
                    else:
                        count = response["facet_counts"]["facet_pivot"][name]
                        _augment_pivot_nd(facet["id"], count, selected_values)
                else:
                    count = []
                facet = {"id": facet["id"], "field": name, "type": category, "label": name, "counts": count}
                normalized_facets.append(facet)

    if response and response.get("facets"):
        for facet in collection["facets"]:
            category = facet["type"]
            name = facet["id"]  # Nested facets can only have one name

            if category == "function" and name in response["facets"]:
                value = response["facets"][name]
                collection_facet = get_facet_field(category, name, collection["facets"])
                facet = {"id": collection_facet["id"], "query": name, "type": category, "label": name, "counts": value}
                normalized_facets.append(facet)
            elif category == "nested" and name in response["facets"]:
                collection_facet = get_facet_field(category, name, collection["facets"])
                extraSeries = []
                counts = response["facets"][name]["buckets"]

                # Date range
                if collection_facet["properties"]["isDate"]:
                    dimension = 3
                    # Single dimension or dimension 2 with analytics
                    if not collection_facet["properties"]["facets"] or collection_facet["properties"]["facets"][0][
                        "aggregate"
                    ] not in ("count", "unique"):
                        # Flatten buckets into [val, metric, val, metric, ...]
                        counts = [_v for _f in counts for _v in (_f["val"], _f["d2"] if "d2" in _f else _f["count"])]
                        counts = range_pair(
                            facet["field"], name, selected_values.get(facet["id"], []), counts, 1, collection_facet
                        )
                    else:
                        # Dimension 1 with counts and 2 with analytics
                        _series = collections.defaultdict(list)
                        for f in counts:
                            for bucket in f["d2"]["buckets"] if "d2" in f else []:
                                _series[bucket["val"]].append(f["val"])
                                _series[bucket["val"]].append(bucket["d2"] if "d2" in bucket else bucket["count"])
                        # Use a dedicated loop variable so the facet ``name``
                        # is not clobbered by the series labels.
                        for series_name, val in _series.items():
                            _c = range_pair(
                                facet["field"],
                                series_name,
                                selected_values.get(facet["id"], []),
                                val,
                                1,
                                collection_facet,
                            )
                            extraSeries.append({"counts": _c, "label": series_name})
                        counts = []
                elif not collection_facet["properties"]["facets"] or collection_facet["properties"]["facets"][0][
                    "aggregate"
                ] not in ("count", "unique"):
                    # Single dimension or dimension 2 with analytics
                    dimension = 1
                    counts = [_v for _f in counts for _v in (_f["val"], _f["d2"] if "d2" in _f else _f["count"])]
                    counts = pairwise2(facet["field"], selected_values.get(facet["id"], []), counts)
                else:
                    # Dimension 1 with counts and 2 with analytics
                    dimension = 2
                    counts = _augment_stats_2d(name, facet, counts, selected_values)

                if collection_facet["properties"]["sort"] == "asc":
                    counts.reverse()

                facet = {
                    "id": collection_facet["id"],
                    "field": facet["field"],
                    "type": category,
                    "label": collection_facet["label"],
                    "counts": counts,
                    "extraSeries": extraSeries,
                    "dimension": dimension,
                }

                normalized_facets.append(facet)

        # Remove unnecessary facet data. A default is passed because a
        # response can carry "facets" without "facet_counts".
        response.pop("facet_counts", None)
        response.pop("facets", None)

    # HTML escaping
    for doc in response["response"]["docs"]:
        # Snapshot the items so the dict can be updated while looping.
        for field, value in list(doc.items()):
            if isinstance(value, numbers.Number):
                escaped_value = value
            elif isinstance(value, list):  # Multivalue field
                # Each value is escaped exactly like a scalar value below.
                escaped_value = [escape(smart_unicode(val, errors="replace")) for val in value]
            else:
                value = smart_unicode(value, errors="replace")
                escaped_value = escape(value)
            doc[field] = escaped_value

        if not query.get("download"):
            link = None
            if "link-meta" in doc:
                meta = json.loads(doc["link-meta"])
                link = get_data_link(meta)

            doc["externalLink"] = link
            doc["details"] = []

    highlighting_by_id = response.get("highlighting") or {}
    if highlighting_by_id and not query.get("download"):
        id_field = collection.get("idField")
        if id_field:
            for doc in response["response"]["docs"]:
                if id_field not in doc:
                    continue
                highlighting = highlighting_by_id.get(smart_unicode(doc[id_field]))

                if highlighting:
                    escaped_highlighting = {}
                    for field, hls in highlighting.items():
                        # Escape the snippet, then restore only the <em>
                        # markers used for highlighting.
                        _hls = [
                            escape(smart_unicode(hl, errors="replace"))
                            .replace("&lt;em&gt;", "<em>")
                            .replace("&lt;/em&gt;", "</em>")
                            for hl in hls
                        ]
                        escaped_highlighting[field] = _hls

                    doc.update(escaped_highlighting)
        else:
            response["warning"] = _("The Solr schema requires an id field for performing the result highlighting")

    if normalized_facets:
        augmented["normalized_facets"].extend(normalized_facets)

    return augmented
Exemple #9
0
def augment_solr_response(response, collection, query):
  """Augment a Solr response dict in place for display and return it.

  The function:

  * builds ``response['normalized_facets']`` from ``facet_counts``
    (``field``, ``range``/``range-up``, ``query`` and ``pivot`` facets) and
    from ``facets`` (``function`` and ``nested`` facets; nested facets also
    get tabular ``docs``/``fieldsAttributes`` built from ``cols`` and ``rows``);
  * unless ``query['download']`` is set, HTML-escapes document values, adds
    ``externalLink``, ``details`` and ``hueId`` to each document, and merges
    escaped highlighting snippets into the matching documents.

  Args:
    response: Solr response dict; must contain ``response['docs']``.
      It is mutated and is also the return value.
    collection: dict with a ``'facets'`` list and an optional ``'idField'``.
    query: dict with a ``'fqs'`` list (each item has ``'id'`` and
      ``'filter'``) and an optional ``'download'`` flag.

  Returns:
    The same ``response`` object, augmented.
  """
  augmented = response
  augmented['normalized_facets'] = []
  NAME = '%(field)s-%(id)s'
  normalized_facets = []

  # Filter values currently selected in the query, keyed by facet id.
  selected_values = {fq['id']: fq['filter'] for fq in query['fqs']}

  if response and response.get('facet_counts'):
    for facet in collection['facets']:
      category = facet['type']

      if category == 'field' and response['facet_counts']['facet_fields']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = pairwise2(facet['field'], selected_values.get(facet['id'], []), response['facet_counts']['facet_fields'][name])
        if collection_facet['properties']['sort'] == 'asc':
          counts.reverse()
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
        }
        normalized_facets.append(facet)
      elif (category == 'range' or category == 'range-up') and response['facet_counts']['facet_ranges']:
        name = NAME % facet
        collection_facet = get_facet_field(category, name, collection['facets'])
        counts = response['facet_counts']['facet_ranges'][name]['counts']
        end = response['facet_counts']['facet_ranges'][name]['end']
        counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, end, collection_facet)
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': []
        }
        normalized_facets.append(facet)
      elif category == 'query' and response['facet_counts']['facet_queries']:
        for name, value in response['facet_counts']['facet_queries'].items():
          collection_facet = get_facet_field(category, name, collection['facets'])
          facet = {
            'id': collection_facet['id'],
            'query': name,
            'type': category,
            'label': name,
            'counts': value,
          }
          normalized_facets.append(facet)
      elif category == 'pivot':
        name = NAME % facet
        if 'facet_pivot' in response['facet_counts'] and name in response['facet_counts']['facet_pivot']:
          if facet['properties']['scope'] == 'stack':
            count = _augment_pivot_2d(name, facet['id'], response['facet_counts']['facet_pivot'][name], selected_values)
          else:
            count = response['facet_counts']['facet_pivot'][name]
            _augment_pivot_nd(facet['id'], count, selected_values)
        else:
          count = []
        facet = {
          'id': facet['id'],
          'field': name,
          'type': category,
          'label': name,
          'counts': count,
        }
        normalized_facets.append(facet)

  if response and response.get('facets'):
    for facet in collection['facets']:
      category = facet['type']
      name = facet['id'] # Nested facets can only have one name

      if category == 'function' and name in response['facets']:
        value = response['facets'][name]
        collection_facet = get_facet_field(category, name, collection['facets'])
        facet = {
          'id': collection_facet['id'],
          'query': name,
          'type': category,
          'label': name,
          'counts': value,
        }
        normalized_facets.append(facet)
      elif category == 'nested' and name in response['facets']:
        collection_facet = get_facet_field(category, name, collection['facets'])
        extraSeries = []
        counts = response['facets'][name]['buckets']

        # Column headers of the tabular view: the facet field, its count, then
        # one column per sub-facet (plus the field itself for 'count' ones).
        cols = ['%(field)s' % facet, 'count(%(field)s)' % facet]
        last_x_col = 0
        last_xx_col = 0
        for i, f in enumerate(facet['properties']['facets']):
          if f['aggregate']['function'] == 'count':
            cols.append(f['field'])
            last_xx_col = last_x_col
            last_x_col = i + 2
          cols.append(SolrApi._get_aggregate_function(f))
        rows = []

        # For dim in dimensions

        # Number or Date range
        if collection_facet['properties']['canRange'] and not facet['properties'].get('type') == 'field':
          dimension = 3 if collection_facet['properties']['isDate'] else 1
          # Single dimension or dimension 2 with analytics
          if not collection_facet['properties']['facets'] or (collection_facet['properties']['facets'][0]['aggregate']['function'] != 'count' and len(collection_facet['properties']['facets']) == 1):
            column = 'count'
            if len(collection_facet['properties']['facets']) == 1:
              agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
              legend = agg_keys[0].split(':', 2)[1]
              column = agg_keys[0]
            else:
              legend = facet['field'] # 'count(%s)' % legend
              agg_keys = [column]

            _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            # Flatten buckets into [val, metric, val, metric, ...]
            counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
            counts = range_pair(facet['field'], name, selected_values.get(facet['id'], []), counts, 1, collection_facet)
          else:
            # Dimension 1 with counts and 2 with analytics
            agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
            agg_keys.sort(key=lambda a: a[4:])

            if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
              agg_keys.insert(0, 'count')
            counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

            _series = collections.defaultdict(list)

            for row in rows:
              for i, cell in enumerate(row):
                if i > last_x_col:
                  legend = cols[i]
                  if last_xx_col != last_x_col:
                    legend = '%s %s' % (cols[i], row[last_x_col])
                  _series[legend].append(row[last_xx_col])
                  _series[legend].append(cell)

            # Use a dedicated loop variable: ``name`` is still needed below to
            # look up this facet's bucket count in the response.
            for series_name, val in _series.items():
              _c = range_pair(facet['field'], series_name, selected_values.get(facet['id'], []), val, 1, collection_facet)
              extraSeries.append({'counts': _c, 'label': series_name})
            counts = []
        elif collection_facet['properties'].get('isOldPivot'):
          facet_fields = [collection_facet['field']] + [f['field'] for f in collection_facet['properties'].get('facets', []) if f['aggregate']['function'] == 'count']

          column = 'count'
          agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
          agg_keys.sort(key=lambda a: a[4:])

          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          #_convert_nested_to_augmented_pivot_nd(facet_fields, facet['id'], count, selected_values, dimension=2)
          dimension = len(facet_fields)
        elif not collection_facet['properties']['facets'] or (collection_facet['properties']['facets'][0]['aggregate']['function'] != 'count' and len(collection_facet['properties']['facets']) == 1):
          # Dimension 1 with 1 count or agg
          dimension = 1

          column = 'count'
          if len(collection_facet['properties']['facets']) == 1:
            agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_')]
            legend = agg_keys[0].split(':', 2)[1]
            column = agg_keys[0]
          else:
            legend = facet['field']
            agg_keys = [column]

          _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)

          counts = [_v for _f in counts for _v in (_f['val'], _f[column])]
          counts = pairwise2(legend, selected_values.get(facet['id'], []), counts)
        else:
          # Dimension 2 with analytics or 1 with N aggregates
          dimension = 2
          agg_keys = [key for key, value in counts[0].items() if key.lower().startswith('agg_') or key.lower().startswith('dim_')]
          agg_keys.sort(key=lambda a: a[4:])

          if len(agg_keys) == 1 and agg_keys[0].lower().startswith('dim_'):
            agg_keys.insert(0, 'count')
          counts = _augment_stats_2d(name, facet, counts, selected_values, agg_keys, rows)
          actual_dimension = 1 + sum([_f['aggregate']['function'] == 'count' for _f in collection_facet['properties']['facets']])

          # Materialize as a list: filter() is lazy on Python 3.
          counts = [a for a in counts if len(a['fq_fields']) == actual_dimension]

        num_bucket = response['facets'][name]['numBuckets'] if 'numBuckets' in response['facets'][name] else len(response['facets'][name])
        facet = {
          'id': collection_facet['id'],
          'field': facet['field'],
          'type': category,
          'label': collection_facet['label'],
          'counts': counts,
          'extraSeries': extraSeries,
          'dimension': dimension,
          'response': {'response': {'start': 0, 'numFound': num_bucket}}, # Todo * nested buckets + offsets
          'docs': [dict(zip(cols, row)) for row in rows],
          'fieldsAttributes': [Collection2._make_gridlayout_header_field({'name': col, 'type': 'aggr' if '(' in col else 'string'}) for col in cols]
        }

        normalized_facets.append(facet)

    # Remove unnecessary facet data. A default is passed because a response
    # can carry 'facets' without 'facet_counts'.
    response.pop('facet_counts', None)
    response.pop('facets', None)

  # HTML escaping
  if not query.get('download'):
    id_field = collection.get('idField', '')

    for doc in response['response']['docs']:
      # Snapshot the items so the dict can be updated while looping.
      for field, value in list(doc.items()):
        if isinstance(value, numbers.Number):
          escaped_value = value
        elif field == '_childDocuments_': # Nested documents
          escaped_value = value
        elif isinstance(value, list): # Multivalue field
          escaped_value = [smart_unicode(escape(val), errors='replace') for val in value]
        else:
          value = smart_unicode(value, errors='replace')
          escaped_value = escape(value)
        doc[field] = escaped_value

      link = None
      if 'link-meta' in doc:
        meta = json.loads(doc['link-meta'])
        link = get_data_link(meta)

      doc['externalLink'] = link
      doc['details'] = []
      doc['hueId'] = smart_unicode(doc.get(id_field, ''))

  highlighting_by_id = response.get('highlighting') or {}
  if highlighting_by_id and not query.get('download'):
    id_field = collection.get('idField')
    if id_field:
      for doc in response['response']['docs']:
        if id_field not in doc:
          continue
        highlighting = highlighting_by_id.get(smart_unicode(doc[id_field]))

        if highlighting:
          escaped_highlighting = {}
          for field, hls in highlighting.items():
            # Escape the snippet, then restore only the <em> highlight markers.
            _hls = [escape(smart_unicode(hl, errors='replace')).replace('&lt;em&gt;', '<em>').replace('&lt;/em&gt;', '</em>') for hl in hls]
            # A single snippet is stored as a plain string rather than a list.
            escaped_highlighting[field] = _hls[0] if len(_hls) == 1 else _hls

          doc.update(escaped_highlighting)
    else:
      response['warning'] = _("The Solr schema requires an id field for performing the result highlighting")


  if normalized_facets:
    augmented['normalized_facets'].extend(normalized_facets)

  return augmented