def test_delete_article_index(app):
    """
    Test that deleting an Article also removes it from the Elasticsearch index.
    """
    with app.app_context():
        db.create_all()
        # Create, persist, then delete an article; the delete should
        # propagate to the search index.
        new_article = ArticleModel(
            title="ES title",
            tags=[],
            categories=[],
            unique_id="unique_id2",
            citation="citation",
            cfr40_part280="cfr40_part280",
            legal_language="en",
        )
        db.session.add(new_article)
        db.session.commit()
        db.session.delete(new_article)
        db.session.commit()

    resp = es.search(index=ARTICLE_INDEX,
                     body={"query": {
                         "term": {
                             "title": "ES title"
                         }
                     }})
    assert not resp["hits"]["total"]["value"]
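Note that Elasticsearch is only near-real-time, so the delete may not be visible to the search immediately. If this assertion is flaky, a minimal fix (assuming the standard elasticsearch-py client) is to force a refresh before searching:

# Make the delete visible to search right away.
es.indices.refresh(index=ARTICLE_INDEX)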
Example 2
def get_locations_by_name(location_string, region_id):
  """
  Liefert Location-Einträge für einen Namen zurück.
  """
  query_parts = []
  for location_string in location_string.replace(',', '').split():
    query_parts.append({
              'multi_match': {
                'fields': ['name', 'bodyName', 'postalcode'],
                'type': 'phrase_prefix',
                'query': location_string
              }
            })
  query_parts.append({
    'terms': {
      'bodyId': app.config['regions'][region_id]['body'],
      'minimum_should_match': 1
    }
  })
  result = es.search(
    index = app.config['es_location_index'] + '-latest',
    doc_type = 'street',
    fields = 'name,bodyName,postalcode,point',
    body = {
      'query': {
        'bool': {
          'must': query_parts
        }
      }
    },
    size = 10
  )
  
  locations = []
  if result['hits']['total']:
    for location in result['hits']['hits']:
      tmp_location = {
        'name': location['fields']['name'][0],
        'bodyName': location['fields']['bodyName'][0],
        'point': location['fields']['point'][0]
      }
      if 'postalcode' in location['fields']:
        tmp_location['postalcode'] = location['fields']['postalcode'][0]
      locations.append(tmp_location)
  return locations
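A hedged usage sketch; the arguments are placeholders and it assumes the module's `es` client and the `app.config` layout shown above:

# Hypothetical call: prefix search for streets in a configured region.
matches = get_locations_by_name('Hauptstr 50667', 'koeln')
for m in matches:
  print(m['name'], m['bodyName'], m.get('postalcode'))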
Example 4
def query_paper_num(region_id, q):
  # Returns the number of papers matching the phrase `q` in a region, plus
  # the name and date of the most recently published hit.
  result = es.search(
    index = app.config['es_paper_index'] + '-latest',
    doc_type = 'paper',
    fields = 'name,publishedDate',
    body = {
      'query': {
        'bool': {
          'must': [
            {
              'multi_match': {
                'fields': ['file.fulltext', 'file.name', 'name'],
                'type': 'phrase',
                'query': q
              }
            },
            {
              'terms': {
                'bodyId': app.config['regions'][region_id]['body'],
                'minimum_should_match': 1
              }
            }
          ]
        }
      }
    },
    size = 1,
    sort = 'publishedDate:desc'
  )
  if result['hits']['total']:
    return {
      'num': result['hits']['total'],
      'name': result['hits']['hits'][0]['fields']['name'][0],
      'publishedDate': result['hits']['hits'][0]['fields']['publishedDate'][0] if 'publishedDate' in result['hits']['hits'][0]['fields'] else None
    }
  else:
    return {
      'num': result['hits']['total']
    }
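A hedged usage sketch; the region id is a placeholder:

# Hypothetical call: count papers matching the phrase "Bebauungsplan".
info = query_paper_num('koeln', 'Bebauungsplan')
if info['num']:
  print(info['num'], 'hits, newest:', info['name'], info['publishedDate'])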
Example 6
def region_search():
  # Autocomplete endpoint: completes the last token of the ?q= parameter
  # against region names and returns up to ten matching regions.
  start_time = time.time()
  result = []
  search_string = request.args.get('q', False)
  # generate fulltext search string
  if not search_string:
    search_results = []
  else:
    search_string = search_string.split()
    search_string_to_complete = search_string[-1]
    query_parts = []
    query_parts.append({
      'match_phrase_prefix': {
        'name': search_string_to_complete.lower()
      }
    })
    if len(search_string[0:-1]):
      query_parts.append({
        'query_string': {
          'fields': ['name'],
          'query': " ".join(search_string[0:-1]),
          'default_operator': 'and'
        }
      })
    try:
      result = es.search(
        index = "%s-latest" % app.config['REGION_ES'],
        doc_type = 'regions',
        fields = 'name,slug,postalcode,location',
        body = {
          'query': {
            'bool': {
              'must': query_parts
            }
          },
          'aggs': {
            'fragment': {
              'terms': {
                'field': 'name',
                'include': {
                  'pattern': search_string_to_complete.lower() + '.*',
                  'flags': 'CANON_EQ|CASE_INSENSITIVE',
                },
                'min_doc_count': 0,
                'size': 10
              }
            }
          }
        },
        size = 10
      )
    except elasticsearch.NotFoundError:
      abort(403)
    search_results = []
    for dataset in result['hits']['hits']:
      tmp_search_result = {
        'name': dataset['fields']['name'][0],
        'postalcode': dataset['fields']['postalcode'][0] if len(dataset['fields']['postalcode']) else None,
        'slug': dataset['fields']['slug'][0]
      }
      search_results.append(tmp_search_result)

  ret = {
    'status': 0,
    'duration': round((time.time() - start_time) * 1000),
    'response': search_results
  }
  json_output = json.dumps(ret, cls=util.MyEncoder, sort_keys=True)
  response = make_response(json_output, 200)
  response.mimetype = 'application/json'
  response.headers['Expires'] = util.expires_date(hours=24)
  response.headers['Cache-Control'] = util.cache_max_age(hours=24)
  return(response)
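A hedged request sketch; the route path is a placeholder and it assumes this module's Flask `app`:

# Hypothetical request: complete "Ber" to region names such as "Berlin".
with app.test_client() as client:
  resp = client.get('/api/region_search?q=Ber')
  print(resp.get_json()['response'])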
Example 7
def search_traffic_items_es():
  # Map endpoint: filters traffic items by type and by date/numeric limits.
  # Item type 1 carries a start/end date window, type 2 an occupancy rate.
  start_time = time.time()
  limits = request.form.get('l', None)
  traffic_item_type = request.form.get('traffic_item_type', None)
  construction_site_date = request.form.get('date', None)
  occupancy_rate = request.form.get('occupancy_rate', None)
  zoom = request.form.get('zoom', None)
  saved_request = {
    'limits': limits,
    'traffic_item_type': traffic_item_type,
    'construction_site_date': construction_site_date,
    'occupancy_rate': occupancy_rate,
    'zoom': zoom
  }
  if limits:
    limits = limits.split(';')
  
  query_parts_must = []
  query_parts_should = []
  
  if traffic_item_type:
    traffic_item_type = traffic_item_type.split(',')
    query_parts_must.append({
      'terms': {
        'traffic_item_type': traffic_item_type
      }
    })
  
  
  if traffic_item_type and '1' in traffic_item_type:
    query_parts_should.append(
      {
        'bool': {
          'must': [
            {
              'range': {
                'start': {
                  'lte': construction_site_date
                }
              }
            },
            {
              'range': {
                'end': {
                  'gte': construction_site_date
                }
              }
            },
            {
              'term': {
                'traffic_item_type': 1
              }
            }
          ]
        }
      }
    )
    
  
  if traffic_item_type and '2' in traffic_item_type:
    query_parts_should.append(
      {
        'bool': {
          'must': [
            #{
            #  'range': {
            #    'occupancy_rate': {
            #      'gte': occupancy_rate
            #    }
            #  }
            #},
            {
              'term': {
                'traffic_item_type': 2
              }
            }
          ]
        }
      }
    )
  if limits:
    limit_queries = {}
    for limit in limits:
      if limit.find('<=') >= 0:
        limit_split = limit.split('<=')
        if (limit_split[0] not in limit_queries):
          limit_queries[limit_split[0]] = {}
        limit_queries[limit_split[0]]['lte'] = limit_split[1]
      elif limit.find('>=') >= 0:
        limit_split = limit.split('>=')
        if (limit_split[0] not in limit_queries):
          limit_queries[limit_split[0]] = {}
        limit_queries[limit_split[0]]['gte'] = limit_split[1]
      elif limit.find('>') >= 0:
        limit_split = limit.split('>')
        if (limit_split[0] not in limit_queries):
          limit_queries[limit_split[0]] = {}
        limit_queries[limit_split[0]]['gt'] = limit_split[1]
      elif limit.find('<') >= 0:
        limit_split = limit.split('<')
        if (limit_split[0] not in limit_queries):
          limit_queries[limit_split[0]] = {}
        limit_queries[limit_split[0]]['lt'] = limit_split[1]
    for limit_query_key, limit_query_value in limit_queries.items():
      query_parts_must.append({
        'range': {
          limit_query_key: limit_query_value
        }
      })
  
  query = {
    'query': {
      'constant_score': {
        'filter': {
          'bool': {
            'must': [{"match_all": {}}] + query_parts_must,
            'should': query_parts_should
          }
        }
      }
    }
  }
  
  es_result = es.search(
    index = app.config['TRAFFIC_ITEMS_ES'] + '-latest',
    doc_type = 'traffic_item',
    fields = 'id,location.lat,location.lon,traffic_item_type,area,start,end,occupancy_rate',
    body = query,
    size = 10000
  )
  result = []
  for single in es_result['hits']['hits']:
    item = {
      'id': single['fields']['id'][0],
      'lat': single['fields']['location.lat'][0],
      'lon': single['fields']['location.lon'][0],
      'type': single['fields']['traffic_item_type'][0]
    }
    if 'area' in single['fields']:
      item['area'] = json.loads(single['fields']['area'][0])
    if 'start' in single['fields']:
      item['start'] = single['fields']['start'][0]
    if 'end' in single['fields']:
      item['end'] = single['fields']['end'][0]
    if 'occupancy_rate' in single['fields']:
      item['occupancy_rate'] = single['fields']['occupancy_rate'][0]
    result.append(item)
  ret = {
    'status': 0,
    'request': saved_request,
    'duration': round((time.time() - start_time) * 1000),
    'response': result
  }
  json_output = json.dumps(ret, cls=util.MyEncoder, sort_keys=True)
  response = make_response(json_output, 200)
  response.mimetype = 'application/json'
  response.headers['Expires'] = util.expires_date(hours=24)
  response.headers['Cache-Control'] = util.cache_max_age(hours=24)
  return(response)
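The `l` parameter uses a small 'field<=value;field>value' mini-grammar, and the parsing block above reappears verbatim in Example 8. A minimal standalone sketch of that logic (a hypothetical helper, not part of the original module):

def parse_limits(limits_param):
  # Parse 'field<=v;field>v;...' into {field: {op: value}} suitable for
  # Elasticsearch range clauses. Two-character operators must be checked
  # before the one-character ones.
  limit_queries = {}
  for limit in limits_param.split(';'):
    for token, op in (('<=', 'lte'), ('>=', 'gte'), ('>', 'gt'), ('<', 'lt')):
      if token in limit:
        field, value = limit.split(token, 1)
        limit_queries.setdefault(field, {})[op] = value
        break
  return limit_queries

# parse_limits('speed<=50;length>100')
# -> {'speed': {'lte': '50'}, 'length': {'gt': '100'}}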
Example 8
def search_sharing_stations():
    start_time = time.time()
    fq = request.form.get('fq', '')
    limits = request.form.get('l', None)
    vehicle_all = request.form.get('vehicle_all', None)
    # The full ranges ("0,20" for the vehicle-count slider, "1,2,3,4,5" for
    # the type filter) are treated as "no filter at all".
    if vehicle_all == "0,20":
        vehicle_all = None
    vehicle_type = request.form.get('vehicle_type', None)
    if vehicle_type == '' or vehicle_type == '1,2,3,4,5':
        vehicle_type = None
    sort = request.form.get('sort', 'name.sort:asc')
    start = int(request.form.get('start', '0'))
    per_page = int(request.form.get('pp', '50'))
    view_type = request.form.get('vt', 's')

    saved_request = {'sort': sort, 'start': start, 'per_page': per_page}
    if fq:
        saved_request['fq'] = fq
    if limits:
        limits = limits.split(';')

    (sort_field, sort_order) = sort.split(':')
    if sort_field == 'score':
        sort_field = '_score'
    sort = {sort_field: {'order': sort_order}}

    query_parts_must = []
    query_parts_filter = []
    query_parts_should = []
    # all_count
    if vehicle_all:
        vehicle_all = vehicle_all.split(',')
        query_parts_must.append({
            'range': {
                'vehicle_all': {
                    'gte': vehicle_all[0],
                    'lte': 64 if vehicle_all[1] == '20' else vehicle_all[1]
                }
            }
        })
    # vehicle_type
    if vehicle_type:
        vehicle_type = vehicle_type.split(',')
        query_parts_filter.append({'terms': {'station_type': vehicle_type}})

    if limits:
        limit_queries = {}
        for limit in limits:
            if limit.find('<=') >= 0:
                limit_split = limit.split('<=')
                if (limit_split[0] not in limit_queries):
                    limit_queries[limit_split[0]] = {}
                limit_queries[limit_split[0]]['lte'] = limit_split[1]
            elif limit.find('>=') >= 0:
                limit_split = limit.split('>=')
                if (limit_split[0] not in limit_queries):
                    limit_queries[limit_split[0]] = {}
                limit_queries[limit_split[0]]['gte'] = limit_split[1]
            elif limit.find('>') >= 0:
                limit_split = limit.split('>')
                if (limit_split[0] not in limit_queries):
                    limit_queries[limit_split[0]] = {}
                limit_queries[limit_split[0]]['gt'] = limit_split[1]
            elif limit.find('<') >= 0:
                limit_split = limit.split('<')
                if (limit_split[0] not in limit_queries):
                    limit_queries[limit_split[0]] = {}
                limit_queries[limit_split[0]]['lt'] = limit_split[1]
        for limit_query_key, limit_query_value in limit_queries.items():
            query_parts_must.append(
                {'range': {
                    limit_query_key: limit_query_value
                }})
    query = {
        'query': {
            'bool': {
                'must': [{
                    "match_all": {}
                }] + query_parts_must,
                'filter': query_parts_filter
            }
        }
    }

    # Only the summary view type ('s') is implemented, so always request its
    # field set; this also keeps `fields` defined for any other `vt` value.
    fields = 'name,station_type,vehicle_all,location.lat,location.lon,sharing_provider.name,sharing_provider.slug'

    es_result = es.search(index=app.config['SHARING_STATION_ES'] + '-latest',
                          doc_type='sharing_station',
                          fields=fields,
                          body=query,
                          from_=start,
                          size=per_page,
                          sort=sort_field + ':' + sort_order)
    result = []
    for single in es_result['hits']['hits']:
        item = {
            'name': single['fields']['name'][0],
            'lat': single['fields']['location.lat'][0],
            'lon': single['fields']['location.lon'][0],
            'station_type': single['fields']['station_type'][0],
            'sharing_provider_slug':
            single['fields']['sharing_provider.slug'][0],
            'sharing_provider_name':
            single['fields']['sharing_provider.name'][0]
        }
        if 'vehicle_all' in single['fields']:
            item['vehicle_all'] = single['fields']['vehicle_all'][0]
        result.append(item)
    ret = {
        'status': 0,
        'request': saved_request,
        'duration': round((time.time() - start_time) * 1000),
        'response': result
    }
    json_output = json.dumps(ret, cls=util.MyEncoder, sort_keys=True)
    response = make_response(json_output, 200)
    response.mimetype = 'application/json'
    response.headers['Expires'] = util.expires_date(hours=24)
    response.headers['Cache-Control'] = util.cache_max_age(hours=24)
    return (response)
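A hedged request sketch; the route path and station-type value are placeholders:

# Hypothetical POST: stations of one type, sorted by name, 10 per page.
with app.test_client() as client:
    resp = client.post('/api/search_sharing_stations', data={
        'vehicle_type': '1',
        'sort': 'name.sort:asc',
        'pp': '10',
    })
    print(resp.get_json()['response'])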
Example 9
def get_papers_live(search_string, region_id):
  # Live-search suggestions: completes the last token of the search string
  # against the text_all field and returns completion/count pairs.
  search_string = search_string.split()
  if not len(search_string):
    return []
  search_string_to_complete = search_string[-1]
  
  query_parts = []
  
  query_parts.append({ 
    'match_phrase_prefix': {
      'text_all': search_string_to_complete
    }
  })
  
  query_parts.append({
    'terms': {
      'bodyId': app.config['regions'][region_id]['body'],
      'minimum_should_match': 1
    }
  })

  if len(search_string[0:-1]):
    query_parts.append({
      'query_string': {
        'fields': ['text_all'],
        'query': " ".join(search_string[0:-1]),
        'default_operator': 'and'
      }
    })

  result = es.search(
    index = app.config['es_paper_index'] + '-latest',
    doc_type = 'paper',
    fields = 'name',
    body = {
      'query': {
        'bool': {
          'must': query_parts
        }
      },
      'aggs': {
        'fragment': {
          'terms': {
            'field': 'text_all',
            'include': {
              'pattern': search_string_to_complete + '.*',
              'flags': 'CANON_EQ|CASE_INSENSITIVE'
            },
            'size': 10
          }
        }
      }
    },
    size = 0
  )
  
  search_results = []
  prefix = ""
  if len(search_string[0:-1]):
    prefix = " ".join(search_string[0:-1]) + " "
  for search_result in result['aggregations']['fragment']['buckets']:
    tmp_search_result = {
      'name': prefix + search_result['key'].capitalize(),
      'count' : search_result['doc_count']
    }
    search_results.append(tmp_search_result)
  return search_results
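A hedged usage sketch; the region id is a placeholder:

# Hypothetical call: suggest completions for the partially typed "bebau".
for suggestion in get_papers_live('bebau', 'koeln'):
  print(suggestion['name'], suggestion['count'])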
Example 10
def query_paper(region=None, q='', fq=None, sort='score:desc', start=0, papers_per_page=10, facets=None):
  # Full-text paper search with facet filters (fq), phrase support,
  # highlighting and publishedDate/paperType/bodyName aggregations.
  (sort_field, sort_order) = sort.split(':')
  if sort_field == 'score':
    sort_field = '_score'
  sort = {sort_field: {'order': sort_order}}
  # Parse the filter-query string `fq` into (field, value) tuples. Format:
  # 'field:value;field:&#34;quoted value&#34;' where &#34; is the
  # HTML-escaped double quote.
  fq = fq or ''
  x = 0
  result = []
  while True:
    y = fq.find(":", x)
    if y == -1:
      break
    temp = fq[x:y]
    x = y + 1
    if fq[x:x+5] == "&#34;":
      # Quoted value: read up to the closing &#34;.
      y = fq.find("&#34;", x + 5)
      if y == -1:
        break
      result.append((temp, fq[x+5:y]))
      x = y + 6
      if x > len(fq):
        break
    else:
      # Unquoted value: read up to the next ';' or the end of the string.
      y = fq.find(";", x)
      if y == -1:
        result.append((temp, fq[x:len(fq)]))
        break
      else:
        result.append((temp, fq[x:y]))
        x = y + 1
  facet_terms = []
  for sfq in result:
    if sfq[0] == 'publishedDate':
      (year, month) = sfq[1].split('-')
      date_start = datetime.datetime(int(year), int(month), 1)
      date_end = date_start + dateutil.relativedelta.relativedelta(months=+1,seconds=-1)
      facet_terms.append({
        'range': {
          'publishedDate': {
            'gt': date_start.isoformat('T'),
            'lt': date_end.isoformat('T')
          }
        }
      })
    else:
      facet_terms.append({
        'term': {
          sfq[0]: sfq[1]
        }
      })
  if region:
    facet_terms.append({
      'terms': {
        'bodyId': app.config['regions'][region]['body'],
        'minimum_should_match': 1
      }
    })
  
  # Extract phrases wrapped in HTML-escaped quotes (&#34;) from the search
  # string; each becomes an exact phrase query below.
  matches = re.findall("&#34;(.*?)&#34;", q, re.DOTALL)
  match_query = []
  for match in matches:
    if match.strip():
      match_query.append({
        'multi_match': {
          'fields': ['file.fulltext', 'file.name', 'name'],
          'type': 'phrase',
          'query': match.strip()
        }
      })
    q = q.replace("&#34;" + match + "&#34;", "")
  q = q.replace("&#34;", "").strip()
  if q:
    simple_query = [{
      'query_string': {
        'fields': ['file.fulltext', 'file.name', 'name'],
        'query': q,
        'default_operator': 'and'
      }
    }]
  else:
    simple_query = []
  
  query = {
    'query': {
      'bool': {
        'must': simple_query + match_query + facet_terms
      }
    },
    'highlight': {
      'pre_tags' : ['<strong>'],
      'post_tags' : ['</strong>'],
      'fields': {
        'file.fulltext': {
          'fragment_size': 200,
          'number_of_fragments': 1
        }
      }
    },
    'aggs': {
      'publishedDate': {
        'date_histogram': {
          'field': 'publishedDate',
          'interval': 'month'
        }
      },
      'paperType': {
        'terms': {
          'field': 'paperType'
        }
      },
      'bodyName': {
        'terms': {
          'field': 'bodyName'
        }
      }
    },
  }

  result = es.search(
    index = app.config['es_paper_index'] + '-latest',
    doc_type = 'paper',
    fields = 'name,paperType,publishedDate,bodyId,bodyName,externalId,file.fulltext',
    body = query,
    from_ = start,
    size = papers_per_page,
    sort = sort_field + ':' + sort_order
  )
  
  ret = {
    'numhits': result['hits']['total'],
    'maxscore': result['hits']['max_score'],
    'result': [],
    'facets': {}
  }
  for r in result['hits']['hits']:
    ret['result'].append({
      'id': r['_id'],
      'score': r['_score'],
      'bodyId': r['fields']['bodyId'][0],
      'bodyName': r['fields']['bodyName'][0],
      'name': r['fields']['name'][0] if 'name' in r['fields'] else '',
      'paperType': r['fields']['paperType'][0] if 'paperType' in r['fields'] else '',
      'publishedDate': r['fields']['publishedDate'][0] if 'publishedDate' in r['fields'] else '',
      'fileFulltext': r['highlight']['file.fulltext'][0].strip() if 'highlight' in r else None
    })
  if result['hits']['max_score'] is not None:
    ret['maxscore'] = result['hits']['max_score']
  for key in result['aggregations']:
    ret['facets'][key] = {}
    if key == 'publishedDate':
      for subval in result['aggregations'][key]['buckets']:
        ret['facets'][key][datetime.datetime.fromtimestamp(int(subval['key'])/1000).strftime('%Y-%m')] = subval['doc_count']
    if key in ['paperType', 'bodyName']:
      for subval in result['aggregations'][key]['buckets']:
        ret['facets'][key][subval['key']] = subval['doc_count']
  return ret
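A hedged usage sketch; region and facet values are placeholders. Note that quoted phrases in `q` and `fq` use &#34;, the HTML-escaped double quote the parser above expects:

# Hypothetical call: phrase search filtered to one month and paper type.
res = query_paper(
  region='koeln',
  q='&#34;Haushaltsplan 2014&#34; Entwurf',
  fq='publishedDate:2014-01;paperType:Beschlussvorlage',
)
print(res['numhits'], res['facets'].get('paperType'))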