Example #1
async def get_invalidWordsES(*,
                             projectName: str = Path(...),
                             fullMatch: Optional[bool] = False,
                             highlight: Optional[List[str]] = Query(['']),
                             showReturn: Optional[List[str]] = Query(['']),
                             searchItemID: Optional[str] = None,
                             searchItem: Optional[str] = None,
                             dateRange: Optional[List[str]] = Query(['', '']),
                             currentPage: int = 1,
                             pageSize: int = 10,
                             operatorFilter: Optional[List[str]] = Query(['']),
                             sourceFilter: Optional[List[str]] = Query([''])):
    """
    获取无效词列表EsS
    """

    projectId = await findProjectIdFromProjectName(
        dbPrefix,
        'Project',
        queryDict={'projectName': projectName},
        showDict={'_id': 1})
    if not projectId:
        raise HTTPException(status_code=503, detail='projectNotExist')

    # Pagination offsets
    start = 0
    end = 0
    # ES index to search against (equivalent to a database in MongoDB)
    _index = f'KWM-{projectId}.InvalidDict'.lower()
    #print('_index', _index)

    s = Search()

    if operatorFilter != ['']:
        # An operatorFilter query was supplied
        operatorFilter = unquote(operatorFilter[0], 'utf-8').split(',')
        #queryDict['operator'] = {'$in': operatorFilter}
        operatorFilter = '\"' + '\" \"'.join(operatorFilter) + '\"'
        #print('ccc',operatorFilter)
        q = Q("query_string", query=operatorFilter, fields=['operator'])
        s = s.query(q)

    if sourceFilter != ['']:
        # A sourceFilter query was supplied
        sourceFilter = unquote(sourceFilter[0], 'utf-8').split(',')
        #queryDict['source'] = {'$in': sourceFilter}
        sourceFilter = '\"' + '\" \"'.join(sourceFilter) + '\"'
        q = Q("query_string", query=sourceFilter, fields=['source'])
        s = s.query(q)

    if dateRange != ['', '']:
        dateRange = unquote(dateRange[0], 'utf-8').split(',')
        #print('dateRange',dateRange)
        if dateRange != ['', '']:
            #s = s.query('range',**{'timestamp': {'gte': dateRange[0], 'lt': dateRange[1]}}) # this also works; the Q-based form below is used for consistency
            r = Q(
                'range',
                **{'modifiedTime': {
                    'gte': dateRange[0],
                    'lt': dateRange[1]
                }})
            s = s.query(r)

    if searchItem:  # single lookups do not go through ES, so only searchItem appears here, never searchItemID
        # Keyword search requested
        #queryDict['word'] = {'$regex': searchItem, '$options': 'i'} # substring match, case-insensitive
        q = Q("multi_match", query=f"{searchItem.strip()}", fields=['word'])
        s = s.query(q)

    # Which fields to return
    if showReturn != ['']:
        showReturn = unquote(showReturn[0], 'utf-8').split(',')
        s = s.source(includes=showReturn)
    else:
        s = s.source(includes=[])

    # Which fields to highlight
    if highlight != ['']:
        highlight = unquote(highlight[0], 'utf-8').split(',')
        #print(highlight)
        s = s.highlight_options(order='score')
        s = s.highlight_options(pre_tags="<strong>")
        s = s.highlight_options(post_tags="</strong>")
        for ele in highlight:  # add each requested field to the highlight
            s = s.highlight(ele)

    # Pagination
    if currentPage == 0 and pageSize == 0:
        # Return all data. Hard-coded to 10000; anything beyond that raises an
        # error. scan() would avoid the cap, but scan() does not sort; to be
        # fixed later.
        s = s[0:10000]
    else:
        start = (currentPage - 1) * pageSize
        end = start + pageSize
        s = s[start:end]

    # Execute the search
    try:
        response = await esRun(s.to_dict(),
                               _index)  #s.execute(ignore_cache=True)
    except Exception as e:
        print(e)
        return ({'count': 0, 'content': []})
    else:
        totalCount = response.hits.total.value
        temp = response.to_dict()['hits']['hits']
        result = []
        for item in temp:
            tt = {'_id': {'$oid': item['_id']}}
            tt.update(item['_source'])
            if item.get('highlight'):
                tt.update({'highlight': item['highlight']})
            if start >= 0 and end > 0:
                tt.update({'id': start + 1})
            result.append(tt)
            start = start + 1
        #print(result)
        return ({'count': totalCount, 'content': result})
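The comment in the pagination branch above points at the trade-off: slicing respects the sort order but is capped by index.max_result_window (10,000 hits by default), while scan() streams every hit past that cap yet drops the sort. A minimal, hypothetical sketch of the two retrieval modes, assuming an elasticsearch_dsl connection has already been registered:

from elasticsearch_dsl import Search

def fetch_page(index_name, page, page_size):
    # Sliced pagination: keeps the sort order, but start + page_size must stay
    # below the index.max_result_window setting.
    s = Search(index=index_name).sort('-modifiedTime')
    start = (page - 1) * page_size
    return s[start:start + page_size].execute()

def fetch_all(index_name):
    # scan() streams every matching document past the window limit,
    # but it does not apply any sort.
    return list(Search(index=index_name).scan())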
Example #2
def _build_query(q, cql):

    # this would be handled by the AST when traversing the CQL model
    op, node = get_next_node(cql.__root__)
    q.operation = op
    if isinstance(node, list):
        query_list = []
        for elem in node:
            op, next_node = get_next_node(elem)
            if not getattr(next_node, 'between', 0) == 0:
                property = next_node.between.value.__root__.__root__.property
                lower = next_node.between.lower.__root__.__root__
                upper = next_node.between.upper.__root__.__root__
                query_list.append(
                    Q({'range': {
                        f'{property}': {
                            'gte': lower,
                            'lte': upper
                        }
                    }}))
            if not getattr(next_node, '__root__', 0) == 0:
                scalars = tuple(next_node.__root__.eq.__root__)
                property = scalars[0].__root__.property
                value = scalars[1].__root__.__root__
                query_list.append(Q({'match': {f'{property}': f'{value}'}}))
        q.must(query_list)
    elif not getattr(node, 'between', 0) == 0:
        property = node.between.value.__root__.__root__.property
        lower = None
        if not getattr(node.between.lower, '__root__', 0) == 0:
            lower = node.between.lower.__root__.__root__
        upper = None
        if not getattr(node.between.upper, '__root__', 0) == 0:
            upper = node.between.upper.__root__.__root__
        query = Q({'range': {f'{property}': {'gte': lower, 'lte': upper}}})
        q.must(query)
    elif not getattr(node, '__root__', 0) == 0:
        next_op, next_node = get_next_node(node)
        if not getattr(next_node, 'eq', 0) == 0:
            scalars = tuple(next_node.eq.__root__)
            property = scalars[0].__root__.property
            value = scalars[1].__root__.__root__
            query = Q({'match': {f'{property}': f'{value}'}})
            q.must(query)
    elif not getattr(node, 'intersects', 0) == 0:
        property = node.intersects.__root__[0].__root__.property
        if property == 'geometry':
            geom_type = node.intersects.__root__[
                1].__root__.__root__.__root__.type
            if geom_type.value == 'Polygon':
                coordinates = node.intersects.__root__[
                    1].__root__.__root__.__root__.coordinates
                coords_list = [
                    poly_coords.__root__ for poly_coords in coordinates[0]
                ]
                filter_ = Q({
                    'geo_shape': {
                        'geometry': {
                            'shape': {
                                'type': 'envelope',
                                'coordinates': get_envelope(coords_list)
                            },
                            'relation': 'intersects'
                        }
                    }
                })
                query_all = Q({'match_all': {}})
                q.must(query_all)
                q.filter(filter_)
    return q.build()
Example #3
def query(s, q):
    s.query = Q("query_string", query=q)
    return s
Example #4
    class Meta:
        """Configuration for OAI server search."""

        default_filter = Q('exists', field='_oai.id')
Example #5
import time, datetime, threading
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search, Q

cipher = '4a:25:b7:26:47:d3:29:67:14:58:e6:48:e2:0a:8e:5f:6d:f1:67:fe:4a:1d:19:cf:30:80:2e:a2:c4:5a:65:77:20:66:28:ef:03:5e:88:b7:c4:fb:ba:8b:e5:c6:1d:b1:f1:94:7a:11:8b:d7:06:be:49:b7:3a:29:7b:7e:23:ae:5f:37:19:ff:6a:24:d3:60:fd:24:ac:96:38:dd:e1:8b:75:7a:80:20:88:80:8d:d3:b0:a9:8e:c6:9b:14:7f:5e:9a:3e:55:75:af:98:36:e7:fc:d5:aa:4e:08:5a:63:f4:60:45:22:95:e1:ca:09:bd:de:c1:13:b0:31:9c:d2:70:84:e3:59:25:f1:32:8a:f0:bb:b3:c8:ff:6a:e3:fd:35:80:11:92:9e:c3:61:c9:35:c4:9f:64:b0:96:66:1c:36:ac:01:9c:74:44:3c:af:9e:fb:e0:f0:a2:3d:ab:d5:03:6d:39:a7:ca:14:41:cf:e0:c1:f3:4b:f1:50:6e:1e:98:62:67:53:12:b9:43:b7:84:21:fe:64:c0:7d:ca:fe:b9:67:9a:8f:39:0b:83:09:ae:50:98:69:13:85:ad:3b:47:c6:47:e0:ec:f2:46:87:ca:83:83:79:fc:1b:de:10:81:83:d4:48:0b:70:82:c5:a9:38:23:9f:6c:04:5b:f4:e6:78:9e:af:2d:24:17:a4:bd:e9:45:7d:81:52:e2:7f:45:d2:2f:b2:ea:d2:a6:4f:a3:e6:59:4b:17:7d:75:6a:04:b1:fe:1c:31:22:a7:0e:07:3b:42:e6:d1:44:ea:dc:62:c4:68:23:83:0c:37:07:15:61:a2:25:8b:af:5c:fe:45:dd:5d:e7:25:db:e9:fb:f6:57:7a:66:71:c0:1b:bd:45:d0:4e:4e:5c:f2:ae:aa:6f:e7:a2:34:c5:5a:6a:19:ff:d0:de:eb:b7:db:58:80:16:fe:07:7a:c2:f1:43:4c:dc:57:4b:d7:a1:9c:66:b9:66:1c:03:7e:9d:81:f8:d6:a6:36:8b:5b:03:f4:a1:48:5e:fd:7b:99:d8:c3:69:99:93:68:0f:7e:61:2f:19:b9:e8:c2:e4:65:1f:fa:57:ff:8f:85:7e:dd:da:a4:df:5e:63:77:50:2a:64:ad:d2:66:5f:35:67:21:ff:07:1e:23:50:5d:0c:48:9f:89:c5:63:93:05:59:71:81:90:79:cd:0b:47:ec:4d:15:6b:df:dd:6a:7a:45:62:16:07:0d:4c:3d:f0:95:5b:01:d5:65:d4:29:a9:78:a1:ad:82:43:30:10:60:f2:07:08:9c:91:f3:ea:81:fa:cf:14:71:ae:eb:de:c4:8e:40:7c:ee:48:97:5a:c3:86:cf:a1:b7:f3:d8:30:50:4d:03:97:b5:30:de:91:44:69:13:77:57:90:11:d5:97:7f:a6:55:7e:19:e6:88:d9:c4:4f:8a:34:5d:53:90:aa:c8:40:23:9a:2f:73:b7:34:f5:af:5e:69:d0:c9:d6:40:b3:80:74:b9:c5:74:8a:4f:d4:18:d9:72:01:d8:ba:86:bb:39:f0:7b:f6:25:81:8b:54:a2:89:84:50:ed:55:6e:ae:75:e2:b7:f5:f2:ab:70:8d:07:d4:0f:8e:4e:25:83:39:27:c4:87:ac:a6:ad:02:1b:e2:16:b1:06:63:0c:0f:dc:b3:6c:d7:9f:29:78:e9:3a:d9:1d:1a:93:77:7d:b6:07:d6:3c:03:9c:16:e4:6e:11:30:38:d4:ea:a4:d5:8e:fd:ad:52:e4:19:3f:91:b1:bb:8e:51:fd:ca:d2:71:c5:fa:ab:f0:8b:44:fd:c5:be:dd:fb:b4:16:5a:e5:48:6f:82:a1:31:74:d8:43:b8:99:a3:da:40:85:7a:a0:38:82:dc:73:06:a5:53:7c:a4:51:df:ea:a1:27:a1:8c:f3:46:30:60:7d:54:09:af:7f:7b:09:90:17:79:0c:0a:d7:29:43:e3:00:1d:0f:71:70:a2:c0:b0:93:46:0f:5b:c1:5b:c7:76:a7:bd:1b:d6:82:0a:a4:cf:c6:f7:0b:19:19:1c:0c:21:df:cd:cd:b8:bf:6a:94:bf:80:28:03:94:a0:75:bc:83:82:1d:97:c8:7d:a7:ea:c0:f7:e4:f6:a3:c0:ba:ad:0b:c6:44:68:44:fe:cf:aa:2a:7d:e0:73:23:89:a3:5a:6b:ca:40:bd:8f:05:cc:1d:67:67:72:0f:83:28:4f:39:6f:4f:0a:72:2b:3e:43:5a:eb:b3:18:ba:c1:1f:fe:26:f9:46:ba:ec:a2:63:fa:fb:22:d4:c7:15:29:3f:8a:82:ea:a2:d9:b0:19:af:02:6d:c2:d0:78:bf:31:8e:80:1d:2c:34:ec:40:60:b9:3b:08:f7:e6:23:ae:dd:70:00:7c:0c:0f:e8:7f:91:30:8f:42:9d:c1:4d:08:22:97:b1:86:a5:84:9a:9a:df:c0:e3:82:ef:26:91:fb:17:5b:3d:b3:4c:73:e6:26:c7:54:d1:23:d1:fc:80:5e:af:f9:a5:e2:09:e6:9a:2e:b8:52:86:1a:d9:0a:3b:7c:77:d7:83:76:6e:e1:54:64:fc:da:98:7c:a2:1f:8a:39:a5:18:ea:20:71:9a:90:43:9f:19:e8:69:5c:08:a9:70:a4:62:74:e2:21:f8:9c:01:69:48:9a:b5:4c:18:2e:df:ca:1f:8c:9e:b7:c1:6a:e2:a4:d2:8f:3b:f1:cb:25:64:6f:37:31:db:9b:0c:5d:70:cd:42:15:0c:c8:37:78:92:19:4b:18:ec:b1:43:82:68:19:76:e3:fd'
ip_src = '192.168.2.182'
timestamp = 1563843257911
msg_type = 14

es = Elasticsearch('192.168.2.140:9200')
s = Search(using=es, index="cipher-*")
s = s[0:10000]
if msg_type == 12:
    q = Q('match', layers__kerberos_msg_type=11) & Q(
        'match', layers__ip_dst=ip_src) & Q(
            'match', layers__kerberos_cipher__keyword=cipher)

if msg_type == 14:
    q = (Q('match', layers__kerberos_msg_type=11)
         | Q('match', layers__kerberos_msg_type=13)) & Q(
             'match', layers__ip_dst=ip_src) & Q(
                 'match', layers__kerberos_cipher__keyword=cipher)
s1 = s.query(q)
response = s1.execute()
if len(response) != 0:
    print('normal')

else:
    qtime = Q('range',
              timestamp={
                  'gte': int(timestamp),
Example #6
    def get_filter(self, named_query):

        dic = self._filters.get(named_query)
        key, val = next(iter(dic.items()))
        return Q(key, **val)
Example #7
    def _build_query(self):
        query = Q()

        source = ['id']
        sort = []

        aggregations = {}
        query_string = None
        as_list = as_dict = False

        for action, value in self.steps:
            if action == 'order_by':
                for key in value:
                    if key.startswith('-'):
                        sort.append({key[1:]: 'desc'})
                    else:
                        sort.append(key)
            elif action == 'values':
                source.extend(value)
                as_list, as_dict = True, False
            elif action == 'values_dict':
                if value:
                    source.extend(value)
                as_list, as_dict = False, True
            elif action == 'query':
                query &= self._process_queries(value)
            elif action == 'filter':
                query &= self._process_filters(value)
            elif action == 'source':
                source.extend(value)
            elif action == 'aggregate':
                aggregations.update(value)
            elif action == 'filter_query_string':
                query_string = value
            else:
                raise NotImplementedError(action)

        # If we have a raw query string we are going to apply all sorts
        # of boosts and filters to improve relevance scoring.
        #
        # We are using the same rules that `search.filters:SearchQueryFilter`
        # implements to have a single-source of truth for how our
        # scoring works.
        from olympia.search.filters import SearchQueryFilter

        search = Search().query(query)

        if query_string:
            search = SearchQueryFilter().apply_search_query(
                query_string, search)

        if sort:
            search = search.sort(*sort)

        if source:
            search = search.source(source)

        body = search.to_dict()

        # These are manually added for now to simplify a partial port to
        # elasticsearch-dsl
        if self.start:
            body['from'] = self.start
        if self.stop is not None:
            body['size'] = self.stop - self.start
        if aggregations:
            body['aggs'] = aggregations

        self.source, self.as_list, self.as_dict = source, as_list, as_dict
        return body
Example #8
 def get_es_query(self):
     return [Q(self.operator, **{self.es_field: self.get_value()})]
Example #9
def count_logs(resources, userid=0):
    s = Search().extra(size=0)

    conds = []

    for res in resources:
        conds.append(
            Q('match', **{'context__' + res._my_subclass + '_id': res.id}))

    if userid != 0:
        s = s.query('bool',
                    must=[
                        Q("range", datetime={
                            'gte': 'now-7d',
                            'lte': 'now-1d'
                        }),
                        Q("match", component="resources"),
                        Q('bool',
                          should=[
                              Q('match', action='access'),
                              Q('match', action='view')
                          ]),
                        Q('bool', should=conds),
                        Q('match', user_id=userid)
                    ])
    else:
        s = s.query('bool',
                    must=[
                        Q("range", datetime={
                            'gte': 'now-7d',
                            'lte': 'now-1d'
                        }),
                        Q("match", component="resources"),
                        Q('bool',
                          should=[
                              Q('match', action='access'),
                              Q('match', action='view')
                          ]),
                        Q('bool', should=conds)
                    ])

    return s
Example #10
def main():

    print("")
    print(" /\ /\  ___  ___ _ __ / /(_)_ __   ___ ")
    print("/ / \ \/ __|/ _ \ '__/ / | | '_ \ / _ \\")
    print("\ \_/ /\__ \  __/ | / /__| | | | |  __/")
    print(" \___/ |___/\___|_| \____/_|_| |_|\___|  v{}".format(
        config.VERSION))
    print("")
    print("Author: Chema Garcia (aka sch3m4)")
    print("        @sch3m4")
    print("        https://github.com/thiber-org/userline")
    print("")

    log = logging.getLogger(config.APP_NAME)
    log.setLevel(logging.INFO)
    std = logging.StreamHandler(sys.stdout)
    std.setLevel(logging.INFO)
    formatter = logging.Formatter(config.LOG_FORMAT)
    std.setFormatter(formatter)
    log.addHandler(std)

    parser = argparse.ArgumentParser()

    required = parser.add_argument_group('Required arguments')
    required.add_argument(
        "-H",
        "--eshosts",
        help=
        "Single or comma separated list of ElasticSearch hosts to query (default: localhost)",
        default=defaults.ES_HOSTS)
    required.add_argument("-S",
                          "--pool-size",
                          help="Connection pool size (default: {})".format(
                              defaults.ES_POOL_SIZE),
                          type=int,
                          default=defaults.ES_POOL_SIZE)
    required.add_argument("-i",
                          "--index",
                          help="Index name/pattern",
                          required=True)

    aux = parser.add_argument_group('Actions')
    action = aux.add_mutually_exclusive_group(required=True)
    action.add_argument("-L",
                        "--last-shutdown",
                        help="Gets last shutdown data",
                        action='store_true',
                        default=False)
    action.add_argument("-E",
                        "--last-event",
                        help="Gets last event data",
                        action='store_true',
                        default=False)
    action.add_argument("-l",
                        "--logons",
                        help="Shows user logon activity",
                        action='store_true',
                        default=False)
    action.add_argument("-w",
                        "--who-was-at",
                        help="Shows only logged on users at a given time",
                        metavar="DATE")

    output = parser.add_argument_group('Output')
    output.add_argument("-c",
                        "--csv-output",
                        help="CSV Output file",
                        type=argparse.FileType('w'),
                        metavar="PATH")
    output.add_argument(
        "-n",
        "--neo4j",
        help="Neo4j bolt with auth (format: bolt://user:pass@host:port)",
        metavar="BOLT")

    csvout = parser.add_argument_group('CSV options')
    csvout.add_argument("-F",
                        "--disable-timeframe",
                        help="Do not create timeframe entries",
                        action='store_true',
                        default=False)

    neoargs = parser.add_argument_group('Neo4J options')
    neoargs.add_argument(
        "-f",
        "--neo4j-full-info",
        help="Saves full logon/logoff info in Neo4j relations",
        action='store_true',
        default=False)
    neoargs.add_argument("-s",
                         "--unique-logon-rels",
                         help="Sets unique logon relations",
                         action='store_true',
                         default=False)

    optional = parser.add_argument_group('Optional filtering arguments')
    optional.add_argument(
        "-t",
        "--min-date",
        help="Searches since specified date (default: {})".format(
            defaults.MIN_DATE),
        default=defaults.MIN_DATE)
    optional.add_argument(
        "-T",
        "--max-date",
        help="Searches up to specified date (default: {})".format(
            defaults.MAX_DATE),
        default=defaults.MAX_DATE)
    optional.add_argument("-p", "--pattern", help="Includes pattern in search")
    optional.add_argument(
        "-I",
        "--include-local",
        help="Includes local services logons (default: Excluded)",
        action='store_true',
        default=False)
    optional.add_argument(
        "-k",
        "--include-locks",
        help="Includes workstation/screensaver lock events (default: Excluded)",
        action='store_true',
        default=False)
    optional.add_argument("-v",
                          "--verbose",
                          help="Enables verbose mode",
                          action='store_true',
                          default=False)

    extrainfo = parser.add_argument_group('Extra information')
    extrainfo.add_argument("-m",
                           "--mark-if-logged-at",
                           help="Marks logged in users at a given time",
                           metavar="DATETIME")

    args = parser.parse_args()

    if args.last_event is False and args.logons is False and args.who_was_at is None and args.last_shutdown is False:
        log.critical("You need to specify at least one action argument")
        return

    if args.verbose is True:
        log.setLevel(logging.DEBUG)
        std.setLevel(logging.DEBUG)

    try:
        mindate = int(dateparser.parse(args.min_date).timestamp() * 10**3)
        maxdate = int(dateparser.parse(args.max_date).timestamp() * 10**3)
        if args.who_was_at is not None:
            whowasat = int(
                dateparser.parse(args.who_was_at).timestamp() * 10**3)
        if args.mark_if_logged_at is not None:
            whowasat = int(
                dateparser.parse(args.mark_if_logged_at).timestamp() * 10**3)
    except Exception as exc:
        log.critical("Error parsing date: {}".format(exc))
        return

    # setup elasticsearch
    connections.create_connection(hosts=args.eshosts.split(','),
                                  maxsize=args.pool_size)
    conn = connections.get_connection()

    # shows last shutdown
    if args.last_shutdown is True:
        aux = utils.get_last_shutdown(args.index, maxdate)
        if aux is not None:
            log.info("Last shutdown:")
            for k in aux.keys():
                item = aux[k]
                evt = utils.build_event_from_source(item)
                lastraw = utils.get_last_event(args.index, evt['computer'])
                lastevt = utils.build_event_from_source(lastraw)
                uptime = timedelta(
                    microseconds=(lastevt['timestamp'] - evt['timestamp']) *
                    10**3)
                log.info("Computer: {}".format(evt['computer']))
                log.info("\t- Datetime: {}".format(evt['datetime']))
                log.info("\t- Uptime:   {}".format(uptime))
                log.info("\t- EvtIndex: {}".format(evt['index']))
                log.info("\t- EvtId:    {}".format(evt['sourceid']))
        else:
            log.info("No shutdown found")
        return

    # shows last stored event
    if args.last_event is True:
        aux = utils.get_last_event(args.index)
        if aux is not None:
            log.info("Last event:")
            for k in aux.keys():
                lastevt = utils.build_event_from_source(aux[k])
                log.info("Computer: {}".format(lastevt['computer']))
                log.info(json.dumps(lastevt, sort_keys=True, indent=4))
        else:
            log.info("No events found")
        return

    # we need an output format
    if args.csv_output is None and args.neo4j is None:
        log.critical("This option requires CSV/Neo4J output")
        return

    csv = None
    if args.csv_output is not None:
        csv = CSV(args.csv_output)
        if args.mark_if_logged_at is None:
            csv.disable_mark()

    neo = None
    if args.neo4j is not None:
        neo = Neo4J(args.neo4j)

    log.info("Building query")
    # Look for first required events
    q = Q('match',
          data_type='windows:evtx:record') & utils.get_dsl_logon_query(
              args.include_locks)

    if args.pattern is not None:
        q = q & Q('query_string', query=args.pattern, analyze_wildcard=True)

    s = Search(using=conn,
               index=args.index).query(q).filter('range',
                                                 datetime={
                                                     'gte': mindate,
                                                     'lte': maxdate
                                                 }).sort('datetime')

    log.debug("Getting events count")
    total = s.execute().hits.total
    log.info("Found {} events to be processed".format(total))

    # timeframe
    if total > 0 and csv is not None and args.disable_timeframe is False:
        frame = dict(config.EVENT_STRUCT)
        for k in frame.keys():
            frame[k] = "-" * 10
        frame[config.CSV_FIELDS[0]] = "TIMEFRAME/START"
        frame['logon.datetime'] = args.min_date
        frame['logon.timestamp'] = mindate
        frame['logoff.datetime'] = args.min_date
        frame['logoff.timestamp'] = mindate
        csv.add_sequence(frame)

    count = 0
    proglen = 0
    progress = 0
    begin = time.time()
    log.info("Processing events")
    for hit in s.scan():
        login = utils.build_event_from_source(hit)
        log.debug("Got logon event: {}".format(login['id']))
        duration = ''
        logout = None

        # local service check
        if args.include_local is False and ( \
             login['domain'] == config.LOCAL_DOMAIN or \
             login['username'].upper() == "{}$".format(login['computer'].split('.')[0]).upper() or \
             login['logonid'] == config.CONSTANT_NA \
            ):
            discard = True
            log.debug("Discarding event")

        # workstation/screensaver locks
        elif args.include_locks is False and login[
                'type'] == config.LOGON_TYPE_UNLOCK:
            discard = True
            log.debug("Discarding event")
        else:
            aux = utils.get_logout_event(args.index, login['logonid'],
                                         login['timestamp'], maxdate,
                                         args.include_locks)
            logout = utils.build_event_from_source(aux)
            log.debug("Got logoff event for login id {}".format(login['id']))

            if logout['timestamp'] > 0:
                aux = logout['timestamp'] - login['timestamp']
                try:
                    duration = str(timedelta(microseconds=aux * 10**3))
                except:
                    duration = '-'

            event = utils.build_logon_sequence(duration, login, logout)
            if logout is not None:
                log.debug("Logon sequence complete")

            if args.mark_if_logged_at is not None:
                event['mark.description'] = "Logged on at {}".format(
                    args.mark_if_logged_at)
                if login['timestamp'] > whowasat or (
                        logout['timestamp'] > 0
                        and logout['timestamp'] < whowasat):
                    event['mark.value'] = False
                else:
                    event['mark.value'] = True

            discard = False
            if args.who_was_at is not None and (
                    login['timestamp'] > whowasat or
                (logout['timestamp'] > 0 and logout['timestamp'] < whowasat)):
                discard = True
                log.debug("Discarding event")

        if discard is False:
            count += 1
            if csv is not None:
                csv.add_sequence(event)
            if neo is not None:
                neo.add_sequence(event, args.neo4j_full_info,
                                 args.unique_logon_rels)
            log.debug("Event stored")

        progress += 1
        proglen = utils.draw_progress_bar(
            float((progress * 100 / total) / 100.0), begin, proglen)

    # timeframe
    if total > 0 and csv is not None and args.disable_timeframe is False:
        frame = dict(config.EVENT_STRUCT)
        for k in frame.keys():
            frame[k] = "-" * 10
        frame[config.CSV_FIELDS[0]] = "TIMEFRAME/END"
        frame['logon.datetime'] = args.max_date
        frame['logon.timestamp'] = maxdate
        frame['logoff.datetime'] = args.max_date
        frame['logoff.timestamp'] = maxdate
        csv.add_sequence(frame)

    total = timedelta(microseconds=int((time.time() - begin) * 10**6))
    print("")
    log.info("{} Logons processed in {}".format(count, total))

    if neo is not None:
        neo.finish()

    return
Example #11
 def get_es_query(self):
     # Just using 'terms' would not work, as it would return any tag match
     # in the list, but we want to exactly match all of them.
     return [Q('term', tags=tag) for tag in self.get_value()]
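The comment above is the crux: a single terms query matches documents containing any of the supplied tags, whereas one term query per tag combined under a bool must requires all of them. A small sketch of the difference, with made-up tag values:

from elasticsearch_dsl import Q

tags = ['linux', 'firefox']

# Matches documents that contain at least one of the tags.
any_tag = Q('terms', tags=tags)

# Matches only documents that contain every tag.
all_tags = Q('bool', must=[Q('term', tags=tag) for tag in tags])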
Example #12
 def query_parser(qstr=None):
     """Default parser that uses the Q() from elasticsearch_dsl."""
     if qstr:
         return Q('query_string', query=qstr)
     return Q()
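As a quick illustration of the docstring above (treating query_parser as a standalone function and using made-up input): a non-empty string becomes a query_string query, while an empty call falls back to Q(), i.e. a match_all query.

print(query_parser('title:python').to_dict())  # {'query_string': {'query': 'title:python'}}
print(query_parser().to_dict())                # {'match_all': {}}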
Example #13
    def get_object_list(self, request):
        user = request.user
        query_text = request.GET.get('query', None)
        if not user.is_authenticated:
            result_dict = simple_search_public_data(query_text)
            return [SearchObject(id=1, hits=result_dict)]
        groups = user.groups.all()
        index_list = ['experiments', 'dataset', 'datafile']
        ms = MultiSearch(index=index_list)

        query_exp = Q("match", title=query_text)
        query_exp_oacl = Q("term", objectacls__entityId=user.id) | \
            Q("term", public_access=100)
        for group in groups:
            query_exp_oacl = query_exp_oacl | \
                                 Q("term", objectacls__entityId=group.id)
        query_exp = query_exp & query_exp_oacl
        ms = ms.add(
            Search(index='experiments').extra(
                size=MAX_SEARCH_RESULTS,
                min_score=MIN_CUTOFF_SCORE).query(query_exp))

        query_dataset = Q("match", description=query_text)
        query_dataset_oacl = Q("term", **{'experiments.objectacls.entityId': user.id}) | \
            Q("term", **{'experiments.public_access': 100})
        for group in groups:
            query_dataset_oacl = query_dataset_oacl | \
                                 Q("term", **{'experiments.objectacls.entityId': group.id})
        ms = ms.add(
            Search(index='dataset').extra(
                size=MAX_SEARCH_RESULTS,
                min_score=MIN_CUTOFF_SCORE).query(query_dataset).query(
                    'nested', path='experiments', query=query_dataset_oacl))

        query_datafile = Q("match", filename=query_text)
        query_datafile_oacl = Q("term", experiments__objectacls__entityId=user.id) | \
            Q("term", experiments__public_access=100)
        for group in groups:
            query_datafile_oacl = query_datafile_oacl | \
                                 Q("term", experiments__objectacls__entityId=group.id)
        query_datafile = query_datafile & query_datafile_oacl
        ms = ms.add(
            Search(index='datafile').extra(
                size=MAX_SEARCH_RESULTS,
                min_score=MIN_CUTOFF_SCORE).query(query_datafile))
        results = ms.execute()
        result_dict = {k: [] for k in ["experiments", "datasets", "datafiles"]}
        for item in results:
            for hit in item.hits.hits:
                if hit["_index"] == "dataset":
                    result_dict["datasets"].append(hit.to_dict())

                elif hit["_index"] == "experiments":
                    result_dict["experiments"].append(hit.to_dict())

                elif hit["_index"] == "datafile":
                    result_dict["datafiles"].append(hit.to_dict())

        return [SearchObject(id=1, hits=result_dict)]
Example #14
def search(request):

    q = request.params.get("q", '')

    if q:
        should = []
        for field in SEARCH_FIELDS:
            kw = {"query": q}
            if field in SEARCH_BOOSTS:
                kw["boost"] = SEARCH_BOOSTS[field]
            should.append(Q("match", **{field: kw}))

        # Add a prefix query if ``q`` is longer than one character.
        if len(q) > 1:
            should.append(Q('prefix', normalized_name=q))

        query = request.es.query("dis_max", queries=should)
        query = query.suggest("name_suggestion", q, term={"field": "name"})
    else:
        query = request.es.query()

    if request.params.get("o"):
        query = query.sort(request.params["o"])

    if request.params.getall("c"):
        query = query.filter("terms", classifiers=request.params.getall("c"))

    try:
        page_num = int(request.params.get("page", 1))
    except ValueError:
        raise HTTPBadRequest("'page' must be an integer.")

    page = ElasticsearchPage(
        query,
        page=page_num,
        url_maker=paginate_url_factory(request),
    )

    if page.page_count and page_num > page.page_count:
        return HTTPNotFound()

    available_filters = collections.defaultdict(list)

    classifiers_q = (request.db.query(Classifier).with_entities(
        Classifier.classifier).filter(
            exists([release_classifiers.c.trove_id]).where(
                release_classifiers.c.trove_id == Classifier.id)).order_by(
                    Classifier.classifier))

    for cls in classifiers_q:
        first, *_ = cls.classifier.split(' :: ')
        available_filters[first].append(cls.classifier)

    def filter_key(item):
        try:
            return 0, SEARCH_FILTER_ORDER.index(item[0]), item[0]
        except ValueError:
            return 1, 0, item[0]

    return {
        "page": page,
        "term": q,
        "order": request.params.get("o", ''),
        "available_filters": sorted(available_filters.items(), key=filter_key),
        "applied_filters": request.params.getall("c"),
    }
Example #15
 def filter_queryset(self, qs):
     qs = super().filter_queryset(qs)
     qs = qs.query(query.Bool(filter=[Q('term', is_recommended=True)]))
     return qs.query('function_score',
                     functions=[query.SF('random_score')]).sort('_score')
Example #16
def resource_accessess(resource, userid=0):
    s = Search().extra(size=0)

    if userid != 0:
        s = s.query(
            'bool',
            must=[
                Q("range", datetime={
                    'gte': 'now-7d',
                    'lte': 'now-1d'
                }),
                Q("match", component="resources"),
                Q('bool',
                  should=[
                      Q('match', action='access'),
                      Q('match', action='view')
                  ]),
                Q('match',
                  **{'context__' + resource._my_subclass + '_id':
                     resource.id}),
                Q('match', user_id=userid)
            ])
    else:
        s = s.query(
            'bool',
            must=[
                Q("range", datetime={
                    'gte': 'now-7d',
                    'lte': 'now-1d'
                }),
                Q("match", component="resources"),
                Q('bool',
                  should=[
                      Q('match', action='access'),
                      Q('match', action='view')
                  ]),
                Q('match',
                  **{'context__' + resource._my_subclass + '_id': resource.id})
            ])

    return s
Example #17
def apply_ao_specific_query_params(query, **kwargs):
    must_clauses = []
    if kwargs.get('ao_no'):
        must_clauses.append(Q('terms', no=kwargs.get('ao_no')))

    if kwargs.get('ao_name'):
        must_clauses.append(Q('match', name=' '.join(kwargs.get('ao_name'))))

    if kwargs.get('ao_is_pending') is not None:
        must_clauses.append(Q('term', is_pending=kwargs.get('ao_is_pending')))

    if kwargs.get('ao_status'):
        must_clauses.append(Q('match', status=kwargs.get('ao_status')))

    if kwargs.get('ao_requestor'):
        must_clauses.append(Q('match', requestor_names=kwargs.get('ao_requestor')))

    citation_queries = []
    if kwargs.get('ao_regulatory_citation'):
        for citation in kwargs.get('ao_regulatory_citation'):
            exact_match = re.match(r"(?P<title>\d+)\s+C\.?F\.?R\.?\s+§*\s*(?P<part>\d+)\.(?P<section>\d+)", citation)
            if(exact_match):
                citation_queries.append(Q("nested", path="regulatory_citations", query=Q("bool",
                    must=[Q("term", regulatory_citations__title=int(exact_match.group('title'))),
                        Q("term", regulatory_citations__part=int(exact_match.group('part'))),
                        Q("term", regulatory_citations__section=int(exact_match.group('section')))])))

    if kwargs.get('ao_statutory_citation'):
        for citation in kwargs.get('ao_statutory_citation'):
            exact_match = re.match(r"(?P<title>\d+)\s+U\.?S\.?C\.?\s+§*\s*(?P<section>\d+).*\.?", citation)
            if(exact_match):
                citation_queries.append(Q("nested", path="statutory_citations", query=Q("bool",
                    must=[Q("term", statutory_citations__title=int(exact_match.group('title'))),
                    Q("term", statutory_citations__section=int(exact_match.group('section')))])))

    if kwargs.get('ao_citation_require_all'):
        must_clauses.append(Q('bool', must=citation_queries))
    else:
        must_clauses.append(Q('bool', should=citation_queries, minimum_should_match=1))

    if kwargs.get('ao_requestor_type'):
        requestor_types = {1: 'Federal candidate/candidate committee/officeholder',
                      2: 'Publicly funded candidates/committees',
                      3: 'Party committee, national',
                      4: 'Party committee, state or local',
                      5: 'Nonconnected political committee',
                      6: 'Separate segregated fund',
                      7: 'Labor Organization',
                      8: 'Trade Association',
                      9: 'Membership Organization, Cooperative, Corporation W/O Capital Stock',
                     10: 'Corporation (including LLCs electing corporate status)',
                     11: 'Partnership (including LLCs electing partnership status)',
                     12: 'Governmental entity',
                     13: 'Research/Public Interest/Educational Institution',
                     14: 'Law Firm',
                     15: 'Individual',
                     16: 'Other'}
        must_clauses.append(Q("terms", requestor_types=[requestor_types[r] for r in kwargs.get('ao_requestor_type')]))

    date_range = {}
    if kwargs.get('ao_min_issue_date'):
        date_range['gte'] = kwargs.get('ao_min_issue_date')
    if kwargs.get('ao_max_issue_date'):
        date_range['lte'] = kwargs.get('ao_max_issue_date')
    if date_range:
        must_clauses.append(Q("range", issue_date=date_range))

    date_range = {}
    if kwargs.get('ao_min_request_date'):
        date_range['gte'] = kwargs.get('ao_min_request_date')
    if kwargs.get('ao_max_request_date'):
        date_range['lte'] = kwargs.get('ao_max_request_date')
    if date_range:
        must_clauses.append(Q("range", request_date=date_range))

    if kwargs.get('ao_entity_name'):
        must_clauses.append(Q('bool', should=[Q('match', commenter_names=' '.join(kwargs.get('ao_entity_name'))),
          Q('match', representative_names=' '.join(kwargs.get('ao_entity_name')))],
            minimum_should_match=1))

    query = query.query('bool', must=must_clauses)

    return query
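A quick sanity check of the two citation patterns used above, with hypothetical citation strings; the regulatory pattern extracts title/part/section from a CFR reference, the statutory one title/section from a USC reference:

import re

cfr = re.match(r"(?P<title>\d+)\s+C\.?F\.?R\.?\s+§*\s*(?P<part>\d+)\.(?P<section>\d+)", "11 CFR 110.1")
print(cfr.group('title'), cfr.group('part'), cfr.group('section'))  # 11 110 1

usc = re.match(r"(?P<title>\d+)\s+U\.?S\.?C\.?\s+§*\s*(?P<section>\d+).*\.?", "52 USC 30101")
print(usc.group('title'), usc.group('section'))  # 52 30101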
Example #18
def test_complex_example():
    s = search.Search()
    s = s.query('match', title='python') \
        .query(~Q('match', title='ruby')) \
        .filter(F('term', category='meetup') | F('term', category='conference')) \
        .post_filter('terms', tags=['prague', 'czech']) \
        .script_fields(more_attendees="doc['attendees'].value + 42")

    s.aggs.bucket('per_country', 'terms', field='country')\
        .metric('avg_attendees', 'avg', field='attendees')

    s.query.minimum_should_match = 2

    s = s.highlight_options(order='score').highlight('title',
                                                     'body',
                                                     fragment_size=50)

    assert {
        'query': {
            'filtered': {
                'filter': {
                    'bool': {
                        'should': [{
                            'term': {
                                'category': 'meetup'
                            }
                        }, {
                            'term': {
                                'category': 'conference'
                            }
                        }]
                    }
                },
                'query': {
                    'bool': {
                        'must': [{
                            'match': {
                                'title': 'python'
                            }
                        }],
                        'must_not': [{
                            'match': {
                                'title': 'ruby'
                            }
                        }],
                        'minimum_should_match': 2
                    }
                }
            }
        },
        'post_filter': {
            'terms': {
                'tags': ['prague', 'czech']
            }
        },
        'aggs': {
            'per_country': {
                'terms': {
                    'field': 'country'
                },
                'aggs': {
                    'avg_attendees': {
                        'avg': {
                            'field': 'attendees'
                        }
                    }
                }
            }
        },
        "highlight": {
            'order': 'score',
            'fields': {
                'title': {
                    'fragment_size': 50
                },
                'body': {
                    'fragment_size': 50
                }
            }
        },
        'script_fields': {
            'more_attendees': {
                'script': "doc['attendees'].value + 42"
            }
        }
    } == s.to_dict()
Example #19
 def __call__(self, search, _):
     return search.query(
         Q("bool", must=[Q("terms", references=self.annotation_ids)]))
Example #20
 def get(cls, id, kind):
     s = cls.search()
     s.query = Q('bool', must=[Q('term', id=id), Q('term', kind=kind)])
     rs = s.execute()
     if rs:
         return rs.hits[0]
Example #21
    def get(self, **kwargs):
        """Return a list of results and aggregations based on parameters.

        The list of accepted parameters (with types and default values) is in
        the database and can be accessed with the super_search_fields service.
        """
        # Require that the list of fields be passed.
        if not kwargs.get('_fields'):
            raise MissingArgumentError('_fields')
        self.all_fields = kwargs['_fields']

        # Filter parameters and raise potential errors.
        params = self.get_parameters(**kwargs)

        # Find the indices to use to optimize the elasticsearch query.
        indices = self.get_indices(params['date'])

        # Create and configure the search object.
        search = Search(
            using=self.get_connection(),
            index=indices,
            doc_type=self.context.get_doctype(),
        )

        # Create filters.
        filters = []
        histogram_intervals = {}

        for field, sub_params in params.items():
            sub_filters = None
            for param in sub_params:
                if param.name.startswith('_'):
                    # By default, all param values are turned into lists,
                    # even when they can only ever have one value.
                    # For those parameters, we simply extract the single
                    # value from the made-up list.
                    if param.name == '_results_offset':
                        results_from = param.value[0]
                    elif param.name == '_results_number':
                        results_number = param.value[0]
                        if results_number > 1000:
                            raise BadArgumentError(
                                '_results_number',
                                msg=('_results_number cannot be greater '
                                     'than 1,000'))
                        if results_number < 0:
                            raise BadArgumentError(
                                '_results_number',
                                msg='_results_number cannot be negative')
                    elif param.name == '_facets_size':
                        facets_size = param.value[0]
                        # Why cap it?
                        # Because if the query covers a lot of different
                        # things, you can get a really large query that
                        # hogs resources excessively. Downloading, for
                        # example, 100k facets (and 0 hits) when there is
                        # plenty of data yields an 11 MB JSON file.
                        if facets_size > 10000:
                            raise BadArgumentError(
                                '_facets_size greater than 10,000')

                    for f in self.histogram_fields:
                        if param.name == '_histogram_interval.%s' % f:
                            histogram_intervals[f] = param.value[0]

                    # Don't use meta parameters in the query.
                    continue

                field_data = self.all_fields[param.name]
                name = self.get_full_field_name(field_data)

                if param.data_type in ('date', 'datetime'):
                    param.value = datetimeutil.date_to_string(param.value)
                elif param.data_type == 'enum':
                    param.value = [x.lower() for x in param.value]
                elif param.data_type == 'str' and not param.operator:
                    param.value = [x.lower() for x in param.value]

                # Operators needing wildcards, and the associated value
                # transformation with said wildcards.
                operator_wildcards = {
                    '~': '*%s*',  # contains
                    '^': '%s*',  # starts with
                    '$': '*%s'  # ends with
                }
                # Operators needing ranges, and the associated Elasticsearch
                # comparison operator.
                operator_range = {
                    '>': 'gt',
                    '<': 'lt',
                    '>=': 'gte',
                    '<=': 'lte',
                }

                args = {}
                filter_type = 'term'
                filter_value = None

                if not param.operator:
                    # contains one of the terms
                    if len(param.value) == 1:
                        val = param.value[0]

                        if not isinstance(val,
                                          six.string_types) or ' ' not in val:
                            # There's only one term and no white space, this
                            # is a simple term filter.
                            filter_value = val
                        else:
                            # If the term contains white spaces, we want to
                            # perform a phrase query.
                            filter_type = 'query'
                            args = Q(
                                'simple_query_string',
                                query=param.value[0],
                                fields=[name],
                                default_operator='and',
                            ).to_dict()
                    else:
                        # There are several terms, this is a terms filter.
                        filter_type = 'terms'
                        filter_value = param.value
                elif param.operator == '=':
                    # is exactly
                    if field_data['has_full_version']:
                        name = '%s.full' % name
                    filter_value = param.value
                elif param.operator in operator_range:
                    filter_type = 'range'
                    filter_value = {
                        operator_range[param.operator]: param.value
                    }
                elif param.operator == '__null__':
                    filter_type = 'missing'
                    args['field'] = name
                elif param.operator == '__true__':
                    filter_type = 'term'
                    filter_value = True
                elif param.operator == '@':
                    filter_type = 'regexp'
                    if field_data['has_full_version']:
                        name = '%s.full' % name
                    filter_value = param.value
                elif param.operator in operator_wildcards:
                    filter_type = 'query'

                    # Wildcard operations are better applied to a non-analyzed
                    # field (called "full") if there is one.
                    if field_data['has_full_version']:
                        name = '%s.full' % name

                    q_args = {}
                    q_args[name] = operator_wildcards[
                        param.operator] % param.value
                    query = Q('wildcard', **q_args)
                    args = query.to_dict()

                if filter_value is not None:
                    args[name] = filter_value

                if args:
                    new_filter = F(filter_type, **args)
                    if param.operator_not:
                        new_filter = ~new_filter

                    if sub_filters is None:
                        sub_filters = new_filter
                    elif filter_type == 'range':
                        sub_filters &= new_filter
                    else:
                        sub_filters |= new_filter

                    continue

            if sub_filters is not None:
                filters.append(sub_filters)

        search = search.filter(F('bool', must=filters))

        # Restricting returned fields.
        fields = []

        # We keep track of the requested columns in order to make sure we
        # return those column names and not aliases for example.
        self.request_columns = []
        for param in params['_columns']:
            for value in param.value:
                if not value:
                    continue

                self.request_columns.append(value)
                field_name = self.get_field_name(value, full=False)
                fields.append(field_name)

        search = search.fields(fields)

        # Sorting.
        sort_fields = []
        for param in params['_sort']:
            for value in param.value:
                if not value:
                    continue

                # Values starting with a '-' are sorted in descending order.
                # In order to retrieve the database name of the field, we
                # must first remove the '-' part and add it back later.
                # Example: given ['product', '-version'], the results will be
                # sorted by ascending product then descending version.
                desc = False
                if value.startswith('-'):
                    desc = True
                    value = value[1:]

                field_name = self.get_field_name(value)

                if desc:
                    # The underlying library understands that '-' means
                    # sorting in descending order.
                    field_name = '-' + field_name

                sort_fields.append(field_name)

        search = search.sort(*sort_fields)

        # Pagination.
        results_to = results_from + results_number
        search = search[results_from:results_to]

        # Create facets.
        if facets_size:
            self._create_aggregations(params, search, facets_size,
                                      histogram_intervals)

        # Query and compute results.
        hits = []

        if params['_return_query'][0].value[0]:
            # Return only the JSON query that would be sent to elasticsearch.
            return {
                'query': search.to_dict(),
                'indices': indices,
            }

        errors = []

        # We call elasticsearch with a computed list of indices, based on
        # the date range. However, if that list contains indices that do not
        # exist in elasticsearch, an error will be raised. We thus want to
        # remove all failing indices until we either have a valid list, or
        # an empty list in which case we return no result.
        while True:
            try:
                results = search.execute()
                for hit in results:
                    hits.append(self.format_fields(hit.to_dict()))

                total = search.count()

                aggregations = getattr(results, 'aggregations', {})
                if aggregations:
                    aggregations = self.format_aggregations(aggregations)

                shards = getattr(results, '_shards', {})

                break  # Yay! Results!
            except NotFoundError as e:
                missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
                if missing_index in indices:
                    del indices[indices.index(missing_index)]
                else:
                    # Wait what? An error caused by an index that was not
                    # in the request? That should never happen, but in case
                    # it does, better know it.
                    raise

                errors.append({
                    'type': 'missing_index',
                    'index': missing_index,
                })

                if indices:
                    # Update the list of indices and try again.
                    # Note: we need to first empty the list of indices before
                    # updating it, otherwise the removed indices never get
                    # actually removed.
                    search = search.index().index(*indices)
                else:
                    # There is no index left in the list, return an empty
                    # result.
                    hits = []
                    total = 0
                    aggregations = {}
                    shards = None
                    break
            except RequestError as exception:
                exc_type, exc_value, exc_tb = sys.exc_info()
                # Try to handle it gracefully if we can find out what
                # input was bad and caused the exception.
                try:
                    bad_input = ELASTICSEARCH_PARSE_EXCEPTION_REGEX.findall(
                        exception.error)[-1]
                    # Loop over the original parameters to try to figure
                    # out which *key* had the bad input.
                    for key, value in kwargs.items():
                        if value == bad_input:
                            raise BadArgumentError(key)
                except IndexError:
                    # Not an ElasticsearchParseException exception
                    pass

                # Re-raise the original exception with the correct traceback
                six.reraise(exc_type, exc_value, exc_tb)

        if shards and shards.failed:
            # Some shards failed. We want to explain what happened in the
            # results, so the client can decide what to do.
            failed_indices = defaultdict(int)
            for failure in shards.failures:
                failed_indices[failure.index] += 1

            for index, shards_count in failed_indices.items():
                errors.append({
                    'type': 'shards',
                    'index': index,
                    'shards_count': shards_count,
                })

        return {
            'hits': hits,
            'total': total,
            'facets': aggregations,
            'errors': errors,
        }
Example #22
def search(search_params, index, page_size, ip, request,
           filter_dead, page=1) -> Tuple[List[Hit], int, int]:
    """
    Given a set of keywords and an optional set of filters, perform a ranked
    paginated search.

    :param search_params: Search parameters. See
     :class: `ImageSearchQueryStringSerializer`.
    :param index: The Elasticsearch index to search (e.g. 'image')
    :param page_size: The number of results to return per page.
    :param ip: The user's hashed IP. Hashed IPs are used to anonymously but
    uniquely identify users exclusively for ensuring query consistency across
    Elasticsearch shards.
    :param request: Django's request object.
    :param filter_dead: Whether dead links should be removed.
    :param page: The results page number.
    :return: Tuple with the list of Hits from Elasticsearch, the page count,
    the result count, and a query suggestion (if any).
    """
    s = Search(index=index)
    # Apply term filters. Each tuple pairs a filter's parameter name in the API
    # with its corresponding field in Elasticsearch. "None" means that the
    # names are identical.
    filters = [
        ('extension', None),
        ('categories', None),
        ('aspect_ratio', None),
        ('size', None),
        ('source', 'provider'),
        ('license', 'license__keyword'),
        ('license_type', 'license__keyword')
    ]
    for tup in filters:
        api_field, elasticsearch_field = tup
        s = _apply_filter(s, search_params, api_field, elasticsearch_field)
    # Get suggestions for any route
    s = s.suggest(
        'get_suggestion',
        '',
        term={'field': 'creator'}
    )
    # Exclude mature content unless explicitly enabled by the requester
    if not search_params.data['mature']:
        s = s.exclude('term', mature=True)
    # Hide data sources from the catalog dynamically.
    filter_cache_key = 'filtered_providers'
    filtered_providers = cache.get(key=filter_cache_key)
    if not filtered_providers:
        filtered_providers = models.ContentProvider.objects\
            .filter(filter_content=True)\
            .values('provider_identifier')
        cache.set(
            key=filter_cache_key,
            timeout=CACHE_TIMEOUT,
            value=filtered_providers
        )
    to_exclude = [f['provider_identifier'] for f in filtered_providers]
    s = s.exclude('terms', provider=to_exclude)

    # Search either by generic multimatch or by "advanced search" with
    # individual field-level queries specified.
    search_fields = ['tags.name', 'title', 'description']
    if 'q' in search_params.data:
        query = _quote_escape(search_params.data['q'])
        s = s.query(
            'simple_query_string',
            query=query,
            fields=search_fields
        )
        # Get suggestions for term query
        s = s.suggest(
            'get_suggestion',
            query,
            term={'field': 'creator'}
        )
    else:
        if 'creator' in search_params.data:
            creator = _quote_escape(search_params.data['creator'])
            s = s.query(
                'simple_query_string', query=creator, fields=['creator']
            )
            # Get suggestions for creator
            s = s.suggest(
                'get_suggestion',
                creator,
                term={'field': 'creator'}
            )
        if 'title' in search_params.data:
            title = _quote_escape(search_params.data['title'])
            s = s.query(
                'simple_query_string', query=title, fields=['title']
            )
            # Get suggestions for title
            s = s.suggest(
                'get_suggestion',
                title,
                term={'field': 'title'}
            )
        if 'tags' in search_params.data:
            tags = _quote_escape(search_params.data['tags'])
            s = s.query(
                'simple_query_string',
                fields=['tags.name'],
                query=tags
            )
            # Get suggestions for tags
            s = s.suggest(
                'get_suggestion',
                tags,
                term={'field': 'tags.name'}
            )
    # Boost by popularity metrics
    if POPULARITY_BOOST:
        queries = []
        factors = ['comments', 'views', 'likes']
        boost_factor = 100 / len(factors)
        for factor in factors:
            rank_feature_query = Q(
                'rank_feature',
                field=factor,
                boost=boost_factor
            )
            queries.append(rank_feature_query)
        s = Search().query(
            Q(
                'bool',
                must=s.query,
                should=queries,
                minimum_should_match=1
            )
        )

    # Use highlighting to determine which fields contribute to the selection of
    # top results.
    s = s.highlight(*search_fields)
    s = s.highlight_options(order='score')
    s = s.extra(track_scores=True)  # extra() returns a copy, so keep the result
    # Route users to the same Elasticsearch worker node to reduce
    # pagination inconsistencies and increase cache hits.
    s = s.params(preference=str(ip), request_timeout=7)
    # Paginate
    start, end = _get_query_slice(s, page_size, page, filter_dead)
    s = s[start:end]
    try:
        search_response = s.execute()
        log.info(f'query={s.to_dict()}, es_took_ms={search_response.took}')
    except RequestError as e:
        raise ValueError(e)
    results = _post_process_results(
        s,
        start,
        end,
        page_size,
        search_response,
        request,
        filter_dead
    )

    suggestion = _query_suggestions(search_response)

    result_count, page_count = _get_result_and_page_count(
        search_response,
        results,
        page_size
    )

    return results, page_count, result_count, suggestion
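
# Illustrative sketch (the index name, query text and preference value are
# hypothetical): the same slicing and shard-routing pattern used above, in
# isolation. Only the query is serialized, so no cluster is required.
from elasticsearch_dsl import Search

page, page_size = 2, 20
start, end = (page - 1) * page_size, page * page_size
sketch = (
    Search(index='image')
    .query('simple_query_string', query='dog',
           fields=['tags.name', 'title', 'description'])
    .params(preference='hashed-ip', request_timeout=7)
)
print(sketch[start:end].to_dict())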
Exemple #23
0
def search_elastic(term='', user=None, sort='id', order='desc',
                   category='0_0', quality_filter='0', page=1,
                   rss=False, admin=False, logged_in_user=None,
                   per_page=75, max_search_results=1000):
    # This function can easily be memcached now

    es_client = Elasticsearch()

    es_sort_keys = {
        'id': 'id',
        'size': 'filesize',
        # 'name': 'display_name',  # This is slow and buggy
        'comments': 'comment_count',
        'seeders': 'seed_count',
        'leechers': 'leech_count',
        'downloads': 'download_count'
    }

    sort_ = sort.lower()
    if sort_ not in es_sort_keys:
        flask.abort(400)

    es_sort = es_sort_keys[sort_]  # use the validated, lower-cased key

    order_keys = {
        'desc': 'desc',
        'asc': 'asc'
    }

    order_ = order.lower()
    if order_ not in order_keys:
        flask.abort(400)

    # Only allow ID, desc if RSS
    if rss:
        es_sort = es_sort_keys['id']
        order = 'desc'

    # funky, es sort is default asc, prefixed by '-' if desc
    if 'desc' == order:
        es_sort = '-' + es_sort

    # Quality filter
    quality_keys = [
        '0',  # Show all
        '1',  # No remakes
        '2',  # Only trusted
        '3'   # Only completed
    ]

    if quality_filter.lower() not in quality_keys:
        flask.abort(400)

    quality_filter = int(quality_filter)

    # Category filter
    main_category = None
    sub_category = None
    main_cat_id = 0
    sub_cat_id = 0
    if category:
        cat_match = re.match(r'^(\d+)_(\d+)$', category)
        if not cat_match:
            flask.abort(400)

        main_cat_id = int(cat_match.group(1))
        sub_cat_id = int(cat_match.group(2))

        if main_cat_id > 0:
            if sub_cat_id > 0:
                sub_category = models.SubCategory.by_category_ids(main_cat_id, sub_cat_id)
                if not sub_category:
                    flask.abort(400)
            else:
                main_category = models.MainCategory.by_id(main_cat_id)
                if not main_category:
                    flask.abort(400)

    # This might be useless since we validate users
    # before coming into this method, but just to be safe...
    if user:
        user = models.User.by_id(user)
        if not user:
            flask.abort(404)
        user = user.id

    same_user = False
    if logged_in_user:
        same_user = user == logged_in_user.id

    s = Search(using=es_client, index=app.config.get('ES_INDEX_NAME'))  # todo, sukebei prefix

    # Apply search term
    if term:
        s = s.query('simple_query_string',
                    analyzer='my_search_analyzer',
                    default_operator="AND",
                    query=term)

    # User view (/user/username)
    if user:
        s = s.filter('term', uploader_id=user)

        if not admin:
            # Hide all DELETED torrents if regular user
            s = s.filter('term', deleted=False)
            # If logged in user is not the same as the user being viewed,
            # show only torrents that aren't hidden or anonymous.
            #
            # If logged in user is the same as the user being viewed,
            # show all torrents including hidden and anonymous ones.
            #
            # On RSS pages in user view, show only torrents that
            # aren't hidden or anonymous no matter what
            if not same_user or rss:
                s = s.filter('term', hidden=False)
                s = s.filter('term', anonymous=False)
    # General view (homepage, general search view)
    else:
        if not admin:
            # Hide all DELETED torrents if regular user
            s = s.filter('term', deleted=False)
            # If logged in, show all torrents that aren't hidden unless they belong to you
            # On RSS pages, show all public torrents and nothing more.
            if logged_in_user and not rss:
                hiddenFilter = Q('term', hidden=False)
                userFilter = Q('term', uploader_id=logged_in_user.id)
                combinedFilter = hiddenFilter | userFilter
                s = s.filter('bool', filter=[combinedFilter])
            else:
                s = s.filter('term', hidden=False)

    if main_category:
        s = s.filter('term', main_category_id=main_cat_id)
    elif sub_category:
        s = s.filter('term', main_category_id=main_cat_id)
        s = s.filter('term', sub_category_id=sub_cat_id)

    if quality_filter == 0:
        pass
    elif quality_filter == 1:
        s = s.filter('term', remake=False)
    elif quality_filter == 2:
        s = s.filter('term', trusted=True)
    elif quality_filter == 3:
        s = s.filter('term', complete=True)

    # Apply sort
    s = s.sort(es_sort)

    # Only show first RESULTS_PER_PAGE items for RSS
    if rss:
        s = s[0:per_page]
    else:
        max_page = min(page, int(math.ceil(max_search_results / float(per_page))))
        from_idx = (max_page - 1) * per_page
        to_idx = min(max_search_results, max_page * per_page)
        s = s[from_idx:to_idx]

    highlight = app.config.get('ENABLE_ELASTIC_SEARCH_HIGHLIGHT')
    if highlight:
        s = s.highlight_options(tags_schema='styled')
        s = s.highlight("display_name")

    # Return query, uncomment print line to debug query
    # from pprint import pprint
    # print(json.dumps(s.to_dict()))
    return s.execute()
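
# Illustrative sketch (hypothetical index name): elasticsearch_dsl reads a
# leading '-' on a sort key as "descending", which is why the code above
# prefixes es_sort with '-'.
from elasticsearch_dsl import Search

print(Search(index='nyaa').sort('-seed_count').to_dict())
# -> {'sort': [{'seed_count': {'order': 'desc'}}]}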
Exemple #24
0
        "params": {'param2'},
        "tags": {"tag3"}
    }
    assert actual_col_to_whitelist_dict == col_to_whitelist_dict


@pytest.mark.parametrize("test_parsed_filter,test_query,test_type", [
    ({
        'type': 'parameter',
        'key': 'param0',
        'comparator': 'LIKE',
        'value': '%va%'
    },
     Q('bool',
       filter=[
           Q("term", params__key="param0"),
           Q("wildcard", params__value="*va*")
       ]), "params"),
    ({
        'type': 'parameter',
        'key': 'param0',
        'comparator': 'ILIKE',
        'value': '%va%'
    },
     Q('bool',
       filter=[
           Q("term", params__key="param0"),
           Q("wildcard", params__value="*va*")
       ]), "params"),
    ({
        'type': 'parameter',
Exemple #25
0
 class PhaseConfig:
     filter_fields = ('docclass', 'status', 'unit', 'discipline',
                      'document_type', 'under_review', 'overdue', 'leader',
                      'approver')
     indexable_fields = ['is_existing', 'can_be_transmitted']
     es_field_types = {
         'overdue': 'boolean',
     }
     column_fields = (
         ('', 'under_preparation_by'),
         ('Document Number', 'document_number'),
         ('Title', 'title'),
         ('Rev.', 'current_revision'),
         ('Status', 'status'),
         ('Class', 'docclass'),
         ('Unit', 'unit'),
         ('Discipline', 'discipline'),
         ('Document type', 'document_type'),
         ('Review start date', 'review_start_date'),
         ('Review due date', 'review_due_date'),
         ('Under review', 'under_review'),
         ('Overdue', 'overdue'),
         ('Leader', 'leader'),
         ('Approver', 'approver'),
         ('Final revision', 'final_revision'),
     )
     transmittal_columns = {
         'Document Number': 'document_key',
         'Title': 'title',
         'Contract Number': 'contract_number',
         'Originator': 'originator',
         'Unit': 'unit',
         'Discipline': 'discipline',
         'Document Type': 'document_type',
         'Sequential Number': 'sequential_number',
         'Class': 'docclass',
         'Revision': 'revision',
         'Status': 'status',
         'Received Date': 'received_date',
         'Created': 'created_on',
     }
     export_fields = OrderedDict((
         ('Document number', 'document_number'),
         ('Title', 'title'),
         ('Revision', 'revision_name'),
         ('Revision date', 'revision_date'),
         ('Status', 'status'),
         ('Doc category', 'category'),
         ('Class', 'docclass'),
         ('Contract Number', 'contract_number'),
         ('Originator', 'originator'),
         ('Unit', 'unit'),
         ('Discipline', 'discipline'),
         ('Document type', 'document_type'),
         ('Sequential number', 'sequential_number'),
         ('System', 'system'),
         ('WBS', 'wbs'),
         ('Weight', 'weight'),
         ('Is final revision', 'final_revision'),
         ('Received date', 'received_date'),
         ('Created on', 'created_on'),
         ('Review start date', 'review_start_date'),
         ('Review due date', 'review_due_date'),
         ('Leader', 'leader'),
         ('Approver', 'approver'),
         ('Outgoing transmittal', 'transmittal'),
         ('Sent date', 'transmittal_sent_date'),
         ('Purpose of issue', 'purpose_of_issue'),
         ('External due date', 'external_review_due_date'),
         ('Return code', 'return_code'),
         ('STD Planned', 'status_std_planned_date'),
         ('IDC Planned', 'status_idc_planned_date'),
         ('IFR Planned', 'status_ifr_planned_date'),
         ('IFA Planned', 'status_ifa_planned_date'),
         ('IFD Planned', 'status_ifd_planned_date'),
         ('IFC Planned', 'status_ifc_planned_date'),
         ('IFI Planned', 'status_ifi_planned_date'),
         ('ASB Planned', 'status_asb_planned_date'),
         ('STD Forecast', 'status_std_forecast_date'),
         ('IDC Forecast', 'status_idc_forecast_date'),
         ('IFR Forecast', 'status_ifr_forecast_date'),
         ('IFA Forecast', 'status_ifa_forecast_date'),
         ('IFD Forecast', 'status_ifd_forecast_date'),
         ('IFC Forecast', 'status_ifc_forecast_date'),
         ('IFI Forecast', 'status_ifi_forecast_date'),
         ('ASB Forecast', 'status_asb_forecast_date'),
         ('STD Actual', 'status_std_actual_date'),
         ('IFR Actual', 'status_ifr_actual_date'),
         ('IDC Actual', 'status_idc_actual_date'),
         ('IFA Actual', 'status_ifa_actual_date'),
         ('IFD Actual', 'status_ifd_actual_date'),
         ('IFC Actual', 'status_ifc_actual_date'),
         ('IFI Actual', 'status_ifi_actual_date'),
         ('ASB Actual', 'status_asb_actual_date'),
     ))
     custom_filters = OrderedDict((('show_cld_spd', {
         'field': forms.BooleanField,
         'label': _('Show CLD/SPD docs'),
         'filters': {
             True: None,
             False: Q('term', is_existing=True),
             None: Q('term', is_existing=True)
         }
     }), ('outgoing_trs', {
         'field': forms.BooleanField,
         'label': _('Ready for outgoing TRS'),
         'filters': {
             True: Q('term', can_be_transmitted=True),
             False: None,
             None: None,
         }
     })))
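
# Illustrative sketch (assumption about the surrounding framework: for each
# custom filter, the submitted form value is looked up in its `filters`
# mapping and, when the mapped value is a Q object, it is applied to the
# Search). The form value and index name below are hypothetical.
from elasticsearch_dsl import Q, Search

value_to_filter = {
    True: None,
    False: Q('term', is_existing=True),
    None: Q('term', is_existing=True),
}
submitted = False                      # hypothetical cleaned form value
s = Search(index='documents')          # hypothetical index name
condition = value_to_filter.get(submitted)
if condition is not None:
    s = s.filter(condition)
print(s.to_dict())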
Exemple #26
0
    def inner(values):
        terms = current_custom_metadata.terms
        available_terms = current_custom_metadata.available_vocabulary_set
        conditions = []

        for value in values:
            # Matches this:
            #   [vocabulary:term]:[value]
            parsed = re.match(
                r'^\[(?P<key>[-\w]+\:[-\w]+)\]\:\[(?P<val>.+)\]$', value)
            if not parsed:
                raise RESTValidationError(errors=[
                    FieldError(
                        field, 'The parameter should have the format: '
                        'custom=[term]:[value].')
                ])

            parsed = parsed.groupdict()
            search_key = parsed['key']
            search_value = parsed['val']

            if search_key not in available_terms:
                raise RESTValidationError(errors=[
                    FieldError(
                        field, u'The "{}" term is not supported.'.format(
                            search_key))
                ])

            custom_fields_mapping = dict(
                keyword='custom_keywords',
                text='custom_text',
                relationship='custom_relationships',
            )

            term_type = terms[search_key]['type']
            es_field = custom_fields_mapping[term_type]

            nested_clauses = [
                {
                    'term': {
                        '{}.key'.format(es_field): search_key
                    }
                },
            ]

            if term_type in ('text', 'keyword'):
                nested_clauses.append({
                    'query_string': {
                        'fields': ['{}.value'.format(es_field)],
                        'query': search_value,
                    }
                })
            elif term_type == 'relationship':
                if ':' not in search_value:
                    raise RESTValidationError(errors=[
                        FieldError(field, (
                            'Relationship term search values should '
                            'follow the format "<sub>:<obj>".'))
                    ])

                sub, obj = search_value.split(':', 1)
                if sub:
                    nested_clauses.append({
                        'query_string': {
                            'fields': [es_field + '.subject'],
                            'query': sub
                        }
                    })
                if obj:
                    nested_clauses.append({
                        'query_string': {
                            'fields': [es_field + '.object'],
                            'query': obj
                        }
                    })

            conditions.append({
                'nested': {
                    'path': es_field,
                    'query': {
                        'bool': {
                            'must': nested_clauses
                        }
                    },
                }
            })
        return Q('bool', must=conditions)
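
# Illustrative sketch: one nested clause of the shape assembled above,
# serialized with .to_dict(). The term key and value are hypothetical.
from elasticsearch_dsl import Q

print(Q('bool', must=[{
    'nested': {
        'path': 'custom_keywords',
        'query': {'bool': {'must': [
            {'term': {'custom_keywords.key': 'dwc:family'}},
            {'query_string': {'fields': ['custom_keywords.value'],
                              'query': 'Felidae'}},
        ]}},
    },
}]).to_dict())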
Exemple #27
0
 def search(self):
     """Get Elasticsearch search instance."""
     s = self._search_cls(index=self._index)
     if self._query:
         s = s.query(Q('query_string', query=self._query))  # query() returns a new Search, so reassign
     return s
Exemple #28
0
def TermsMatch(key, value):
    return Q('terms', **{key: value})
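
# Illustrative usage of the helper defined above (field name and values are
# hypothetical):
print(TermsMatch('status', ['open', 'closed']).to_dict())
# -> {'terms': {'status': ['open', 'closed']}}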
Exemple #29
0
    def obj_create(self, bundle, **kwargs):
        user = bundle.request.user
        groups = user.groups.all()

        # if anonymous user search public data only
        query_text = bundle.data.get("text", None)
        type_tag = bundle.data.get("TypeTag", [])
        index_list = []
        for type in type_tag:
            if type == 'Experiment':
                index_list.append('experiments')
            elif type == 'Dataset':
                index_list.append('dataset')
            elif type == 'Datafile':
                index_list.append('datafile')
        end_date = bundle.data.get("EndDate", None)
        start_date = bundle.data.get("StartDate", None)
        if end_date is not None:
            end_date_utc = datetime.datetime.strptime(end_date, "%Y-%m-%dT%H:%M:%S.%fZ") \
                .replace(tzinfo=pytz.timezone('UTC'))
            end_date = end_date_utc.astimezone(LOCAL_TZ).date()
        else:
            # set end date to today's date
            end_date = datetime.datetime.today().replace(tzinfo=pytz.timezone('UTC'))
        if start_date:
            start_date_utc = datetime.datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%S.%fZ") \
                .replace(tzinfo=pytz.timezone('UTC'))
            start_date = start_date_utc.astimezone(LOCAL_TZ).date()
        instrument_list = bundle.data.get("InstrumentList", None)
        instrument_list_id = []
        if instrument_list:
            for ins in instrument_list:
                instrument_list_id.append(Instrument.objects.get(name__exact=ins).id)
        # query for experiment model
        ms = MultiSearch(index=index_list)
        if 'experiments' in index_list:
            query_exp = Q("match", title=query_text)
            if user.is_authenticated:
                query_exp_oacl = Q("term", objectacls__entityId=user.id) | \
                                 Q("term", public_access=100)
                for group in groups:
                    query_exp_oacl = query_exp_oacl | \
                                     Q("term", objectacls__entityId=group.id)
            else:
                query_exp_oacl = Q("term", public_access=100)
            if start_date is not None:
                query_exp = query_exp & Q("range", created_time={'gte': start_date, 'lte': end_date})
            query_exp = query_exp & query_exp_oacl
            ms = ms.add(Search(index='experiments')
                        .extra(size=MAX_SEARCH_RESULTS, min_score=MIN_CUTOFF_SCORE)
                        .query(query_exp))
        if 'dataset' in index_list:
            query_dataset = Q("match", description=query_text)
            if user.is_authenticated:
                query_dataset_oacl = Q("term", **{'experiments.objectacls.entityId': user.id}) | \
                                     Q("term", **{'experiments.public_access': 100})
                for group in groups:
                    query_dataset_oacl = query_dataset_oacl | \
                                         Q("term", **{'experiments.objectacls.entityId': group.id})
            else:
                query_dataset_oacl = Q("term", **{'experiments.public_access': 100})
            if start_date is not None:
                query_dataset = query_dataset & Q("range", created_time={'gte': start_date, 'lte': end_date})
            if instrument_list:
                query_dataset = query_dataset & Q("terms", **{'instrument.id': instrument_list_id})
            # add instrument query
            ms = ms.add(Search(index='dataset')
                        .extra(size=MAX_SEARCH_RESULTS, min_score=MIN_CUTOFF_SCORE).query(query_dataset)
                        .query('nested', path='experiments', query=query_dataset_oacl))
        if 'datafile' in index_list:
            query_datafile = Q("match", filename=query_text)
            if user.is_authenticated:
                query_datafile_oacl = Q("term", experiments__objectacls__entityId=user.id) | \
                                      Q("term", experiments__public_access=100)
                for group in groups:
                    query_datafile_oacl = query_datafile_oacl | \
                                          Q("term", experiments__objectacls__entityId=group.id)
            else:
                query_datafile_oacl = Q("term", experiments__public_access=100)
            if start_date is not None:
                query_datafile = query_datafile & Q("range", created_time={'gte': start_date, 'lte': end_date})
            query_datafile = query_datafile & query_datafile_oacl
            ms = ms.add(Search(index='datafile')
                        .extra(size=MAX_SEARCH_RESULTS, min_score=MIN_CUTOFF_SCORE)
                        .query(query_datafile))
        result = ms.execute()
        result_dict = {k: [] for k in ["experiments", "datasets", "datafiles"]}
        for item in result:
            for hit in item.hits.hits:
                if hit["_index"] == "dataset":
                    result_dict["datasets"].append(hit.to_dict())

                elif hit["_index"] == "experiments":
                    result_dict["experiments"].append(hit.to_dict())

                elif hit["_index"] == "datafile":
                    result_dict["datafiles"].append(hit.to_dict())

        if bundle.request.method == 'POST':
            bundle.obj = SearchObject(id=1, hits=result_dict)
        return bundle
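
# Illustrative sketch: OR-ing term queries with `|`, as done for the ACL
# clauses above, produces a bool query with `should` clauses (the user id is
# hypothetical).
from elasticsearch_dsl import Q

acl = Q("term", objectacls__entityId=42) | Q("term", public_access=100)
print(acl.to_dict())
# -> {'bool': {'should': [{'term': {'objectacls.entityId': 42}},
#              {'term': {'public_access': 100}}]}}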
Exemple #30
0
    def get(self, **kwargs):
        """Return a list of results and aggregations based on parameters.

        The list of accepted parameters (with types and default values) is in
        the database and can be accessed with the super_search_fields service.
        """

        # Filter parameters and raise potential errors.
        params = self.get_parameters(**kwargs)

        # Find the indices to use to optimize the elasticsearch query.
        indices = self.get_indices(params['date'])

        # Create and configure the search object.
        search = Search(
            using=self.get_connection(),
            index=indices,
            doc_type=self.config.elasticsearch.elasticsearch_doctype,
        )

        # Create filters.
        filters = None
        histogram_intervals = {}

        for field, sub_params in params.items():
            sub_filters = None
            for param in sub_params:

                if param.name.startswith('_'):
                    # By default, all param values are turned into lists,
                    # even when they have and can have only one value.
                    # For those we know there can only be one value,
                    # so we just extract it from the made-up list.
                    if param.name == '_results_offset':
                        results_from = param.value[0]
                    elif param.name == '_results_number':
                        results_number = param.value[0]
                    elif param.name == '_facets_size':
                        facets_size = param.value[0]

                    for f in self.histogram_fields:
                        if param.name == '_histogram_interval.%s' % f:
                            histogram_intervals[f] = param.value[0]

                    # Don't use meta parameters in the query.
                    continue

                field_data = self.all_fields[param.name]

                name = '%s.%s' % (
                    field_data['namespace'],
                    field_data['in_database_name']
                )

                if param.data_type in ('date', 'datetime'):
                    param.value = datetimeutil.date_to_string(param.value)
                elif param.data_type == 'enum':
                    param.value = [x.lower() for x in param.value]
                elif param.data_type == 'str' and not param.operator:
                    param.value = [x.lower() for x in param.value]

                args = {}
                filter_type = 'term'
                filter_value = None
                if not param.operator:
                    # contains one of the terms
                    if len(param.value) == 1:
                        val = param.value[0]
                        if not isinstance(val, basestring) or (
                            isinstance(val, basestring) and ' ' not in val
                        ):
                            filter_value = val

                        # If the term contains white spaces, we want to perform
                        # a phrase query. Thus we do nothing here and let this
                        # value be handled later.
                    else:
                        filter_type = 'terms'
                        filter_value = param.value
                elif param.operator == '=':
                    # is exactly
                    if field_data['has_full_version']:
                        name = '%s.full' % name
                    filter_value = param.value
                elif param.operator == '>':
                    # greater than
                    filter_type = 'range'
                    filter_value = {
                        'gt': param.value
                    }
                elif param.operator == '<':
                    # lower than
                    filter_type = 'range'
                    filter_value = {
                        'lt': param.value
                    }
                elif param.operator == '>=':
                    # greater than or equal to
                    filter_type = 'range'
                    filter_value = {
                        'gte': param.value
                    }
                elif param.operator == '<=':
                    # lower than or equal to
                    filter_type = 'range'
                    filter_value = {
                        'lte': param.value
                    }
                elif param.operator == '__null__':
                    # is null
                    filter_type = 'missing'
                    args['field'] = name

                if filter_value is not None:
                    args[name] = filter_value

                if args:
                    if param.operator_not:
                        new_filter = ~F(filter_type, **args)
                    else:
                        new_filter = F(filter_type, **args)

                    if sub_filters is None:
                        sub_filters = new_filter
                    elif param.data_type == 'enum':
                        sub_filters |= new_filter
                    else:
                        sub_filters &= new_filter

                    continue

                # These use a wildcard and thus need to be in a query
                # instead of a filter.
                operator_wildcards = {
                    '~': '*%s*',  # contains
                    '$': '%s*',  # starts with
                    '^': '*%s'  # ends with
                }
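                # e.g. operator '~' with the hypothetical value 'panic' yields
                # the pattern '*panic*', which becomes
                # {'wildcard': {'<field>': '*panic*'}} in the final query.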
                if param.operator in operator_wildcards:
                    if field_data['has_full_version']:
                        name = '%s.full' % name

                    query_type = 'wildcard'
                    args[name] = (
                        operator_wildcards[param.operator] % param.value
                    )
                elif not param.operator:
                    # This is a phrase that was passed down.
                    query_type = 'simple_query_string'
                    args['query'] = param.value[0]
                    args['fields'] = [name]
                    args['default_operator'] = 'and'

                if args:
                    query = Q(query_type, **args)
                    if param.operator_not:
                        query = ~query
                    search = search.query(query)
                else:
                    # If we reach this point, that means the operator is
                    # not supported, and we should raise an error about that.
                    raise NotImplementedError(
                        'Operator %s is not supported' % param.operator
                    )

            if filters is None:
                filters = sub_filters
            elif sub_filters is not None:
                filters &= sub_filters

        search = search.filter(filters)

        # Restricting returned fields.
        fields = []
        for param in params['_columns']:
            for value in param.value:
                if not value:
                    continue

                field_name = self.get_field_name(value, full=False)
                fields.append(field_name)

        search = search.fields(fields)

        # Sorting.
        sort_fields = []
        for param in params['_sort']:
            for value in param.value:
                if not value:
                    continue

                # Values starting with a '-' are sorted in descending order.
                # In order to retrieve the database name of the field, we
                # must first remove the '-' part and add it back later.
                # Example: given ['product', '-version'], the results will be
                # sorted by ascending product and descending version.
                desc = False
                if value.startswith('-'):
                    desc = True
                    value = value[1:]

                field_name = self.get_field_name(value, full=False)

                if desc:
                    # The underlying library understands that '-' means
                    # sorting in descending order.
                    field_name = '-' + field_name

                sort_fields.append(field_name)

        search = search.sort(*sort_fields)

        # Pagination.
        results_to = results_from + results_number
        search = search[results_from:results_to]

        # Create facets.
        for param in params['_facets']:
            for value in param.value:
                if not value:
                    continue

                field_name = self.get_field_name(value)
                search.aggs.bucket(
                    value,
                    'terms',
                    field=field_name,
                    size=facets_size,
                )

        # Create signature aggregations.
        if params.get('_aggs.signature'):
            sig_bucket = A(
                'terms',
                field=self.get_field_name('signature'),
                size=facets_size,
            )
            for param in params['_aggs.signature']:
                for value in param.value:
                    if not value:
                        continue

                    field_name = self.get_field_name(value)
                    sig_bucket.bucket(
                        value,
                        'terms',
                        field=field_name,
                        size=facets_size,
                    )

            search.aggs.bucket('signature', sig_bucket)

        # Create histograms.
        for f in self.histogram_fields:
            if params.get('_histogram.%s' % f):
                histogram_type = (
                    self.all_fields[f]['query_type'] == 'date'
                    and 'date_histogram' or 'histogram'
                )
                date_bucket = A(
                    histogram_type,
                    field=self.get_field_name(f),
                    interval=histogram_intervals[f],
                )
                for param in params['_histogram.%s' % f]:
                    for value in param.value:
                        if not value:
                            continue

                        field_name = self.get_field_name(value)
                        val_bucket = A(
                            'terms',
                            field=field_name,
                            size=facets_size,
                        )
                        date_bucket.bucket(value, val_bucket)

                search.aggs.bucket('histogram_%s' % f, date_bucket)

        # Query and compute results.
        hits = []

        if params['_return_query'][0].value[0]:
            # Return only the JSON query that would be sent to elasticsearch.
            return {
                'query': search.to_dict(),
                'indices': indices,
            }

        # We call elasticsearch with a computed list of indices, based on
        # the date range. However, if that list contains indices that do not
        # exist in elasticsearch, an error will be raised. We thus want to
        # remove all failing indices until we either have a valid list, or
        # an empty list in which case we return no result.
        while True:
            try:
                results = search.execute()
                for hit in results:
                    hits.append(self.format_fields(hit.to_dict()))

                total = search.count()
                aggregations = self.format_aggregations(results.aggregations)
                break  # Yay! Results!
            except NotFoundError as e:
                missing_index = re.findall(BAD_INDEX_REGEX, e.error)[0]
                if missing_index in indices:
                    del indices[indices.index(missing_index)]
                else:
                    # Wait what? An error caused by an index that was not
                    # in the request? That should never happen, but in case
                    # it does, better know it.
                    raise

                if indices:
                    # Update the list of indices and try again.
                    # Note: we need to first empty the list of indices before
                    # updating it, otherwise the removed indices never get
                    # actually removed.
                    search = search.index().index(*indices)
                else:
                    # There is no index left in the list, return an empty
                    # result.
                    hits = []
                    total = 0
                    aggregations = {}
                    break