Ejemplo n.º 1
0
def get_es_chunk(req_dict, start_record, chunk_size):
    """Fetch one chunk of search results from the ElasticSearch index.

    Calls do_search on the index and doctype configured in settings, with
    the query, dates, excluded distributions, excluded article types and
    selected pillars taken from req_dict. start_record and chunk_size are
    handed to do_search unchanged (presumably result offset and page size;
    confirm against do_search).

    Returns a 2-tuple: the 'hits' entry of the search result, and None.
    The validity flag returned by do_search is not inspected here.
    """
    trace = "%s: %s" % (__name__, "get_es_chunk")
    logger.debug(trace)
    if settings.DEBUG:
        print >> stderr, trace

    search_args = (
        settings.ES_INDEX,
        settings.ES_DOCTYPE,
        req_dict['query'],
        start_record,
        chunk_size,
        req_dict['dates'],
        req_dict['exclude_distributions'],
        req_dict['exclude_article_types'],
        req_dict['selected_pillars'],
        True,
    )
    _valid, response = do_search(*search_args)

    hits = response['hits']
    return hits, None
Ejemplo n.º 2
0
    def handle(self, *args, **options):
        """Time ElasticSearch on randomly generated weighted queries.

        Optional positional arguments:
            args[0] -- number of terms per generated query (default 10)
            args[1] -- number of queries to run (default 10)

        Each repetition picks random QueryTerm objects, joins them into an
        OR-query in which every term gets a random boost, and passes it to
        do_search. For every valid query the 'took' value of the result
        and the wall clock time around the call (in milliseconds) are
        recorded; the 'took' value is written as it comes in and the
        averages of both series are written at the end.

        Exits with status 1 when no QueryTerm objects are stored. When no
        query produced a valid result, a notice is written instead of the
        averages.
        """
        if not QueryTerm.objects.exists():
            self.stdout.write('No query terms stored in the database. '
                              'Please run \'python manage.py '
                              'gatherqueryterms\' first.')
            sys.exit(1)

        query_size = int(args[0]) if len(args) > 0 else 10
        n_repetitions = int(args[1]) if len(args) > 1 else 10

        # The date range is derived from a setting only, so it is resolved
        # once and kept out of the timed section below.
        dates = daterange2dates(settings.TEXCAVATOR_DATE_RANGE)

        response_times = []
        es_wall_clock = []

        for _ in range(n_repetitions):
            # Generate a random weighted query: every term gets a random
            # boost between 1 and 40.
            query_terms = QueryTerm.objects.order_by('?')[0:query_size]
            q = ' OR '.join('{}^{}'.format(t.term, randint(1, 40))
                            for t in query_terms)

            t1 = time.time()
            valid_q, result = do_search(settings.ES_INDEX, settings.ES_DOCTYPE,
                                        q, 0, 20, dates[0], [], [])
            t2 = time.time()

            if not valid_q:
                self.stdout.write('Invalid query: {}'.format(q))
                continue

            took = result.get('took')
            es_wall_clock.append((t2 - t1) * 1000)
            response_times.append(int(took))
            self.stdout.write(str(took))
            self.stdout.flush()

        if not response_times:
            # Nothing to average (zero repetitions or only invalid
            # queries); dividing would raise ZeroDivisionError.
            self.stdout.write('No valid queries were executed; '
                              'no averages to report.')
            return

        # Convert before dividing: the 'took' values are integers, so a
        # plain division would be floored under Python 2.
        avg = float(sum(response_times)) / len(response_times)
        avg_wall_clock = float(sum(es_wall_clock)) / len(es_wall_clock)
        self.stdout.write(
            'Average response time for queries of size {}: {} miliseconds'
            .format(query_size, avg))
        self.stdout.write(
            'Average wall clock time for queries of size {}: {} '
            'miliseconds'.format(query_size, avg_wall_clock))
    def handle(self, *args, **options):
        """Time ElasticSearch on randomly generated weighted queries.

        Optional positional arguments:
            args[0] -- number of terms per generated query (default 10)
            args[1] -- number of queries to run (default 10)

        Each repetition picks random QueryTerm objects, joins them into an
        OR-query in which every term gets a random boost, and passes it to
        do_search. For every valid query the 'took' value of the result
        and the wall clock time around the call (in milliseconds) are
        recorded; the 'took' value is written as it comes in and the
        averages of both series are written at the end.

        Exits with status 1 when no QueryTerm objects are stored. When no
        query produced a valid result, a notice is written instead of the
        averages.
        """
        if not QueryTerm.objects.exists():
            self.stdout.write('No query terms stored in the database. '
                              'Please run \'python manage.py '
                              'gatherqueryterms\' first.')
            sys.exit(1)

        query_size = int(args[0]) if len(args) > 0 else 10
        n_repetitions = int(args[1]) if len(args) > 1 else 10

        # The date range is derived from a setting only, so it is resolved
        # once and kept out of the timed section below.
        dates = daterange2dates(settings.TEXCAVATOR_DATE_RANGE)

        response_times = []
        es_wall_clock = []

        for _ in range(n_repetitions):
            # Generate a random weighted query: every term gets a random
            # boost between 1 and 40.
            query_terms = QueryTerm.objects.order_by('?')[0:query_size]
            q = ' OR '.join('{}^{}'.format(t.term, randint(1, 40))
                            for t in query_terms)

            t1 = time.time()
            valid_q, result = do_search(settings.ES_INDEX, settings.ES_DOCTYPE,
                                        q, 0, 20, dates[0], [], [])
            t2 = time.time()

            if not valid_q:
                self.stdout.write('Invalid query: {}'.format(q))
                continue

            took = result.get('took')
            es_wall_clock.append((t2 - t1) * 1000)
            response_times.append(int(took))
            self.stdout.write(str(took))
            self.stdout.flush()

        if not response_times:
            # Nothing to average (zero repetitions or only invalid
            # queries); dividing would raise ZeroDivisionError.
            self.stdout.write('No valid queries were executed; '
                              'no averages to report.')
            return

        # Convert before dividing: the 'took' values are integers, so a
        # plain division would be floored under Python 2.
        avg = float(sum(response_times)) / len(response_times)
        avg_wall_clock = float(sum(es_wall_clock)) / len(es_wall_clock)
        self.stdout.write(
            'Average response time for queries of size {}: {} miliseconds'
            .format(query_size, avg))
        self.stdout.write(
            'Average wall clock time for queries of size {}: {} '
            'miliseconds'.format(query_size, avg_wall_clock))
Ejemplo n.º 4
0
def get_es_chunk(req_dict, start_record, chunk_size):
    """Return one chunk of ElasticSearch results for a stored request.

    The search parameters (query, dates, excluded distributions, excluded
    article types, selected pillars) are read from req_dict and passed to
    do_search together with the index and doctype from settings.
    start_record and chunk_size are forwarded as given (presumably result
    offset and page size; confirm against do_search).

    Returns a 2-tuple of the 'hits' entry of the search result and None.
    The validity flag returned by do_search is discarded.
    """
    debug_line = "%s: %s" % (__name__, "get_es_chunk")
    logger.debug(debug_line)
    if settings.DEBUG:
        print >> stderr, debug_line

    # Pull the request fields out first, in the order do_search takes them.
    query = req_dict['query']
    dates = req_dict['dates']
    skip_distributions = req_dict['exclude_distributions']
    skip_article_types = req_dict['exclude_article_types']
    pillars = req_dict['selected_pillars']

    outcome = do_search(settings.ES_INDEX, settings.ES_DOCTYPE, query,
                        start_record, chunk_size, dates, skip_distributions,
                        skip_article_types, pillars, True)
    _is_valid, found = outcome

    return found['hits'], None