Ejemplo n.º 1
0
def get_items(args):
    """Yield the news items matching the report filters, one chunk at a time.

    The search is paged with ``from``/``size`` (``size`` is ``CHUNK_SIZE``) and
    each page is yielded as a list, so the caller can work on one chunk (for
    example query aggregations for it) before the next page is retrieved.

    Because this function is a generator, nothing in its body runs until the
    first chunk is requested -- that includes the ``section`` validation.

    :param args: dict-like report parameters. ``section`` is required,
        ``date_from`` is read unconditionally and ``genre`` is optional.
        NOTE: ``args['date_to']`` is overwritten with ``args['date_from']``,
        so the mapping passed in by the caller is modified.
    :return: iterator of lists of items; iteration stops at the first page
        that comes back empty.

    Calls ``abort(400, ...)`` when no section is provided.
    """

    if not args.get('section'):
        abort(400, gettext('Must provide a section for this report'))

    source = {
        'query': items_query(True),
        'size': CHUNK_SIZE,
        'from': 0,
        'sort': [{'versioncreated': 'asc'}],
        '_source': [
            'headline', 'place', 'subject', 'service', 'versioncreated',
            'anpa_take_key',
        ],
    }

    must_terms = []
    if args.get('genre'):
        must_terms.append({'terms': {'genre.code': list(args['genre'])}})

    # The end date is forced to the start date before the date filters are
    # built from ``args``.
    args['date_to'] = args['date_from']
    date_range = get_date_filters(args)
    if date_range.get('gt') or date_range.get('lt'):
        must_terms.append({'range': {'versioncreated': date_range}})

    if must_terms:
        # ``extend`` keeps ``must`` a flat list of clauses; ``append`` would
        # nest the whole ``must_terms`` list as a single element.
        source['query']['bool']['must'].extend(must_terms)

    # Apply the section filters
    section = args['section']
    get_resource_service('section_filters').apply_section_filter(
        source['query'], section)

    while True:
        results = get_resource_service(
            '{}_search'.format(section)).search(source)
        items = list(results)

        if not items:
            break

        # Move the window forward before yielding so the next request
        # fetches the following page.
        source['from'] += CHUNK_SIZE

        yield items
Ejemplo n.º 2
0
    def get_genres():
        """Return the genre codes found in the news items.

        Runs a search that returns no hits (``size`` 0) and only a ``terms``
        aggregation on ``genre.code``, then returns the key of every bucket.

        Reads ``args`` (for ``section``) and ``date_range`` from a scope
        outside this function; neither is defined here.

        :return: list of bucket keys, empty when the response carries no
            ``genres`` aggregation or no buckets.
        """

        query = items_query(True)

        must_terms = []
        if date_range.get('gt') or date_range.get('lt'):
            must_terms.append({'range': {'versioncreated': date_range}})

        if must_terms:
            # ``extend`` keeps ``must`` a flat list of clauses; ``append``
            # would nest the whole ``must_terms`` list as a single element.
            query['bool']['must'].extend(must_terms)

        source = {
            'query': query,
            'size': 0,
            'aggs': {
                'genres': {
                    'terms': {
                        'field': 'genre.code',
                        # NOTE(review): presumably "return all buckets" under
                        # older Elasticsearch semantics -- confirm against the
                        # cluster version in use.
                        'size': 0,
                    },
                },
            },
        }

        # Apply the section filters
        section = args['section']
        get_resource_service('section_filters').apply_section_filter(
            source['query'], section)

        results = get_resource_service(
            '{}_search'.format(section)).search(source)

        # Every level may be missing or None, so fall back to an empty
        # container at each step.
        aggregations = results.hits.get('aggregations') or {}
        genres = aggregations.get('genres') or {}
        buckets = genres.get('buckets') or []

        return [genre['key'] for genre in buckets]