コード例 #1
0
def get_items(args):
    """Yield the news items for the given date and filters, one page at a time.

    For performance reasons this is a generator: each iteration runs one
    search request and yields its results as a list, so that aggregations
    can be computed on one chunk while the next one is retrieved.

    The report covers a single day: ``args['date_to']`` is overwritten with
    ``args['date_from']`` before the date filter is built, so ``args`` is
    modified in place.

    Because this is a generator, the section check (and its 400 abort) only
    runs once iteration starts, not when the function is called.

    :param dict args: report arguments. ``section`` and ``date_from`` are
        read directly; ``genre`` (an iterable of genre codes) is optional.
    :return: generator of lists of items, each list being one page of
        search results requested with a page size of ``CHUNK_SIZE``
    """

    if not args.get('section'):
        abort(400, gettext('Must provide a section for this report'))

    source = {
        'query': items_query(True),
        'size': CHUNK_SIZE,
        'from': 0,
        'sort': [{'versioncreated': 'asc'}],
        '_source': [
            'headline', 'place', 'subject', 'service', 'versioncreated',
            'anpa_take_key'
        ],
    }

    must_terms = []
    if args.get('genre'):
        must_terms.append({'terms': {'genre.code': list(args['genre'])}})

    # Restrict the report to the single day given by ``date_from``
    args['date_to'] = args['date_from']
    date_range = get_date_filters(args)
    if date_range.get('gt') or date_range.get('lt'):
        must_terms.append({'range': {'versioncreated': date_range}})

    # ``extend`` (not ``append``) so each clause becomes its own entry of
    # the ``must`` array instead of one nested list of clauses.
    source['query']['bool']['must'].extend(must_terms)

    # Apply the section filters
    section = args['section']
    get_resource_service('section_filters').apply_section_filter(
        source['query'], section)

    search_service_name = '{}_search'.format(section)
    while True:
        results = get_resource_service(search_service_name).search(source)
        items = list(results)

        # An empty page means every matching item has been yielded
        if not items:
            break

        # Advance the offset before yielding so the next iteration fetches
        # the following page
        source['from'] += CHUNK_SIZE

        yield items
コード例 #2
0
ファイル: reports.py プロジェクト: petrjasek/newsroom
def get_company_api_usage():
    """Build the 'Company News API Usage' report from the ``api_audit`` log.

    Reads the date range and the ``from`` offset from the request query
    string, searches the audit entries created in that range for every
    company that owns an API token, and aggregates them per subscriber and
    per endpoint.

    Aborts with 400 when no date range is supplied or when the requested
    offset is 1000 or more.

    :return: dict with the formatted ``results``, the translated report
        ``name`` and ``result_headers`` (the endpoint list filled in by
        ``format_report_results``)
    """
    args = deepcopy(request.args.to_dict())
    date_range = get_date_filters(args)

    # Reject the request when neither bound of the range is present. The
    # previous condition (`not gt and lt`) parsed as `(not gt) and lt` and
    # therefore never fired for a completely missing range.
    if not (date_range.get('gt') or date_range.get('lt')):
        abort(400, 'No date range specified.')

    offset = int(args.get('from', 0))
    if offset >= 1000:
        # Deep pagination is refused before any lookup is performed
        # https://www.elastic.co/guide/en/elasticsearch/guide/current/pagination.html#pagination
        return abort(400)

    # Only companies holding an API token are reported on
    company_ids = [t['company'] for t in query_resource(API_TOKENS)]

    source = {
        'query': {
            'bool': {
                'must': [
                    {"range": {"created": date_range}},
                    {"terms": {"subscriber": company_ids}},
                ]
            }
        },
        'sort': [{'created': 'desc'}],
        'size': 200,
        'from': offset,
        'aggs': {
            "items": {
                "aggs": {
                    "endpoints": {
                        "terms": {
                            "size": 0,
                            "field": "endpoint"
                        }
                    }
                },
                "terms": {
                    "size": 0,
                    "field": "subscriber"
                }
            }
        },
    }

    companies = get_entity_dict(
        query_resource('companies', lookup={'_id': {'$in': company_ids}}),
        str_id=True)

    req = ParsedRequest()
    req.args = {'source': json.dumps(source)}

    # ``unique_endpoints`` is passed in empty and used afterwards as the
    # report headers, so ``format_report_results`` is expected to fill it
    unique_endpoints = []
    search_result = superdesk.get_resource_service('api_audit').get(req, None)
    results = format_report_results(search_result, unique_endpoints, companies)

    return {
        'results': results,
        'name': gettext('Company News API Usage'),
        'result_headers': unique_endpoints,
    }
コード例 #3
0
def get_facets(args):
    """Get aggregations for genre and companies using the date range and section

    This is used to populate the dropdown filters in the front-end.

    The facets cover a single day: ``args['date_to']`` is overwritten with
    ``args['date_from']`` before the date filter is built, so ``args`` is
    modified in place.

    :param dict args: report arguments; ``date_from`` and ``section`` are
        read directly
    :return: dict with two lists of aggregation bucket keys, ``genres`` and
        ``companies``
    """

    args['date_to'] = args['date_from']
    date_range = get_date_filters(args)
    has_date_range = bool(date_range.get('gt') or date_range.get('lt'))

    def get_genres():
        """Get the list of genres from the news items"""

        query = items_query(True)

        if has_date_range:
            # Add the clause itself to ``must``; appending a list of
            # clauses would nest an array inside the ``must`` array.
            query['bool']['must'].append(
                {'range': {'versioncreated': date_range}})

        source = {
            'query': query,
            'size': 0,  # only the aggregation is needed, not the hits
            'aggs': {
                'genres': {
                    'terms': {
                        'field': 'genre.code',
                        'size': 0
                    }
                }
            }
        }

        # Apply the section filters
        section = args['section']
        get_resource_service('section_filters').apply_section_filter(
            source['query'], section)

        results = get_resource_service(
            '{}_search'.format(section)).search(source)

        # Any level of the aggregation response may be missing or None
        aggs = results.hits.get('aggregations') or {}
        buckets = (aggs.get('genres') or {}).get('buckets') or []

        return [genre['key'] for genre in buckets]

    def get_companies():
        """Get the list of companies from the action history"""

        must_terms = [{'term': {'section': args['section']}}]
        if has_date_range:
            must_terms.append({'range': {'_created': date_range}})

        source = {
            'query': {
                'bool': {
                    'must': must_terms
                }
            },
            'size': 0,  # only the aggregation is needed, not the hits
            'from': 0,
            'aggs': {
                'companies': {
                    'terms': {
                        'field': 'company',
                        'size': 0
                    }
                }
            }
        }

        results = get_resource_service('history').fetch_history(source)
        # Any level of the aggregation response may be missing or None
        aggs = (results.get('hits') or {}).get('aggregations') or {}
        buckets = (aggs.get('companies') or {}).get('buckets') or []

        return [company['key'] for company in buckets]

    return {'genres': get_genres(), 'companies': get_companies()}
コード例 #4
0
ファイル: reports.py プロジェクト: petrjasek/newsroom
def get_subscriber_activity_report():
    """Build the subscriber activity report, as JSON data or a CSV download.

    Filters (``company``, ``action``, ``section``, the date range), the
    ``from`` offset and the ``export`` flag are read from the request query
    string. History records are fetched 25 at a time, newest first, and
    each record is enhanced in place: the item id is replaced by a dict
    with its text and link, and the company id, user id and section id are
    replaced by their display names.

    Aborts with 400 when the requested offset is 1000 or more.

    :return: without ``export``, a dict with ``results``, ``name`` and
        ``aggregations``; with ``export``, a CSV file attachment response
    """
    args = deepcopy(request.args.to_dict())

    # Elastic query
    aggregations = {'action': {'terms': {'field': 'action', 'size': 0}}}
    must_terms = []
    source = {}

    # Each optional filter becomes an exact-match term clause
    for field in ('company', 'action', 'section'):
        if args.get(field):
            must_terms.append({'term': {field: args.get(field)}})

    date_range = get_date_filters(args)
    if date_range.get('gt') or date_range.get('lt'):
        must_terms.append({"range": {"versioncreated": date_range}})

    source['sort'] = [{'versioncreated': 'desc'}]
    if must_terms:
        source['query'] = {'bool': {'must': must_terms}}

    source['size'] = 25
    source['from'] = int(args.get('from', 0))
    source['aggs'] = aggregations

    if source['from'] >= 1000:
        # https://www.elastic.co/guide/en/elasticsearch/guide/current/pagination.html#pagination
        return abort(400)

    # Get the results
    results = superdesk.get_resource_service('history').fetch_history(
        source, args.get('export'))
    docs = results['items']
    hits = results['hits']

    # Enhance the results: collect the ids to look up, one query per type
    wire_ids = []
    agenda_ids = []
    company_ids = []
    user_ids = []
    for doc in docs:
        if doc.get('section') == 'agenda':
            agenda_ids.append(doc.get('item'))
        else:
            wire_ids.append(doc.get('item'))

        company_ids.append(ObjectId(doc.get('company')))
        user_ids.append(ObjectId(doc.get('user')))

    agenda_items = get_entity_dict(get_items_by_id(agenda_ids, 'agenda'))
    wire_items = get_entity_dict(get_items_by_id(wire_ids, 'items'))
    company_items = get_entity_dict(get_items_by_id(company_ids, 'companies'),
                                    True)
    user_items = get_entity_dict(get_items_by_id(user_ids, 'users'), True)

    def get_section_name(s):
        """Return the name of the section whose ``_id`` is ``s``, or None."""
        return next(
            (sec for sec in newsroom_app.sections if sec.get('_id') == s),
            {}).get('name')

    for doc in docs:
        item_id = doc.get('item')
        if item_id in wire_items:
            wire_item = wire_items[item_id]
            doc['item'] = {
                'item_text': wire_item.get('headline'),
                '_id': wire_item['_id'],
                'item_href': '/{}?item={}'.format(doc['section'], item_id)
            }
        elif item_id in agenda_items:
            agenda_item = agenda_items[item_id]
            doc['item'] = {
                'item_text': (agenda_item.get('name')
                              or agenda_item.get('slugline')),
                '_id': agenda_item['_id'],
                'item_href': '/agenda?item={}'.format(item_id)
            }

        if doc.get('company') in company_items:
            doc['company'] = company_items[doc.get('company')].get('name')

        if doc.get('user') in user_items:
            user = user_items[doc.get('user')]
            # The format string needs placeholders; without them every
            # user was rendered as the same constant text.
            doc['user'] = "{0} {1}".format(user.get('first_name'),
                                           user.get('last_name'))

        doc['section'] = get_section_name(doc['section'])
        doc['action'] = doc['action'].capitalize()

    if not request.args.get('export'):
        return {
            'results': docs,
            'name': gettext('SubscriberActivity'),
            'aggregations': hits.get('aggregations')
        }

    field_names = ['Company', 'Section', 'Item', 'Action', 'User', 'Created']
    attachment_filename = '%s.csv' % utcnow().strftime('%Y%m%d%H%M%S')

    with io.StringIO() as csv_buffer:
        writer = csv.DictWriter(csv_buffer,
                                delimiter=',',
                                fieldnames=field_names)
        writer.writeheader()
        for doc in docs:
            # ``item`` is a dict only when the lookup above resolved it;
            # otherwise keep whatever raw value (id or nothing) is stored.
            item = doc.get('item')
            item_text = item.get('item_text') if isinstance(item, dict) \
                else item

            # Records without a timestamp get an empty cell
            created = doc.get('versioncreated')

            writer.writerow({
                'Company': doc.get('company'),
                'Section': doc.get('section'),
                'Item': item_text,
                'Action': doc.get('action'),
                'User': doc.get('user'),
                'Created':
                created.strftime('%H:%M %d/%m/%y') if created else '',
            })

        # send_file needs a binary stream positioned at the start;
        # a BytesIO built from bytes starts at offset 0.
        mem = io.BytesIO(csv_buffer.getvalue().encode('utf-8'))

    mimetype = 'text/plain'
    attachment_filename = secure_filename(attachment_filename)
    return send_file(mem,
                     mimetype=mimetype,
                     attachment_filename=attachment_filename,
                     as_attachment=True)