def get_items(args):
    """Get all the news items for the date and filters provided.

    For performance reasons, returns an iterator that yields an array of
    CHUNK_SIZE so that aggregations can be queried while the next iteration
    is retrieved.

    :param args: request args; requires ``section`` and ``date_from``,
        optionally ``genre`` (iterable of genre codes)
    :raises: aborts with 400 if no ``section`` is provided
    """
    if not args.get('section'):
        abort(400, gettext('Must provide a section for this report'))

    source = {
        'query': items_query(True),
        'size': CHUNK_SIZE,
        'from': 0,
        'sort': [{'versioncreated': 'asc'}],
        '_source': [
            'headline', 'place', 'subject', 'service', 'versioncreated',
            'anpa_take_key'
        ],
    }

    must_terms = []
    if args.get('genre'):
        must_terms.append({'terms': {'genre.code': list(args['genre'])}})

    # The report covers a single day, so the range is pinned to date_from.
    args['date_to'] = args['date_from']
    date_range = get_date_filters(args)
    if date_range.get('gt') or date_range.get('lt'):
        must_terms.append({'range': {'versioncreated': date_range}})

    if must_terms:
        # BUG FIX: was .append(must_terms), which nests a list inside the
        # bool/must array — Elasticsearch rejects a START_ARRAY where a
        # query object is expected. extend() flattens the clauses in.
        source['query']['bool']['must'].extend(must_terms)

    # Apply the section filters
    section = args['section']
    get_resource_service('section_filters').apply_section_filter(
        source['query'], section)

    while True:
        results = get_resource_service(
            '{}_search'.format(section)).search(source)
        items = list(results)
        if not items:
            break
        source['from'] += CHUNK_SIZE
        yield items
def get_company_api_usage():
    """Report on News API usage per company, broken down by endpoint.

    Reads the date range and paging offset from the request args, queries the
    ``api_audit`` resource aggregated by subscriber and endpoint, and returns
    the formatted results plus the list of endpoints seen (used as the
    report's column headers).

    :raises: aborts with 400 when no date range is supplied, or when paging
        past elastic's deep-pagination limit
    """
    args = deepcopy(request.args.to_dict())

    date_range = get_date_filters(args)
    # BUG FIX: was `if not date_range.get('gt') and date_range.get('lt')`,
    # which only aborted when gt was missing AND lt was present. The intent
    # is to require at least one bound of the range.
    if not date_range.get('gt') and not date_range.get('lt'):
        abort(400, 'No date range specified.')

    source = {}
    must_terms = [{"range": {"created": date_range}}]
    source['query'] = {'bool': {'must': must_terms}}
    source['sort'] = [{'created': 'desc'}]
    source['size'] = 200
    source['from'] = int(args.get('from', 0))

    if source['from'] >= 1000:
        # Hoisted before the company lookups so we don't run side-effect
        # queries for a request we are going to reject anyway.
        # https://www.elastic.co/guide/en/elasticsearch/guide/current/pagination.html#pagination
        return abort(400)

    source['aggs'] = {
        "items": {
            "aggs": {
                "endpoints": {
                    "terms": {
                        "size": 0,
                        "field": "endpoint",
                    }
                }
            },
            "terms": {
                "size": 0,
                "field": "subscriber",
            },
        }
    }

    # Restrict the audit search to companies that actually hold API tokens.
    company_ids = [t['company'] for t in query_resource(API_TOKENS)]
    source['query']['bool']['must'].append(
        {"terms": {"subscriber": company_ids}})
    companies = get_entity_dict(
        query_resource('companies', lookup={'_id': {'$in': company_ids}}),
        str_id=True)

    req = ParsedRequest()
    req.args = {'source': json.dumps(source)}

    unique_endpoints = []
    search_result = superdesk.get_resource_service('api_audit').get(req, None)
    results = format_report_results(search_result, unique_endpoints, companies)

    return {
        'results': results,
        'name': gettext('Company News API Usage'),
        'result_headers': unique_endpoints,
    }
def get_facets(args):
    """Get aggregations for genre and companies using the date range and section.

    This is used to populate the dropdown filters in the front-end.

    :param args: request args; requires ``section`` and ``date_from``
    :return: dict with ``genres`` and ``companies`` key lists
    """
    # The report covers a single day, so the range is pinned to date_from.
    args['date_to'] = args['date_from']
    date_range = get_date_filters(args)

    def get_genres():
        """Get the list of genres from the news items"""
        query = items_query(True)
        must_terms = []
        source = {}
        if date_range.get('gt') or date_range.get('lt'):
            must_terms.append({'range': {'versioncreated': date_range}})
        if must_terms:
            # BUG FIX: was .append(must_terms), which nests a list inside
            # the bool/must array — an invalid Elasticsearch clause.
            query['bool']['must'].extend(must_terms)
        source.update({
            'query': query,
            'size': 0,
            'aggs': {
                'genres': {
                    'terms': {
                        'field': 'genre.code',
                        'size': 0,
                    }
                }
            },
        })

        # Apply the section filters
        section = args['section']
        get_resource_service('section_filters').apply_section_filter(
            source['query'], section)
        results = get_resource_service(
            '{}_search'.format(section)).search(source)
        buckets = ((results.hits.get('aggregations') or {})
                   .get('genres') or {}).get('buckets') or []
        return [genre['key'] for genre in buckets]

    def get_companies():
        """Get the list of companies from the action history"""
        must_terms = [{'term': {'section': args['section']}}]
        if date_range.get('gt') or date_range.get('lt'):
            must_terms.append({'range': {'_created': date_range}})
        source = {
            'query': {'bool': {'must': must_terms}},
            'size': 0,
            'from': 0,
            'aggs': {
                'companies': {
                    'terms': {
                        'field': 'company',
                        'size': 0,
                    }
                }
            },
        }
        results = get_resource_service('history').fetch_history(source)
        aggs = (results.get('hits') or {}).get('aggregations') or {}
        buckets = (aggs.get('companies') or {}).get('buckets') or []
        return [company['key'] for company in buckets]

    return {'genres': get_genres(), 'companies': get_companies()}
def get_subscriber_activity_report():
    """Report on subscriber actions (wire/agenda downloads, shares, etc.).

    Queries the ``history`` resource with the company/action/section/date
    filters from the request args, enhances each history entry with the
    referenced item, company name and user name, and either returns the
    results as JSON (default) or streams them as a CSV attachment when the
    ``export`` arg is set.

    :raises: aborts with 400 when paging past elastic's deep-pagination limit
    """
    args = deepcopy(request.args.to_dict())

    # Elastic query
    aggregations = {'action': {'terms': {'field': 'action', 'size': 0}}}
    must_terms = []
    source = {}
    if args.get('company'):
        must_terms.append({'term': {'company': args.get('company')}})
    if args.get('action'):
        must_terms.append({'term': {'action': args.get('action')}})
    if args.get('section'):
        must_terms.append({'term': {'section': args.get('section')}})

    date_range = get_date_filters(args)
    if date_range.get('gt') or date_range.get('lt'):
        must_terms.append({"range": {"versioncreated": date_range}})

    source['sort'] = [{'versioncreated': 'desc'}]
    if must_terms:
        source['query'] = {'bool': {'must': must_terms}}
    source['size'] = 25
    source['from'] = int(args.get('from', 0))
    source['aggs'] = aggregations

    if source['from'] >= 1000:
        # https://www.elastic.co/guide/en/elasticsearch/guide/current/pagination.html#pagination
        return abort(400)

    # Get the results
    results = superdesk.get_resource_service('history').fetch_history(
        source, args.get('export'))
    docs = results['items']
    hits = results['hits']

    # Collect the ids referenced by the history entries so each entity type
    # can be fetched in a single batched lookup.
    wire_ids = []
    agenda_ids = []
    company_ids = []
    user_ids = []
    for doc in docs:
        if doc.get('section') == 'agenda':
            agenda_ids.append(doc.get('item'))
        else:
            wire_ids.append(doc.get('item'))
        company_ids.append(ObjectId(doc.get('company')))
        user_ids.append(ObjectId(doc.get('user')))

    agenda_items = get_entity_dict(get_items_by_id(agenda_ids, 'agenda'))
    wire_items = get_entity_dict(get_items_by_id(wire_ids, 'items'))
    company_items = get_entity_dict(
        get_items_by_id(company_ids, 'companies'), True)
    user_items = get_entity_dict(get_items_by_id(user_ids, 'users'), True)

    def get_section_name(s):
        # Resolve a section id to its display name via the app config.
        return next(
            (sec for sec in newsroom_app.sections if sec.get('_id') == s),
            {}).get('name')

    for doc in docs:
        # NOTE: the item href is built before doc['section'] is replaced
        # with its display name below — order matters here.
        if doc.get('item') in wire_items:
            doc['item'] = {
                'item_text': wire_items[doc['item']].get('headline'),
                '_id': wire_items[doc['item']]['_id'],
                'item_href': '/{}?item={}'.format(doc['section'], doc['item']),
            }
        elif doc.get('item') in agenda_items:
            doc['item'] = {
                'item_text': (agenda_items[doc['item']].get('name')
                              or agenda_items[doc['item']].get('slugline')),
                '_id': agenda_items[doc['item']]['_id'],
                'item_href': '/agenda?item={}'.format(doc['item']),
            }
        if doc.get('company') in company_items:
            doc['company'] = company_items[doc.get('company')].get('name')
        if doc.get('user') in user_items:
            user = user_items[doc.get('user')]
            # BUG FIX: was "******".format(...), a placeholder-less format
            # string that discarded both names and rendered every user as
            # the literal "******".
            doc['user'] = '{} {}'.format(
                user.get('first_name'), user.get('last_name'))
        doc['section'] = get_section_name(doc['section'])
        doc['action'] = doc['action'].capitalize()

    if not request.args.get('export'):
        return {
            'results': docs,
            'name': gettext('SubscriberActivity'),
            'aggregations': hits.get('aggregations'),
        }

    # CSV export path
    field_names = ['Company', 'Section', 'Item', 'Action', 'User', 'Created']
    temp_file = io.StringIO()
    attachment_filename = '%s.csv' % utcnow().strftime('%Y%m%d%H%M%S')
    writer = csv.DictWriter(temp_file, delimiter=',', fieldnames=field_names)
    writer.writeheader()
    for doc in docs:
        row = {
            'Company': doc.get('company'),
            'Section': doc.get('section'),
            # BUG FIX: was (doc.get('item') or {})['item_text'], which
            # raised KeyError whenever the item could not be resolved above.
            'Item': (doc.get('item') or {}).get('item_text'),
            'Action': doc.get('action'),
            'User': doc.get('user'),
            'Created': doc.get('versioncreated').strftime('%H:%M %d/%m/%y'),
        }
        writer.writerow(row)
    temp_file.seek(0)

    mimetype = 'text/plain'
    # Creating the byteIO object from the StringIO Object
    mem = io.BytesIO()
    mem.write(temp_file.getvalue().encode('utf-8'))
    # seeking was necessary. Python 3.5.2, Flask 0.12.2
    mem.seek(0)
    temp_file.close()
    attachment_filename = secure_filename(attachment_filename)
    return send_file(
        mem,
        mimetype=mimetype,
        attachment_filename=attachment_filename,
        as_attachment=True)