def export(self):
    index_name = audit.get_index_name()
    es = ESConnectionFactoryFactory()()
    query = self.get_query()
    results = es.search(index=index_name, body=query,
                        sort='date:desc', size=3000)
    output = StringIO()
    writer = csv.writer(output)
    writer.writerow(['Action', 'Path', 'User', 'Summary', 'Date'])
    for result in results['hits']['hits']:
        data = result['_source']
        writer.writerow([
            data['name'],
            self.get_path(data),
            data['user'],
            data['summary'],
            data['date']
        ])
    resp = self.request.response
    resp.setHeader('Content-Disposition', 'attachment; filename=export.csv')
    resp.setHeader('Content-Type', 'text/csv')
    output.seek(0)
    return output.read()
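
# `get_path` is used by export() above but is not defined in this section.
# A minimal sketch of what such a view helper could look like, assuming it
# only trims the portal prefix from the stored physical path; the real
# method may differ.
def get_path(self, data):
    path = data.get('path', '')
    site_path = '/'.join(api.portal.get().getPhysicalPath())
    if path.startswith(site_path):
        path = path[len(site_path):]
    return path or '/'
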
def doimport(args):
    start_time = datetime.now()
    if not os.path.exists(args.filepath):
        logger.critical("does not exist: {}".format(args.filepath))
        sys.exit(1)
    try:
        catalog = api.portal.get_tool('portal_catalog')
        es_catalog = ElasticSearchCatalog(catalog)
    except Exception:
        logger.critical('Error setting up ElasticSearchCatalog')
        sys.exit(1)
    if not es_catalog.enabled:
        logger.critical('Elasticsearch not enabled on site `{}`'.format(args.site_id))
        return

    es_custom_index_name_enabled = api.portal.get_registry_record(
        'castle.es_index_enabled', default=False)
    custom_index_value = api.portal.get_registry_record('castle.es_index', default=None)
    index_name = audit.get_index_name(
        site_path=None,
        es_custom_index_name_enabled=es_custom_index_name_enabled,
        custom_index_value=custom_index_value)
    logger.info('importing audit log into ES index `{}`'.format(index_name))

    es = ESConnectionFactoryFactory()()
    if not es.indices.exists(index_name):
        logger.info('creating index...')
        try:
            audit._create_index(es, index_name)
        except Exception:
            logger.critical('could not create index `{}`'.format(index_name),
                            exc_info=True)
            sys.exit(1)

    num = 0
    bulkdata = []
    for log in get_log_data(args.filepath):
        bulkdata.append({
            "_index": index_name,
            "_source": log,
        })
        num += 1
        # flush to Elasticsearch in batches of 10,000 entries
        if num % 10000 == 0:
            logger.info("at {}, performing bulk operation...".format(num))
            bulkupdate(es, bulkdata, index_name)
            bulkdata = []

    logger.info("at {}, performing final bulk operation...".format(num))
    bulkupdate(es, bulkdata, index_name)

    end_time = datetime.now()
    elapsed_time = end_time - start_time
    logger.info('{} entries indexed in {}'.format(num, elapsed_time))
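
# `bulkupdate` (and `get_log_data`) are called above but not defined in this
# section.  A minimal sketch of the bulk step, assuming the standard
# elasticsearch-py bulk helper; the real helper may batch or retry differently.
from elasticsearch import helpers


def bulkupdate(es, bulkdata, index_name):
    if not bulkdata:
        return
    # each action dict already carries "_index" and "_source"
    helpers.bulk(es, bulkdata)
    # make the freshly indexed entries immediately searchable
    es.indices.refresh(index=index_name)
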
def do_query(self):
    index_name = audit.get_index_name()
    es = ESConnectionFactoryFactory()()
    query = self.get_query()
    try:
        page = int(self.request.get('page', 1))
    except Exception:
        page = 1
    start = (page - 1) * self.limit
    results = es.search(index=index_name, body=query, sort='date:desc',
                        from_=start, size=self.limit)
    return results
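
# `get_query` is used by both export() and do_query() but is not defined in
# this section.  A minimal sketch of the kind of body those calls expect,
# assuming the same filtered-query syntax used in check_site() below; the
# real method likely builds additional filters from request parameters.
def get_query(self):
    filters = []
    user = self.request.get('user')
    if user:
        filters.append({'term': {'user': user}})
    if not filters:
        return {'query': {'match_all': {}}}
    qfilter = filters[0] if len(filters) == 1 else {'and': filters}
    return {
        'query': {
            'filtered': {
                'filter': qfilter,
                'query': {'match_all': {}}
            }
        }
    }
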
def export(args):
    if args.indexname is not None:
        index_name = args.indexname
    else:
        es_custom_index_name_enabled = api.portal.get_registry_record(
            'castle.es_index_enabled', default=False)
        custom_index_value = api.portal.get_registry_record('castle.es_index', default=None)
        index_name = audit.get_index_name(
            site_path=None,
            es_custom_index_name_enabled=es_custom_index_name_enabled,
            custom_index_value=custom_index_value)
    logger.info("exporting from ES index `{}`".format(index_name))

    starttime = datetime.datetime.now()

    hostsoverride = None
    optsoverride = None
    if args.host is not None:
        hostsoverride = args.host
        optsoverride = dict(
            timeout=args.timeout,
            sniff_on_start=False,
            sniff_on_connection_fail=False,
        )
    es = ESConnectionFactoryFactory(hostsoverride=hostsoverride,
                                    optsoverride=optsoverride)()

    query = {"query": {'match_all': {}}}
    countresult = es.count(index=index_name, body=query)
    size = countresult.get("count", -1)
    logger.info("{} results need to be exported (-1 is unknown)".format(size))

    logger.info("fetching resultset with scroll time of `{}`".format(
        args.scrolltime))
    results = es.search(
        index=index_name,
        body=query,
        sort='date:desc',
        scroll=args.scrolltime,
        size=10000,  # max per search result
        timeout=args.searchtimeout)

    logger.info("writing to `{}` (truncated)".format(args.filepath))
    with open(args.filepath, 'w') as output:
        writer = csv.writer(output, quoting=csv.QUOTE_ALL)
        logger.info("writing header row...")
        writer.writerow([
            'date', 'name', 'object', 'path',
            'request_uri', 'summary', 'type', 'user'
        ])

        num = 0
        while len(results['hits']['hits']) > 0:
            old_scroll_id = results["_scroll_id"]
            logger.info("writing {} hits for scroll {}".format(
                len(results['hits']['hits']), old_scroll_id))
            for result in results['hits']['hits']:
                data = result['_source']
                rowdata = [
                    data.get("date", ""),
                    data.get("name", ""),
                    data.get("object", ""),
                    data.get("path", ""),
                    data.get("request_uri", ""),
                    data.get("summary", ""),
                    data.get("type", ""),
                    data.get("user", ""),
                ]
                rowdata = [convertunicode(a) for a in rowdata]
                writer.writerow(rowdata)
            num += len(results['hits']['hits'])
            logger.info("{} of {} written".format(num, size))
            logger.info("fetching next scroll...")
            results = es.scroll(scroll_id=old_scroll_id, scroll=args.scrolltime)

    endtime = datetime.datetime.now()
    deltatime = endtime - starttime
    logger.info("export complete -- took {}s, exported {} records".format(
        deltatime.total_seconds(), num))
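
# `convertunicode` is referenced above but not defined in this section.  A
# minimal sketch, assuming its only job is to make values safe for Python 2's
# csv writer (which cannot write unicode directly); the real helper may differ.
def convertunicode(value):
    if isinstance(value, unicode):  # noqa: F821 -- Python 2 builtin
        return value.encode('utf-8')
    return value
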
def check_site(site):
    # XXX will store when last check was so we always only look back
    # to previous check time
    setSite(site)
    catalog = api.portal.get_tool('portal_catalog')
    es = ElasticSearchCatalog(catalog)
    if not es.enabled:
        return

    index_name = audit.get_index_name()
    es = ESConnectionFactoryFactory()()
    sannotations = IAnnotations(site)
    last_checked = sannotations.get(LAST_CHECKED_KEY)
    if last_checked is None:
        last_checked = DateTime() - 30

    filters = [{
        'term': {
            'type': 'workflow'
        }
    }, {
        'range': {
            'date': {
                'gt': last_checked.ISO8601()
            }
        }
    }]
    if len(filters) > 1:
        qfilter = {'and': filters}
    else:
        qfilter = filters[0]
    query = {
        "query": {
            'filtered': {
                'filter': qfilter,
                'query': {
                    'match_all': {}
                }
            }
        }
    }
    results = es.search(index=index_name, doc_type=audit.es_doc_type,
                        body=query, sort='date:desc', size=1000)
    hits = results['hits']['hits']
    workflow = api.portal.get_tool('portal_workflow')
    forced = []
    checked = []
    for hit in hits:
        hit = hit['_source']
        if hit['object'] in checked:
            continue
        try:
            ob = uuidToObject(hit['object'])
            checked.append(hit['object'])
        except Exception:
            continue
        try:
            review_history = workflow.getInfoFor(ob, 'review_history')
            if not review_history:
                continue
            for r in reversed(review_history):
                if (not r['action'] or
                        r['review_state'] != 'published' or
                        not r.get('comments', '').startswith('OVERRIDE:')):
                    continue
                if r['time'] < last_checked:
                    # just quit now, we're getting to older history
                    # that we don't care about
                    break
                forced.append({'ob': ob, 'history_entry': r})
        except WorkflowException:
            continue

    if len(forced) > 0:
        # send out email to admins
        site_url = site.absolute_url()
        registry = getUtility(IRegistry)
        public_url = registry.get('plone.public_url')
        if not public_url:
            public_url = site_url

        email_html = EMAIL_BODY + '<ul>'
        for item in forced:
            ob = item['ob']
            wf_entry = item['history_entry']
            try:
                user = api.user.get(wf_entry['actor'])
                user_name = user.getProperty('fullname') or user.getId()
            except Exception:
                user_name = wf_entry['actor']
            email_html += EMAIL_BODY_ITEM.format(
                content_url=ob.absolute_url().replace(site_url, public_url),
                content_title=ob.Title(),
                user_name=user_name,
                comments=wf_entry.get('comments', ''))
        email_html += '</ul>'

        email_subject = "Forced content publication update (Site: %s)" % (
            api.portal.get_registry_record('plone.site_title'))
        for user in api.user.get_users():
            user_roles = api.user.get_roles(user=user)
            email = user.getProperty('email')
            if (('Manager' not in user_roles and
                    'Site Administrator' not in user_roles) or not email):
                continue
            utils.send_email(email, email_subject, html=email_html)

    site._p_jar.sync()
    sannotations[LAST_CHECKED_KEY] = DateTime()
    transaction.commit()
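
# EMAIL_BODY and EMAIL_BODY_ITEM are referenced above but not defined in this
# section.  Hypothetical placeholders illustrating the fields check_site()
# fills in; the real templates may differ.
EMAIL_BODY = """
<p>The following content was force-published (workflow override) since the
last check:</p>
"""
EMAIL_BODY_ITEM = """
<li><a href="{content_url}">{content_title}</a> published by {user_name}:
{comments}</li>
"""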