def post(self):
    """Task-queue worker: export one page of sites to a CSV part file in GCS.

    Reads paging/filter parameters from the task payload, writes the
    fetched (and filtered) sites to a per-cursor ".part" object, then
    either chains a continuation task (more results remain) or merges
    every part object into the final deduplicated CSV file.
    """
    # get args
    self.start_cursor = self.request.get('cursor')
    self.filtering_event_key = self.request.get('event')
    self.filename = self.request.get('filename')
    self.csv_header = self.request.get('csv_header')
    self.worker_url = self.request.get('worker_url')
    # filtering event is optional; an empty key means "all events"
    self.event = Event.get(self.filtering_event_key) if self.filtering_event_key else None

    # get (base) query, skip query to cursor, filter for sites
    query = self.get_base_query()
    if self.start_cursor:
        query.with_cursor(self.start_cursor)
    fetched_sites = query.fetch(limit=self.sites_per_task)
    sites = self.filter_sites(fetched_sites)

    # write part of csv file to GCS
    # NOTE: on the first task start_cursor is '', so the object name ends
    # in ".part." -- still unique, since only one task has an empty cursor
    csv_part_gcs_fd = cloudstorage.open(
        BUCKET_NAME + '/' + self.filename + '.part.' + self.start_cursor,
        'w',
        content_type='text/csv'
    )
    self._write_csv_rows(csv_part_gcs_fd, sites)
    csv_part_gcs_fd.close()

    # decide what to do next
    self.end_cursor = query.cursor()
    if self.end_cursor and self.start_cursor != self.end_cursor:
        # chain to next task (cursor advanced, so more results may remain)
        taskqueue.add(
            url=self.worker_url,
            params=self.get_continuation_param_dict(),
            retry_options=taskqueue.TaskRetryOptions(task_retry_limit=3),
        )
    else:
        # finish file: combine parts and deduplicate lines
        logging.info(u"Deduplicating to create %s ..." % self.filename)
        # concatenate every part object into one in-memory buffer
        sio = StringIO()
        path_prefix = BUCKET_NAME + '/' + self.filename + '.part'
        for gcs_file_stat in cloudstorage.listbucket(path_prefix):
            csv_part_gcs_fd = cloudstorage.open(gcs_file_stat.filename)
            for line in csv_part_gcs_fd:
                sio.write(line)
            csv_part_gcs_fd.close()
        sio.seek(0)
        # set() drops duplicate rows; row order in the output is arbitrary
        deduplicated_lines = set(line for line in sio)

        # write csv header and deduplicated lines to new file
        # NOTE(review): the ".part" objects are left in the bucket after the
        # merge -- presumably cleaned up elsewhere; confirm
        csv_complete_gcs_fd = cloudstorage.open(
            BUCKET_NAME + '/' + self.filename,
            'w',
            content_type='text/csv'
        )
        csv_complete_gcs_fd.write(self.csv_header.encode('utf-8'))
        for line in deduplicated_lines:
            csv_complete_gcs_fd.write(line)
        csv_complete_gcs_fd.close()
def AuthenticatedGet(self, org, event):
    """Render the event-selection view for the authenticated organization.

    Global admins see every event; an event admin sees only the event
    they administer; any other organization sees an empty list.

    Bug fix: previously there was no fallback branch, so a non-admin
    org left ``events`` unassigned and ``self.render`` raised
    ``UnboundLocalError``.
    """
    # select event(s) to show
    if org.is_global_admin:
        events = Event.all()
    elif org.is_admin:
        events = [event]
    else:
        # non-admin orgs get nothing rather than a 500
        events = []
    self.render(events=events)
def post(self):
    """Task-queue worker exporting one page of sites as a CSV part in GCS.

    Each invocation handles one query page: it writes the page's rows
    to a cursor-named ".part" object, then chains the next task while
    the cursor keeps advancing; the final task merges all parts into
    one deduplicated CSV with the header prepended.
    """
    # get args
    self.start_cursor = self.request.get('cursor')
    self.filtering_event_key = self.request.get('event')
    self.filename = self.request.get('filename')
    self.csv_header = self.request.get('csv_header')
    self.worker_url = self.request.get('worker_url')
    # event filter is optional (empty key => export across all events)
    self.event = Event.get(
        self.filtering_event_key) if self.filtering_event_key else None

    # get (base) query, skip query to cursor, filter for sites
    query = self.get_base_query()
    if self.start_cursor:
        query.with_cursor(self.start_cursor)
    fetched_sites = query.fetch(limit=self.sites_per_task)
    sites = self.filter_sites(fetched_sites)

    # write part of csv file to GCS; part name embeds the start cursor,
    # which is unique per task (empty string for the very first task)
    csv_part_gcs_fd = cloudstorage.open(BUCKET_NAME + '/' + self.filename +
        '.part.' + self.start_cursor, 'w', content_type='text/csv')
    self._write_csv_rows(csv_part_gcs_fd, sites)
    csv_part_gcs_fd.close()

    # decide what to do next
    self.end_cursor = query.cursor()
    if self.end_cursor and self.start_cursor != self.end_cursor:
        # cursor moved: more pages remain, chain to next task
        taskqueue.add(
            url=self.worker_url,
            params=self.get_continuation_param_dict(),
            retry_options=taskqueue.TaskRetryOptions(task_retry_limit=3),
        )
    else:
        # finish file: combine parts and deduplicate lines
        logging.info(u"Deduplicating to create %s ..." % self.filename)
        sio = StringIO()
        path_prefix = BUCKET_NAME + '/' + self.filename + '.part'
        for gcs_file_stat in cloudstorage.listbucket(path_prefix):
            csv_part_gcs_fd = cloudstorage.open(gcs_file_stat.filename)
            for line in csv_part_gcs_fd:
                sio.write(line)
            csv_part_gcs_fd.close()
        sio.seek(0)
        # dedupe via set membership; output row order is unspecified
        deduplicated_lines = set(line for line in sio)

        # write csv header and deduplicated lines to new file
        csv_complete_gcs_fd = cloudstorage.open(BUCKET_NAME + '/' +
            self.filename, 'w', content_type='text/csv')
        csv_complete_gcs_fd.write(self.csv_header.encode('utf-8'))
        for line in deduplicated_lines:
            csv_complete_gcs_fd.write(line)
        csv_complete_gcs_fd.close()
def get(self):
    """Queue a deferred statistics-crunching task for each active event.

    Events without recent logins are skipped (and logged as skipped).
    """
    # defer crunch and save for each event
    for event in Event.all():
        if not event.logged_in_to_recently:
            logging.info(u"Crunching statistics: skipping %s" % event.short_name)
            continue
        logging.info(u"Crunching statistics for %s" % event.short_name)
        deferred.defer(
            self._crunch_and_save,
            str(event.key()),
            _queue='crunch-statistics',
        )
def get(self):
    """Kick off the bulk-export task chain for each active event.

    Events without recent logins are skipped (and logged as skipped).
    """
    # start export Task chain for each event
    for event in Event.all():
        if not event.logged_in_to_recently:
            logging.info(u"Export all sites: skipping %s" % event.short_name)
            continue
        logging.info(u"Exporting all sites in %s" % event.short_name)
        self.start_export(
            org=None,
            event=event,
            worker_url='/export_bulk_worker',
            filtering_event_key=event.key(),
            filename=all_event_timeless_filename(event),
        )
def get(self, path):
    """Resolve an incident short name from the URL path and redirect.

    Looks up the event by ``short_name``; 404s if it does not exist.
    The ``handle_incident_short_names`` config option selects the
    redirect target ('public_map' or 'authentication'); any other
    value 404s.

    Fix: the event name is now URL-encoded before being placed in the
    query string -- names containing spaces or reserved characters
    previously produced broken redirect URLs.
    """
    import urllib  # local import: only needed for this handler

    # path is assumed to be an incident short name -- check
    event = Event.all().filter('short_name', path).get()
    if not event:
        self.abort(404)

    # switch on config option
    config_setting = get_config_key('handle_incident_short_names')
    if config_setting == 'public_map':
        self.redirect(
            '/public-map?initial_incident_id='
            + urllib.quote_plus(event.short_name.encode('utf-8')))
        return
    elif config_setting == 'authentication':
        self.redirect(
            '/authentication?initial_event_name='
            + urllib.quote_plus(event.name.encode('utf-8')))
        return
    else:
        self.abort(404)
def _crunch_and_save(cls, event_key):
    """Compute incident statistics for one event; store CSV and HTML in GCS."""
    event = Event.get(event_key)

    # crunch
    stats_d = crunch_incident_statistics(event)
    csv_content = incident_statistics_csv(stats_d)
    html_content = incident_statistics_html(stats_d)

    # save csv & html -- one GCS object per rendering, UTF-8 encoded
    renderings = [
        (incident_statistics_csv_filename(event), 'text/csv', csv_content),
        (incident_statistics_html_filename(event), 'text/html', html_content),
    ]
    for filename, mime_type, content in renderings:
        gcs_fd = cloudstorage.open(
            BUCKET_NAME + '/' + filename, 'w', content_type=mime_type)
        gcs_fd.write(content.encode('utf-8'))
        gcs_fd.close()
def post(self):
    """Worker task: read export parameters, fetch a page of sites, and
    clear any stale output object before the upload begins.

    NOTE(review): ``sites`` is computed but not written here -- presumably
    a later step in this handler chain performs the upload; confirm.

    Fix: ``except Exception, e`` (comma syntax, removed in Python 3) is
    replaced with the equivalent ``except Exception as e``, which is
    valid on Python 2.6+ as well.
    """
    # get args
    self.start_cursor = self.request.get('cursor')
    self.filtering_event_key = self.request.get('event')
    self.filename = self.request.get('filename')
    self.csv_header = self.request.get('csv_header')
    self.worker_url = self.request.get('worker_url')
    # event filter is optional; empty key means "all events"
    self.event = (
        Event.get(self.filtering_event_key)
        if self.filtering_event_key else None
    )

    # get (base) query, skip query to cursor, filter for sites
    query = self.get_base_query()
    if self.start_cursor:
        query.with_cursor(self.start_cursor)
    fetched_sites = query.fetch(limit=self.sites_per_task)
    sites = self.filter_sites(fetched_sites)

    # try deleting before uploading; best-effort -- failure (e.g. the
    # object not existing yet) is logged and ignored
    try:
        logging.info("try to delete bucket")
        cloudstorage.delete(BUCKET_NAME + '/' + self.filename)
    except Exception as e:
        logging.error("Deleting bucket failed: %s" % e)
def _crunch_and_save(cls, event_key):
    """Crunch one event's incident statistics and persist them to GCS
    as a CSV object and an HTML object.
    """
    event = Event.get(event_key)

    # crunch
    stats_d = crunch_incident_statistics(event)

    def _store(filename, content_type, text):
        # write one UTF-8 encoded rendering to cloud storage
        gcs_fd = cloudstorage.open(
            BUCKET_NAME + '/' + filename, 'w', content_type=content_type)
        gcs_fd.write(text.encode('utf-8'))
        gcs_fd.close()

    # save csv & html
    _store(incident_statistics_csv_filename(event), 'text/csv',
           incident_statistics_csv(stats_d))
    _store(incident_statistics_html_filename(event), 'text/html',
           incident_statistics_html(stats_d))
def get_base_query(self):
    """Return the Site query, narrowed to the filtering event when one is set."""
    query = Site.all()
    if not self.filtering_event_key:
        return query
    query.filter('event', Event.get(self.filtering_event_key))
    return query