async def report_v2(self,
                    start_timestamp: pd.Timestamp,
                    end_timestamp: pd.Timestamp,
                    report_timezone
                    ) -> List[Tuple[datetime, float]]:
    logger.info(
        f"Creating control action report for datetime range {start_timestamp}, {end_timestamp} "
        f"in timezone {report_timezone}"
    )
    boiler_control_actions_df = \
        await self._control_action_repository.get_control_actions_by_timestamp_range(
            start_timestamp,
            end_timestamp
        )
    control_action_list = []
    if not boiler_control_actions_df.empty:
        datetime_column = boiler_control_actions_df[column_names.TIMESTAMP]
        datetime_column = datetime_column.dt.tz_convert(report_timezone)
        datetime_column = datetime_column.dt.to_pydatetime()
        boiler_out_temps = boiler_control_actions_df[column_names.FORWARD_PIPE_COOLANT_TEMP]
        boiler_out_temps = boiler_out_temps.round(1)
        boiler_out_temps = boiler_out_temps.to_list()
        for datetime_, boiler_out_temp in zip(datetime_column, boiler_out_temps):
            # noinspection PyTypeChecker
            control_action_list.append((datetime_, boiler_out_temp))
    return control_action_list
def upload_file():
    jurisdiction = request.args.get('jurisdiction')
    event_type = request.args.get('eventType')
    if can_upload_file(jurisdiction, event_type):
        filenames = [key for key in request.files.keys()]
        if len(filenames) != 1:
            return jsonify(
                status='error',
                message='Exactly one file must be uploaded at a time')
        uploaded_file = request.files[filenames[0]]
        filename = secure_filename(uploaded_file.filename)
        cwd = os.getcwd()
        tmp_dir = os.path.join(cwd, 'tmp')
        os.makedirs(tmp_dir, exist_ok=True)
        full_filename = os.path.join(tmp_dir, filename)
        uploaded_file.save(full_filename)
        upload_id = unique_upload_id()
        q = get_q(get_redis_connection())
        job = q.enqueue_call(
            func=validate_async,
            args=(uploaded_file.filename, jurisdiction, full_filename,
                  event_type, current_user.id, upload_id),
            result_ttl=5000,
            timeout=3600,
            meta={
                'event_type': event_type,
                'filename': filename,
                'upload_id': upload_id
            })
        logger.info(f"Job id {job.get_id()}")
        return jsonify(status='validating',
                       jobKey=job.get_id(),
                       message='Validating data!')
    else:
        return jsonify(status='not authorized', exampleRows=[])
async def report_v3(self,
                    start_timestamp: pd.Timestamp,
                    end_timestamp: pd.Timestamp,
                    report_timezone
                    ) -> List[ControlActionV3]:
    logger.info(
        f"Creating control action report for datetime range {start_timestamp}, {end_timestamp} "
        f"in timezone {report_timezone}"
    )
    boiler_control_actions_df = \
        await self._control_action_repository.get_control_actions_by_timestamp_range(
            start_timestamp,
            end_timestamp
        )
    control_actions_list = []
    if not boiler_control_actions_df.empty:
        datetime_column = boiler_control_actions_df[column_names.TIMESTAMP]
        datetime_column = datetime_column.dt.tz_convert(report_timezone)
        datetime_column = datetime_column.to_list()
        boiler_out_temps = boiler_control_actions_df[column_names.FORWARD_PIPE_COOLANT_TEMP]
        boiler_out_temps = boiler_out_temps.round(1)
        boiler_out_temps = boiler_out_temps.to_list()
        for datetime_, boiler_out_temp in zip(datetime_column, boiler_out_temps):
            control_actions_list.append(
                ControlActionV3(
                    timestamp=datetime_,
                    forward_temp=boiler_out_temp
                )
            )
    return control_actions_list
def can_access_file(upload_id):
    upload = db_session.query(Upload).get(upload_id)
    if not upload:
        raise ValueError(
            'upload_id: {} not present in metadata database'.format(upload_id))
    logger.info('Found jurisdiction %s and event type %s for upload id %s',
                upload.jurisdiction_slug, upload.event_type_slug, upload_id)
    return can_upload_file(upload.jurisdiction_slug, upload.event_type_slug)
async def get_all_settings(self) -> List[SettingV3]:
    logger.info("Requesting all settings")
    loaded_settings = await self._settings_repository.get_all()
    response_settings = []
    for loaded_setting_name, loaded_setting_value in loaded_settings.items():
        response_settings.append(
            SettingV3(name=loaded_setting_name, value=loaded_setting_value))
    return response_settings
async def update_weather_forecast_async(self) -> None:
    logger.info("Requesting weather forecast update")
    weather_forecast_df = await self._weather_forecast_loader.load_weather()
    weather_forecast_df = await asyncio.get_running_loop().run_in_executor(
        self._executor, self._process_weather_forecast, weather_forecast_df)
    await self._weather_forecast_repository.set_weather_forecast(
        weather_forecast_df)
def get_last_upload_date():
    last_upload = query.last_upload_date()
    try:
        assert len(last_upload) == 1
        last_upload_date = last_upload[0]['upload_start_time']
        logger.info(type(last_upload_date))
        last_upload_date = last_upload_date.strftime('%Y-%m-%d')
        return jsonify(results=last_upload_date)
    except Exception:
        return jsonify("no valid upload date")
async def update_control_actions_async(self) -> None:
    logger.info("Requesting control actions update")
    control_action_start_timestamp, control_action_end_timestamp = \
        self._calc_control_action_start_end_timestamp()
    control_action_current_timestamp = control_action_start_timestamp
    while control_action_current_timestamp <= control_action_end_timestamp:
        await self._update_control_action_for_timestamp(
            control_action_current_timestamp)
        control_action_current_timestamp += self._timedelta
    await self._control_action_repository.drop_control_actions_older_than(
        control_action_start_timestamp)
def match_finished(matched_results_paths,
                   match_job_id,
                   match_start_at,
                   match_complete_at,
                   match_status,
                   match_runtime,
                   upload_id=None):
    try:
        logger.info('Writing to match log')
        write_match_log(db_session=db_session,
                        match_job_id=match_job_id,
                        match_start_at=match_start_at,
                        match_complete_at=match_complete_at,
                        match_status=match_status,
                        match_runtime=match_runtime,
                        upload_id=upload_id)
        logger.info('Writing matches to db')
        for event_type, filename in matched_results_paths.items():
            jurisdiction = filename.split('/')[-3]
            logger.info(
                'Writing matches from event type %s and filename %s to db. '
                'Parsed jurisdiction %s out of filename',
                event_type, filename, jurisdiction)
            with open_sesame(filename, 'rb') as matches_filehandle:
                write_matches_to_db(db_engine=engine,
                                    event_type=event_type,
                                    jurisdiction=jurisdiction,
                                    matches_filehandle=matches_filehandle)
    except Exception as e:
        logger.error('Error encountered during match_finished: %s', str(e))
    finally:
        logger.info('All done!')
def get_records_by_time():
    start_date = request.args.get('startDate')
    end_date = request.args.get('endDate')
    jurisdiction = request.args.get('jurisdiction')
    limit = request.args.get('limit', 10)
    offset = request.args.get('offset', 0)
    order_column = request.args.get('orderColumn')
    order = request.args.get('order')
    set_status = request.args.get('setStatus')
    logger.info(f'Pulling data from {start_date} to {end_date}')
    records = query.get_records_by_time(start_date, end_date, jurisdiction,
                                        limit, offset, order_column, order,
                                        set_status)
    return jsonify(results=records)
def notify_matcher(jurisdiction, upload_id=None):
    schema_pk_lookup = list_all_schemas_primary_keys(SCHEMA_DIRECTORY)
    base_data_directory = app_config['base_data_path']
    directory_to_pass = base_data_directory.format(jurisdiction=jurisdiction)
    redis_connection = Redis(host='redis', port=6379)
    q = Queue('matching', connection=redis_connection)
    logger.info('Enqueueing do_match job')
    job = q.enqueue(f="matcher.do_match",
                    args=(directory_to_pass, schema_pk_lookup, upload_id),
                    result_ttl=5000,
                    timeout=100000,
                    meta={'upload_id': upload_id})
    logger.info("Enqueued job %s", job)
def get_contact_dist(data, bins=None):
    # Count contacts per matched_id; .as_matrix() was removed from pandas,
    # so use .to_numpy() instead.
    data = data.groupby('matched_id').matched_id.count().to_numpy()
    data = data.astype(int)
    # Clients with exactly one contact are bucketed separately from the rest.
    one_contact = list(data).count(1)
    rest = np.delete(data, np.argwhere(data == 1))
    if one_contact == len(data):
        df_hist = pd.DataFrame({'contacts': [one_contact]},
                               index=['1 contact'])
        logger.info("all ones!")
        return df_hist, 1
    if len(np.unique(rest)) == 1:
        df_hist = pd.DataFrame(
            {'contacts': [one_contact, len(rest)]},
            index=['1 contact', f"{np.unique(rest)[0]} contacts"])
        return df_hist, 1
    if bins is not None:
        num, groups = np.histogram(rest, bins)
    else:
        # Let numpy pick bins, round the edges, and cap the result at 4 bins.
        num, groups = np.histogram(rest, 'auto')
        num, groups = np.histogram(rest, np.unique(groups.round()))
        if len(groups) > 4:
            bins = 4
            num, groups = np.histogram(rest, bins)
            num, groups = np.histogram(rest, np.unique(groups.round()))
    hist = [one_contact] + list(num)
    index = [pd.Interval(1, 2, 'left')] + [
        pd.Interval(int(b[0]), int(b[1]) + 1, 'left')
        for b in list(window(list(groups), 2))
    ]
    df_hist = pd.DataFrame({'contacts': hist},
                           index=contacts_interval_to_text(index))
    logger.info(num)
    logger.info(groups)
    logger.info(index)
    logger.info(df_hist)
    return df_hist, groups
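# A minimal sketch (an assumption, not part of this codebase) of the sliding-window
# helper that get_contact_dist above relies on: window(seq, 2) is expected to yield
# consecutive overlapping pairs of bin edges, which are then turned into pd.Interval
# objects. The classic itertools recipe matches that usage:
from itertools import islice

def window(seq, n=2):
    # e.g. window([1, 4, 7, 10], 2) -> (1, 4), (4, 7), (7, 10)
    it = iter(seq)
    result = tuple(islice(it, n))
    if len(result) == n:
        yield result
    for elem in it:
        result = result[1:] + (elem,)
        yield result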
def merge_file():
    upload_id = request.args.get('uploadId', None)
    if not upload_id:
        return jsonify(status='invalid', reason='uploadId not present')
    has_access = False
    try:
        has_access = can_access_file(upload_id)
        if has_access:
            upload_log = db_session.query(Upload).get(upload_id)
            logger.info('Retrieved upload log, merging raw table to master')
            raw_table_name = 'raw_{}'.format(upload_id)
            logger.info('Merging raw table to master')
            merge_id = upsert_raw_table_to_master(raw_table_name,
                                                  upload_log.jurisdiction_slug,
                                                  upload_log.event_type_slug,
                                                  upload_id, db_session)
            logger.info('Syncing merged file to s3')
            bootstrap_master_tables(upload_log.jurisdiction_slug, db_session)
            sync_merged_file_to_storage(upload_log.jurisdiction_slug,
                                        upload_log.event_type_slug,
                                        db_session.get_bind())
            merge_log = db_session.query(MergeLog).get(merge_id)
            try:
                logger.info('Merge succeeded. Now querying matcher')
                notify_matcher(upload_log.jurisdiction_slug, upload_id)
            except Exception as e:
                logger.error('Error matching: %s', e)
                db_session.rollback()
                return make_response(jsonify(status='error'), 500)
            db_session.commit()
            return jsonify(status='success',
                           new_unique_rows=merge_log.new_unique_rows,
                           total_unique_rows=merge_log.total_unique_rows)
        else:
            return jsonify(status='not authorized')
    except ValueError as e:
        logger.error('Error merging: %s', e)
        db_session.rollback()
        return make_response(jsonify(status='error'), 500)
async def main(cmd_args):
    application = Application()
    application.config.from_yaml(cmd_args.config)
    application.core.init_resources()
    # Must be placed after core.init_resources()
    logger.info("Wiring")
    wire(application)
    logger.info("Starting updater service")
    updater_service: AbstractUpdaterService = \
        application.services.updater_pkg.updater_service()
    await updater_service.start_service()
    server: uvicorn.Server = application.wsgi.server()
    logger.info(
        f"Starting server at {server.config.host}:{server.config.port}")
    await server.serve(sockets=None)
def get_histogram_bar_chart_data(data, distribution_function, shared_ids,
                                 data_name):
    intersection_data = data[data.matched_id.isin(shared_ids)]
    distribution, groups = distribution_function(data)
    distribution_intersection, _ = distribution_function(
        intersection_data, groups)
    bins = []
    logger.info(data_name)
    logger.info(distribution_intersection)
    logger.info(len(data.matched_id.unique()))
    for bin_index in range(len(distribution)):
        try:
            of_status = {
                "x": data_name,
                "y": int(distribution.iloc[bin_index]) /
                len(data.matched_id.unique()) * 100
            }
        except ZeroDivisionError:
            of_status = {"x": data_name, "y": 0}
        try:
            all_status = {
                "x": "Jail & Homeless",
                "y": int(distribution_intersection.iloc[bin_index]) /
                len(intersection_data.matched_id.unique()) * 100
            }
        except Exception as e:
            logger.error(
                'Error encountered while calculating intersection distribution: %s',
                e)
            all_status = {"x": "Jail & Homeless", "y": 0}
        bins.append((of_status, all_status))
    return [bins, list(distribution.index)]
async def set_setting(self, setting_name: str, setting_value: Any) -> None:
    logger.info(f"Set setting {setting_name}={setting_value}")
    await self._settings_repository.set_one(setting_name, setting_value)
async def update_temp_graph_async(self) -> None:
    logger.info("Requesting temp graph update")
    temp_graph = await self._temp_graph_loader.load_temp_graph()
    self._temp_graph_dumper.dump_temp_graph(temp_graph)
    logger.debug("temp graph is updated")
async def get_setting(self, setting_name: str) -> SettingV3:
    logger.info(f"Requesting setting {setting_name}")
    setting_value = await self._settings_repository.get_one(setting_name)
    return SettingV3(name=setting_name, value=setting_value)