def test_insert_risk(self):
    """Inserting a risk row makes it retrievable by date and coordinates."""
    parameters = test_helper_functions.get_dycast_parameters()
    risk_service = risk_service_module.RiskService(parameters)
    session = database_service.get_sqlalchemy_session()

    grid = geography_service.generate_grid(parameters)
    first_point = geography_service.get_shape_from_sqlalch_element(grid[0])

    risk = Risk(risk_date=datetime.date(2016, 3, 25),
                number_of_cases=5,
                lat=first_point.x,
                long=first_point.y,
                close_pairs=3,
                close_space=2,
                close_time=1,
                cumulative_probability=0.032)

    # Remove any leftover row from a previous run so the insert is clean.
    session.query(Risk.risk_date) \
        .filter(Risk.risk_date == risk.risk_date,
                Risk.lat == risk.lat,
                Risk.long == risk.long) \
        .delete()

    risk_service.insert_risk(session, risk)
    session.commit()

    # .one() raises unless exactly one matching row exists.
    session.query(Risk.risk_date) \
        .filter(Risk.risk_date == risk.risk_date,
                Risk.lat == risk.lat,
                Risk.long == risk.long) \
        .one()
def test_close_time_only(self):
    """New per-cluster close-in-time counts match the legacy query results."""
    dycast_parameters = test_helper_functions.get_dycast_parameters()
    risk_service = risk_service_module.RiskService(dycast_parameters)
    comparative_test_service = comparative_test_service_module.ComparativeTestService(dycast_parameters)
    session = database_service.get_sqlalchemy_session()

    riskdate = datetime.date(2016, 3, 25)
    gridpoints = geography_service.generate_grid(dycast_parameters)

    clusters_per_point_query = risk_service.get_clusters_per_point_query(session, gridpoints, riskdate)
    clusters_per_point = risk_service.get_clusters_per_point_from_query(clusters_per_point_query)
    risk_service.enrich_clusters_per_point_with_close_space_and_time(clusters_per_point)

    # Compare to old query
    daily_cases_query = comparative_test_service.get_daily_cases_query(session, riskdate)
    for point in gridpoints:
        cases_in_cluster_query = comparative_test_service.get_cases_in_cluster_query(daily_cases_query, point)
        count_old = comparative_test_service.get_close_time_only(cases_in_cluster_query)
        for cluster in clusters_per_point:
            if cluster.point.equals(geography_service.get_shape_from_sqlalch_element(point)):
                # assertEquals is a deprecated alias; assertEqual is the
                # canonical unittest method.
                self.assertEqual(cluster.close_in_time, count_old)
def test_get_clusters_per_point_query(self):
    """Per-cluster case counts agree with the legacy per-point query."""
    params = test_helper_functions.get_dycast_parameters(large_dataset=True)
    risk_service = risk_service_module.RiskService(params)
    comparative_service = comparative_test_service_module.ComparativeTestService(params)
    session = database_service.get_sqlalchemy_session()

    riskdate = datetime.date(2016, 3, 25)
    grid = geography_service.generate_grid(params)

    cluster_query = risk_service.get_clusters_per_point_query(session, grid, riskdate)
    clusters = risk_service.get_clusters_per_point_from_query(cluster_query)

    daily_cases_query = comparative_service.get_daily_cases_query(session, riskdate)

    for cluster in clusters:
        # Convert the cluster's shapely point back to a WKT element so the
        # legacy query can consume it.
        wkt_element = geography_service.get_wktelement_from_wkt(cluster.point.wkt)
        legacy_query = comparative_service.get_cases_in_cluster_query(daily_cases_query, wkt_element)

        count_new = cluster.get_case_count()
        count_old = database_service.get_count_for_query(legacy_query)
        self.assertEqual(count_new, count_old)
def export_risk(self, dycast_parameters):
    """Export risk rows for the configured date range to a tsv/csv file.

    Returns the written file path, 1 on an unsupported export format,
    or None when no risk rows exist for the date range.
    """
    session = database_service.get_sqlalchemy_session()

    startdate = dycast_parameters.startdate
    enddate = dycast_parameters.enddate
    export_directory = dycast_parameters.export_directory
    export_prefix = dycast_parameters.export_prefix
    export_format = dycast_parameters.export_format

    # Quick and dirty solution
    if export_format not in ("tsv", "csv"):
        logging.error("Incorrect export format: %s", export_format)
        return 1
    separator = self.get_separator(export_format)

    if export_directory is None:
        export_directory = CONFIG.get("system", "export_directory")

    # dates are objects, not strings
    startdate_string = conversion_service.get_string_from_date_object(startdate)
    enddate_string = conversion_service.get_string_from_date_object(enddate)

    export_time = strftime("%Y-%m-%d__%H-%M-%S")
    filename = "exported_{0}__risk_{1}--{2}.{3}".format(
        export_time, startdate_string, enddate_string, export_format)
    if export_prefix:
        filename = export_prefix + filename
    filepath = os.path.join(export_directory, filename)

    logging.info("Exporting risk for: %s - %s", startdate_string, enddate_string)

    risk_query = self.get_risk_query(session, startdate, enddate)
    if database_service.get_count_for_query(risk_query) == 0:
        logging.info("No risk found for the provided dates: %s - %s",
                     startdate_string, enddate_string)
        return

    risk_collection = risk_query.all()

    table_content = file_service.TableContent()
    table_content.set_header(self.get_header_as_string(separator))
    table_content.set_body(self.get_rows_as_string(risk_collection, separator))

    file_service.save_file(table_content.get_content(), filepath)
    return filepath
def test_load_case_data_error(self):
    """A case line with an invalid date raises DataError on load."""
    session = database_service.get_sqlalchemy_session()
    import_service = import_service_module.ImportService()

    dycast_model = dycast_parameters.DycastParameters()
    dycast_model.srid_of_cases = 3857

    location_type = enums.Location_type.LAT_LONG
    # 30/09/16 does not match the expected date format.
    line_incorrect_date = "9998\t30/09/16\t1832445.278\t2118527.399"

    with self.assertRaises(DataError):
        import_service.load_case(session, dycast_model, line_incorrect_date, location_type)
def load_case_file(self, dycast_parameters, filename):
    """Load cases from a tab-separated file into the database.

    The header row's column count selects the location type: 4 columns
    means lat/long coordinates, 3 columns means a geometry column.
    Exits the process on an unreadable file or unexpected column count;
    re-raises on commit failure after rolling back.
    """
    session = database_service.get_sqlalchemy_session()

    lines_read = 0
    lines_processed = 0
    lines_loaded = 0
    lines_skipped = 0
    location_type = ""

    try:
        input_file = file_service.read_file(filename)
    except Exception:
        logging.exception("Could not read file: %s", filename)
        sys.exit(1)

    for line_number, line in enumerate(input_file):
        line = remove_trailing_newline(line)
        if line_number == 0:
            # Header row: column count decides how coordinates are encoded.
            header_count = line.count("\t") + 1
            if header_count == 4:
                location_type = enums.Location_type.LAT_LONG
            elif header_count == 3:
                location_type = enums.Location_type.GEOMETRY
            else:
                logging.error("Incorrect column count: %s, exiting...", header_count)
                sys.exit(1)
            logging.info("Loading cases as location type: %s",
                         enums.Location_type(location_type).name)
        else:
            lines_read += 1
            # load_case returns -1 for duplicates; any exception simply
            # propagates (the original wrapped this in a pointless
            # `try: ... except Exception: raise`).
            result = self.load_case(session, dycast_parameters, line, location_type)
            lines_processed += 1
            if result == -1:
                lines_skipped += 1
            else:
                lines_loaded += 1

    try:
        session.commit()
    except SQLAlchemyError as e:
        # `except SQLAlchemyError, e:` is Python-2-only syntax; `as` works
        # on Python 2.6+ and 3.
        session.rollback()
        logging.exception("Couldn't insert cases")
        logging.exception(e)
        raise
def test_get_daily_cases_query_old(self):
    """The legacy daily-cases query returns at least one case."""
    params = test_helper_functions.get_dycast_parameters()
    comparative_service = comparative_test_service_module.ComparativeTestService(params)
    session = database_service.get_sqlalchemy_session()

    riskdate = datetime.date(2016, 3, 25)
    query = comparative_service.get_daily_cases_query(session, riskdate)

    self.assertGreater(database_service.get_count_for_query(query), 0)
def insert_test_cases():
    """Load the lat/long test case fixtures if the case table is empty."""
    import_service = import_service_module.ImportService()
    dycast_model = dycast_parameters.DycastParameters()

    # Use an int SRID for consistency with the other fixtures/tests
    # (the original passed the string '3857').
    dycast_model.srid_of_cases = 3857
    dycast_model.files_to_import = get_test_cases_import_files_latlong()

    session = database_service.get_sqlalchemy_session()
    case_query = session.query(Case)
    case_count = database_service.get_count_for_query(case_query)

    # Only import when the table is empty, to keep the fixture idempotent.
    if case_count == 0:
        import_service.load_case_files(dycast_model)
def test_get_exact_match_distribution_margin(self):
    """An exact distribution-table match yields a probability above zero."""
    dycast_parameters = test_helper_functions.get_dycast_parameters()
    risk_service = risk_service_module.RiskService(dycast_parameters)
    session = database_service.get_sqlalchemy_session()

    cluster = Cluster()
    # Bug fix: the original had trailing commas on these assignments,
    # which set one-element tuples (e.g. (2,)) instead of ints.
    cluster.case_count = 2
    cluster.close_space_and_time = 1
    cluster.close_in_space = 1
    cluster.close_in_time = 1

    cumulative_probability = risk_service.get_exact_match_cumulative_probability(session, cluster)
    self.assertGreater(cumulative_probability, 0)
def test_get_cumulative_probability(self):
    """Computing cumulative probability populates a positive value on the cluster."""
    params = test_helper_functions.get_dycast_parameters()
    risk_service = risk_service_module.RiskService(params)
    session = database_service.get_sqlalchemy_session()

    cluster = Cluster()
    cluster.case_count = 10
    cluster.close_space_and_time = 3
    cluster.close_in_space = 5
    cluster.close_in_time = 27

    # Mutates cluster.cumulative_probability in place.
    risk_service.get_cumulative_probability_for_cluster(session, cluster)
    self.assertGreater(cluster.cumulative_probability, 0)
def test_get_nearest_close_in_time_distribution_margin_query(self):
    """The nearest close-in-time margin query yields a non-negative result."""
    params = test_helper_functions.get_dycast_parameters()
    risk_service = risk_service_module.RiskService(params)
    session = database_service.get_sqlalchemy_session()

    cluster = Cluster()
    cluster.case_count = 30
    cluster.close_space_and_time = 1
    cluster.close_in_space = 2
    cluster.close_in_time = 10

    margin_query = risk_service.get_nearest_close_in_time_distribution_margin_query(session, cluster)
    result = session.query(margin_query).first()

    self.assertIsNotNone(result)
    self.assertGreaterEqual(result, 0)
def test_get_cumulative_probability_by_nearest_close_in_time_and_space(self):
    """Nearest-neighbour lookup returns a positive cumulative probability."""
    params = test_helper_functions.get_dycast_parameters()
    risk_service = risk_service_module.RiskService(params)
    session = database_service.get_sqlalchemy_session()

    cluster = Cluster()
    cluster.case_count = 30
    cluster.close_space_and_time = 1
    cluster.close_in_space = 2
    cluster.close_in_time = 10

    result = risk_service.get_cumulative_probability_by_nearest_close_in_time_and_space(session, cluster)
    probability = result.cumulative_probability

    self.assertIsNotNone(probability)
    self.assertGreater(probability, 0)
def test_get_cases_in_cluster_query_old(self):
    """The legacy cases-in-cluster query finds cases around a grid point."""
    params = test_helper_functions.get_dycast_parameters()
    comparative_service = comparative_test_service_module.ComparativeTestService(params)
    session = database_service.get_sqlalchemy_session()

    riskdate = datetime.date(2016, 3, 25)
    grid = geography_service.generate_grid(params)
    first_point = grid[0]

    daily_cases_query = comparative_service.get_daily_cases_query(session, riskdate)
    cluster_query = comparative_service.get_cases_in_cluster_query(daily_cases_query, first_point)

    self.assertGreater(database_service.get_count_for_query(cluster_query), 0)
def test_load_case_correct(self):
    """A well-formed case line is inserted and can be queried back."""
    session = database_service.get_sqlalchemy_session()
    import_service = import_service_module.ImportService()

    dycast_model = dycast_parameters.DycastParameters()
    dycast_model.srid_of_cases = 3857

    line_correct = "99999\t03/09/16\t1832445.278\t2118527.399"
    location_type = enums.Location_type.LAT_LONG

    import_service.load_case(session, dycast_model, line_correct, location_type)
    session.commit()

    query = session.query(Case).filter(Case.id == '99999')
    count = database_service.get_count_for_query(query)
    # assertEquals is a deprecated alias; assertEqual is the canonical
    # unittest method.
    self.assertEqual(count, 1)

    # Clean up the inserted test case so the test is repeatable.
    session.delete(query.first())
    session.commit()
def generate_risk(self):
    """Generate daily risk rows for every grid point between the configured
    start and end dates.

    For each day, clusters of cases are computed per grid point; points
    whose case count meets the threshold get close-space/time enrichment,
    a cumulative probability, and a Risk row inserted. Commits once per
    day and once at the end; rolls back and re-raises on commit failure.
    """
    session = database_service.get_sqlalchemy_session()
    logging_service.display_current_parameter_set(self.dycast_parameters)

    case_threshold = self.dycast_parameters.case_threshold
    gridpoints = geography_service.generate_grid(self.dycast_parameters)

    day = self.dycast_parameters.startdate
    delta = datetime.timedelta(days=1)

    while day <= self.dycast_parameters.enddate:
        start_time = time.time()
        logging.info("Starting daily_risk for %s", day)
        points_above_threshold = 0

        clusters_per_point_query = self.get_clusters_per_point_query(
            session, gridpoints, day)
        clusters_per_point = self.get_clusters_per_point_from_query(
            clusters_per_point_query)

        for cluster in clusters_per_point:
            vector_count = cluster.get_case_count()
            if vector_count >= case_threshold:
                points_above_threshold += 1
                self.get_close_space_and_time_for_cluster(cluster)
                self.get_cumulative_probability_for_cluster(session, cluster)
                risk = Risk(
                    risk_date=day,
                    number_of_cases=vector_count,
                    lat=cluster.point.y,
                    long=cluster.point.x,
                    close_pairs=cluster.close_space_and_time,
                    close_space=cluster.close_in_space,
                    close_time=cluster.close_in_time,
                    cumulative_probability=cluster.cumulative_probability)
                self.insert_risk(session, risk)

        session.commit()
        logging.info("Finished daily_risk for %s: done %s points",
                     day, len(gridpoints))
        logging.info("Total points above threshold of %s: %s",
                     case_threshold, points_above_threshold)
        logging.info("Time elapsed: %.0f seconds", time.time() - start_time)

        day += delta

    try:
        session.commit()
    except SQLAlchemyError as e:
        # `except SQLAlchemyError, e:` is Python-2-only syntax; `as` works
        # on Python 2.6+ and 3.
        session.rollback()
        logging.exception("There was a problem committing the risk data session")
        logging.exception(e)
        raise
def generate_risk(self):
    """Legacy (comparative) daily risk generation using per-point queries.

    For each day with enough cases, runs the old per-gridpoint cluster
    query, computes close-pair statistics and cumulative probability, and
    inserts a Risk row for every point meeting the case threshold.
    Commits at the end; rolls back and re-raises on commit failure.
    """
    session = database_service.get_sqlalchemy_session()
    logging_service.display_current_parameter_set(self.dycast_parameters)

    case_threshold = self.dycast_parameters.case_threshold
    gridpoints = geography_service.generate_grid(self.dycast_parameters)

    day = self.dycast_parameters.startdate
    delta = datetime.timedelta(days=1)

    while day <= self.dycast_parameters.enddate:
        daily_cases_query = self.get_daily_cases_query(session, day)
        daily_case_count = database_service.get_count_for_query(
            daily_cases_query)

        if daily_case_count >= case_threshold:
            start_time = time.time()
            logging.info("Starting daily_risk for %s", day)
            points_above_threshold = 0

            # Bug fix: the original also ran a preliminary loop over
            # get_clusters_per_point_query results that built Risk objects
            # but never inserted them and double-incremented
            # points_above_threshold; that dead leftover loop is removed.
            for point in gridpoints:
                cases_in_cluster_query = self.get_cases_in_cluster_query(
                    daily_cases_query, point)
                vector_count = database_service.get_count_for_query(
                    cases_in_cluster_query)

                if vector_count >= case_threshold:
                    points_above_threshold += 1
                    risk = Risk(risk_date=day,
                                number_of_cases=vector_count,
                                lat=point.x,
                                long=point.y)

                    risk.close_pairs = self.get_close_space_and_time(
                        cases_in_cluster_query)
                    # close_space/close_time exclude pairs already counted
                    # as close in both space and time.
                    risk.close_space = self.get_close_space_only_old(
                        cases_in_cluster_query) - risk.close_pairs
                    risk.close_time = self.get_close_time_only(
                        cases_in_cluster_query) - risk.close_pairs

                    risk.cumulative_probability = self.get_cumulative_probability(
                        session, risk.number_of_cases, risk.close_pairs,
                        risk.close_space, risk.close_time)

                    self.insert_risk(session, risk)

            logging.info("Finished daily_risk for %s: done %s points",
                         day, len(gridpoints))
            logging.info("Total points above threshold of %s: %s",
                         case_threshold, points_above_threshold)
            logging.info("Time elapsed: %.0f seconds",
                         time.time() - start_time)
        else:
            logging.info(
                "Amount of cases for %s lower than threshold %s: %s, skipping.",
                day, case_threshold, daily_case_count)

        day += delta

    try:
        session.commit()
    except SQLAlchemyError as e:
        # `except SQLAlchemyError, e:` is Python-2-only syntax; `as` works
        # on Python 2.6+ and 3.
        session.rollback()
        logging.exception("There was a problem committing the risk data session")
        logging.exception(e)
        raise
def test_can_get_cases(self):
    """The test database contains at least one case."""
    session = database_service.get_sqlalchemy_session()
    case_ids = session.query(Case.id).all()
    self.assertGreater(len(case_ids), 0)