Example 1
    def test_get_clusters_per_point_query(self):

        dycast_parameters = test_helper_functions.get_dycast_parameters(
            large_dataset=False)
        risk_service = risk_service_module.RiskService(dycast_parameters)
        comparative_test_service = comparative_test_service_module.ComparativeTestService(
            dycast_parameters)

        session = database_service.get_sqlalchemy_session()

        riskdate = datetime.date(2016, 3, 25)
        gridpoints = geography_service.generate_grid(dycast_parameters)

        clusters_per_point_query = risk_service.get_clusters_per_point_query(
            session, gridpoints, riskdate)
        clusters_per_point = risk_service.get_clusters_per_point_from_query(
            clusters_per_point_query)

        daily_cases_query = comparative_test_service.get_daily_cases_query(
            session, riskdate)

        for cluster in clusters_per_point:
            point_wkt_element = geography_service.get_wktelement_from_wkt(
                cluster.point.wkt)

            cases_in_cluster_query = comparative_test_service.get_cases_in_cluster_query(
                daily_cases_query, point_wkt_element)

            vector_count_new = cluster.get_case_count()
            vector_count_old = database_service.get_count_for_query(
                cases_in_cluster_query)

            self.assertEqual(vector_count_new, vector_count_old)
Example 2
    def test_close_time_only(self):

        dycast_parameters = test_helper_functions.get_dycast_parameters()
        risk_service = risk_service_module.RiskService(dycast_parameters)

        comparative_test_service = comparative_test_service_module.ComparativeTestService(
            dycast_parameters)
        session = database_service.get_sqlalchemy_session()

        riskdate = datetime.date(2016, 3, 25)
        gridpoints = geography_service.generate_grid(dycast_parameters)

        clusters_per_point_query = risk_service.get_clusters_per_point_query(
            session, gridpoints, riskdate)
        clusters_per_point = risk_service.get_clusters_per_point_from_query(
            clusters_per_point_query)

        risk_service.enrich_clusters_per_point_with_close_space_and_time(
            clusters_per_point)

        # Compare to old query
        daily_cases_query = comparative_test_service.get_daily_cases_query(
            session, riskdate)
        for point in gridpoints:
            cases_in_cluster_query = comparative_test_service.get_cases_in_cluster_query(
                daily_cases_query, point)
            count_old = comparative_test_service.get_close_time_only(
                cases_in_cluster_query)

            for cluster in clusters_per_point:
                if cluster.point.equals(
                        geography_service.get_shape_from_sqlalch_element(
                            point)):
                    self.assertEqual(cluster.close_in_time, count_old)
Example 3
    def test_insert_risk(self):

        dycast_parameters = test_helper_functions.get_dycast_parameters()
        risk_service = risk_service_module.RiskService(dycast_parameters)
        session = database_service.get_sqlalchemy_session()

        gridpoints = geography_service.generate_grid(dycast_parameters)
        point = geography_service.get_shape_from_sqlalch_element(gridpoints[0])

        risk = Risk(risk_date=datetime.date(2016, 3, 25),
                    number_of_cases=5,
                    lat=point.x,
                    long=point.y,
                    close_pairs=3,
                    close_space=2,
                    close_time=1,
                    cumulative_probability=0.032)

        session.query(Risk.risk_date).filter(Risk.risk_date == risk.risk_date,
                                             Risk.lat == risk.lat,
                                             Risk.long == risk.long) \
            .delete()

        risk_service.insert_risk(session, risk)
        session.commit()

        session.query(Risk.risk_date).filter(Risk.risk_date == risk.risk_date,
                                             Risk.lat == risk.lat,
                                             Risk.long == risk.long) \
            .one()
Example 4
    def export_risk(self, dycast_parameters):

        session = database_service.get_sqlalchemy_session()

        startdate = dycast_parameters.startdate
        enddate = dycast_parameters.enddate
        export_directory = dycast_parameters.export_directory
        export_prefix = dycast_parameters.export_prefix
        export_format = dycast_parameters.export_format

        # Quick and dirty validation: only tsv and csv are supported
        if export_format not in ("tsv", "csv"):
            logging.error("Incorrect export format: %s", export_format)
            return 1

        separator = self.get_separator(export_format)

        if export_directory is None:
            export_directory = CONFIG.get("export_directory")

        # dates are objects, not strings
        startdate_string = conversion_service.get_string_from_date_object(
            startdate)
        enddate_string = conversion_service.get_string_from_date_object(
            enddate)

        export_time = strftime("%Y-%m-%d__%H-%M-%S")
        filename = "exported_{0}__risk_{1}--{2}.{3}".format(
            export_time, startdate_string, enddate_string, export_format)
        if export_prefix:
            filename = export_prefix + filename
        filepath = os.path.join(export_directory, filename)

        logging.info("Exporting risk for: %s - %s", startdate_string,
                     enddate_string)
        risk_query = self.get_risk_query(session, startdate, enddate)
        risk_count = database_service.get_count_for_query(risk_query)

        if risk_count == 0:
            logging.info("No risk found for the provided dates: %s - %s",
                         startdate_string, enddate_string)
            return

        risk_collection = risk_query.all()

        table_content = file_service.TableContent()

        header = self.get_header_as_string(separator)
        table_content.set_header(header)

        body = self.get_rows_as_string(risk_collection, separator)
        table_content.set_body(body)

        file_service.save_file(table_content.get_content(), filepath)

        return filepath
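
A minimal usage sketch, assuming export_risk lives on a service object (called export_service here, which is an assumption; the example above only shows the method body) and that dycast_parameters carries the attributes read at the top of the method:

    # Hypothetical wiring: export_service is an assumed holder of export_risk.
    # dycast_parameters must provide startdate, enddate, export_directory,
    # export_prefix and export_format.
    dycast_parameters = test_helper_functions.get_dycast_parameters()
    dycast_parameters.export_format = "tsv"

    filepath = export_service.export_risk(dycast_parameters)
    # e.g. <export_directory>/exported_2016-04-01__10-30-00__risk_2016-03-25--2016-03-26.tsv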
Example 5
    def test_load_case_data_error(self):
        session = database_service.get_sqlalchemy_session()
        import_service = import_service_module.ImportService()

        dycast_model = dycast_parameters.DycastParameters()
        dycast_model.srid_of_cases = 3857

        location_type = enums.Location_type.LAT_LONG
        line_incorrect_date = "9998\t30/09/16\t1832445.278\t2118527.399"

        with self.assertRaises(DataError):
            import_service.load_case(session, dycast_model,
                                     line_incorrect_date, location_type)
Example 6
def insert_test_cases():
    import_service = import_service_module.ImportService()

    dycast_model = dycast_parameters.DycastParameters()

    dycast_model.srid_of_cases = 3857
    dycast_model.files_to_import = get_test_cases_import_files_latlong()

    session = database_service.get_sqlalchemy_session()
    case_query = session.query(Case)
    case_count = database_service.get_count_for_query(case_query)

    if case_count == 0:
        import_service.load_case_files(dycast_model)
Example 7
    def test_get_daily_cases_query_old(self):

        dycast_parameters = test_helper_functions.get_dycast_parameters()
        comparative_test_service = comparative_test_service_module.ComparativeTestService(
            dycast_parameters)

        session = database_service.get_sqlalchemy_session()

        riskdate = datetime.date(2016, 3, 25)

        daily_cases_query = comparative_test_service.get_daily_cases_query(
            session, riskdate)
        count = database_service.get_count_for_query(daily_cases_query)

        self.assertGreater(count, 0)
Example 8
    def test_get_cumulative_probability(self):

        dycast_parameters = test_helper_functions.get_dycast_parameters()
        risk_service = risk_service_module.RiskService(dycast_parameters)
        session = database_service.get_sqlalchemy_session()

        cluster = Cluster()
        cluster.case_count = 10
        cluster.close_space_and_time = 3
        cluster.close_in_space = 5
        cluster.close_in_time = 27

        risk_service.get_cumulative_probability_for_cluster(session, cluster)

        self.assertGreater(cluster.cumulative_probability, 0)
Example 9
    def test_get_exact_match_distribution_margin(self):

        dycast_parameters = test_helper_functions.get_dycast_parameters()
        risk_service = risk_service_module.RiskService(dycast_parameters)
        session = database_service.get_sqlalchemy_session()

        cluster = Cluster()
        cluster.case_count = 2
        cluster.close_space_and_time = 1
        cluster.close_in_space = 1
        cluster.close_in_time = 1

        cumulative_probability = risk_service.get_exact_match_cumulative_probability(
            session, cluster)

        self.assertGreater(cumulative_probability, 0)
Example 10
    def test_get_nearest_close_in_time_distribution_margin_query(self):

        dycast_parameters = test_helper_functions.get_dycast_parameters()
        risk_service = risk_service_module.RiskService(dycast_parameters)
        session = database_service.get_sqlalchemy_session()

        cluster = Cluster()
        cluster.case_count = 30
        cluster.close_space_and_time = 1
        cluster.close_in_space = 2
        cluster.close_in_time = 10

        nearest_close_in_time_query = risk_service.get_nearest_close_in_time_distribution_margin_query(
            session, cluster)
        result = session.query(nearest_close_in_time_query).first()[0]
        self.assertIsNotNone(result)
        self.assertGreaterEqual(result, 0)
Example 11
    def test_get_cumulative_probability_by_nearest_close_in_time_and_space(
            self):

        dycast_parameters = test_helper_functions.get_dycast_parameters()
        risk_service = risk_service_module.RiskService(dycast_parameters)
        session = database_service.get_sqlalchemy_session()

        cluster = Cluster()
        cluster.case_count = 30
        cluster.close_space_and_time = 1
        cluster.close_in_space = 2
        cluster.close_in_time = 10

        query_result = risk_service.get_cumulative_probability_by_nearest_close_in_time_and_space(
            session, cluster)
        cumulative_probability = query_result.cumulative_probability
        self.assertIsNotNone(cumulative_probability)
        self.assertGreater(cumulative_probability, 0)
Example 12
    def test_load_case_correct(self):
        session = database_service.get_sqlalchemy_session()
        import_service = import_service_module.ImportService()

        dycast_model = dycast_parameters.DycastParameters()
        dycast_model.srid_of_cases = 3857

        line_correct = "99999\t03/09/16\t1832445.278\t2118527.399"
        location_type = enums.Location_type.LAT_LONG

        import_service.load_case(session, dycast_model, line_correct,
                                 location_type)
        session.commit()

        query = session.query(Case).filter(Case.id == '99999')
        count = database_service.get_count_for_query(query)

        self.assertEqual(count, 1)
        session.delete(query.first())
        session.commit()
Example 13
    def test_close_time_only_old(self):

        dycast_parameters = test_helper_functions.get_dycast_parameters()
        comparative_test_service = comparative_test_service_module.ComparativeTestService(
            dycast_parameters)
        session = database_service.get_sqlalchemy_session()

        riskdate = datetime.date(2016, 3, 25)

        gridpoints = geography_service.generate_grid(dycast_parameters)
        point = gridpoints[0]

        daily_cases_query = comparative_test_service.get_daily_cases_query(
            session, riskdate)

        cases_in_cluster_query = comparative_test_service.get_cases_in_cluster_query(
            daily_cases_query, point)

        count = comparative_test_service.get_close_time_only(
            cases_in_cluster_query)
        self.assertGreater(count, 0)
Example 14
    def test_can_get_cases(self):
        session = database_service.get_sqlalchemy_session()
        cases = session.query(Case.id).all()
        case_count = len(cases)
        self.assertGreater(case_count, 0)
Example 15
    def generate_risk(self):

        session = database_service.get_sqlalchemy_session()
        logging_service.display_current_parameter_set(self.dycast_parameters)

        case_threshold = self.dycast_parameters.case_threshold

        gridpoints = geography_service.generate_grid(self.dycast_parameters)

        day = self.dycast_parameters.startdate
        delta = datetime.timedelta(days=1)

        while day <= self.dycast_parameters.enddate:
            start_time = time.time()
            logging.info("Starting daily_risk for %s", day)
            points_above_threshold = 0

            clusters_per_point_query = self.get_clusters_per_point_query(session, gridpoints, day)
            clusters_per_point = self.get_clusters_per_point_from_query(clusters_per_point_query)

            for cluster in clusters_per_point:
                vector_count = cluster.get_case_count()
                if vector_count >= case_threshold:
                    points_above_threshold += 1
                    self.get_close_space_and_time_for_cluster(cluster)
                    self.get_cumulative_probability_for_cluster(session, cluster)

                    point = geography_service.get_point_from_lat_long(cluster.point.y, cluster.point.x, self.system_srid)

                    risk = Risk(risk_date=day,
                                number_of_cases=vector_count,
                                lat=cluster.point.y,
                                long=cluster.point.x,
                                location=point,
                                close_pairs=cluster.close_space_and_time,
                                close_space=cluster.close_in_space,
                                close_time=cluster.close_in_time,
                                cumulative_probability=cluster.cumulative_probability)

                    self.insert_risk(session, risk)

            session.commit()

            logging.info(
                "Finished daily_risk for %s: done %s points", day, len(gridpoints))
            logging.info("Total points above threshold of %s: %s",
                         case_threshold, points_above_threshold)
            logging.info("Time elapsed: %.0f seconds",
                         time.time() - start_time)

            day += delta

        try:
            session.commit()
        except SQLAlchemyError as e:
            session.rollback()
            logging.exception("There was a problem committing the risk data session")
            logging.exception(e)
            raise
        finally:
            session.close()
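
A minimal sketch of driving generate_risk, following the construction pattern used in the tests above; it assumes get_dycast_parameters supplies startdate and enddate, since generate_risk iterates one day at a time between them:

    # Sketch under stated assumptions: RiskService is built exactly as in
    # the tests; startdate/enddate come from the parameter set.
    dycast_parameters = test_helper_functions.get_dycast_parameters()
    risk_service = risk_service_module.RiskService(dycast_parameters)

    # Inserts one Risk row per grid point whose cluster meets case_threshold,
    # for every day in [startdate, enddate].
    risk_service.generate_risk()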
Example 16
    def generate_risk(self):

        session = database_service.get_sqlalchemy_session()
        logging_service.display_current_parameter_set(self.dycast_parameters)

        case_threshold = self.dycast_parameters.case_threshold

        gridpoints = geography_service.generate_grid(self.dycast_parameters)

        day = self.dycast_parameters.startdate
        delta = datetime.timedelta(days=1)

        while day <= self.dycast_parameters.enddate:

            daily_cases_query = self.get_daily_cases_query(session, day)
            daily_case_count = database_service.get_count_for_query(
                daily_cases_query)

            if daily_case_count >= case_threshold:
                start_time = time.time()
                logging.info("Starting daily_risk for %s", day)
                points_above_threshold = 0

                clusters_per_point = self.get_clusters_per_point_query(
                    session, gridpoints, day)

                for cluster in clusters_per_point:
                    vector_count = len(cluster.case_array)
                    if vector_count >= case_threshold:
                        points_above_threshold += 1
                        point = geography_service.get_shape_from_sqlalch_element(
                            cluster.point)
                        risk = Risk(risk_date=day,
                                    number_of_cases=vector_count,
                                    lat=point.x,
                                    long=point.y,
                                    location=point)

                for point in gridpoints:
                    cases_in_cluster_query = self.get_cases_in_cluster_query(
                        daily_cases_query, point)
                    vector_count = database_service.get_count_for_query(
                        cases_in_cluster_query)
                    if vector_count >= case_threshold:
                        points_above_threshold += 1
                        risk = Risk(risk_date=day,
                                    number_of_cases=vector_count,
                                    lat=point.x,
                                    long=point.y)

                        risk.close_pairs = self.get_close_space_and_time(
                            cases_in_cluster_query)
                        risk.close_space = self.get_close_space_only_old(
                            cases_in_cluster_query) - risk.close_pairs
                        risk.close_time = self.get_close_time_only(
                            cases_in_cluster_query) - risk.close_pairs

                        risk.cumulative_probability = self.get_cumulative_probability(
                            session, risk.number_of_cases, risk.close_pairs,
                            risk.close_space, risk.close_time)
                        self.insert_risk(session, risk)

                logging.info("Finished daily_risk for %s: done %s points", day,
                             len(gridpoints))
                logging.info("Total points above threshold of %s: %s",
                             case_threshold, points_above_threshold)
                logging.info("Time elapsed: %.0f seconds",
                             time.time() - start_time)
            else:
                logging.info(
                    "Amount of cases for %s lower than threshold %s: %s, skipping.",
                    day, case_threshold, daily_case_count)

            day += delta

        try:
            session.commit()
        except SQLAlchemyError as e:
            session.rollback()
            logging.exception(
                "There was a problem committing the risk data session")
            logging.exception(e)
            raise
        finally:
            session.close()
Example 17
    def load_case_file(self, dycast_parameters, filename):
        session = database_service.get_sqlalchemy_session()

        lines_read = 0
        lines_processed = 0
        lines_loaded = 0
        lines_skipped = 0
        location_type = ""

        try:
            input_file = file_service.read_file(filename)
        except Exception:
            logging.exception("Could not read file: %s", filename)
            raise

        try:
            for line_number, line in enumerate(input_file):
                line = remove_trailing_newline(line)
                if line_number == 0:
                    header_count = line.count("\t") + 1
                    if header_count == 4:
                        location_type = enums.Location_type.LAT_LONG
                    elif header_count == 3:
                        location_type = enums.Location_type.GEOMETRY
                    else:
                        raise ValueError(
                            "Incorrect column count: {header_count}, exiting..."
                            .format(header_count=header_count))
                    logging.info("Loading cases as location type: %s",
                                 enums.Location_type(location_type).name)
                else:
                    lines_read += 1
                    result = self.load_case(session, dycast_parameters,
                                            line, location_type)

                    # load_case returns the new case ID, or -1 for a duplicate
                    lines_processed += 1
                    if result == -1:
                        lines_skipped += 1
                    else:
                        lines_loaded += 1
        finally:
            input_file.close()

        try:
            session.commit()
        except SQLAlchemyError as e:
            session.rollback()
            logging.exception("Couldn't insert cases")
            logging.exception(e)
            raise
        finally:
            session.close()

        logging.info("Case load complete: %s", filename)
        logging.info(
            "Processed %s of %s lines, %s loaded, %s duplicate IDs skipped",
            lines_processed, lines_read, lines_loaded, lines_skipped)
        return lines_read, lines_processed, lines_loaded, lines_skipped
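
A sketch of the expected input for load_case_file, assuming the tab-separated layout implied by the parser above: a header row whose column count selects the location type (4 columns for LAT_LONG, 3 for GEOMETRY), followed by case lines like those in Examples 5 and 12. The file name and header labels are illustrative:

    # cases.tsv (hypothetical; only the header's column count matters):
    #
    #   id<TAB>date<TAB>x<TAB>y
    #   99999<TAB>03/09/16<TAB>1832445.278<TAB>2118527.399
    import_service = import_service_module.ImportService()

    dycast_model = dycast_parameters.DycastParameters()
    dycast_model.srid_of_cases = 3857

    lines_read, lines_processed, lines_loaded, lines_skipped = \
        import_service.load_case_file(dycast_model, "cases.tsv")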