コード例 #1
0
    def inner_loop_validation(self, model_name, grid_size, train_subset_dates,
                              current_validation_date, interval_duration=6):
        """Train a model on a date window and predict over one validation day.

        The model is trained on the rows of ``self.df_train_validation`` that
        ``ProcessData.filter_by_date`` selects for ``train_subset_dates``, then
        the validation day is walked in steps of ``interval_duration`` hours
        (start hours ``range(0, 24, interval_duration)``). For every step an
        averaged prediction is computed and stored together with the points
        selected for that step.

        Args:
            model_name: Name of a model class reachable through this module's
                ``globals()``; it is instantiated with no arguments.
            grid_size: Forwarded unchanged to the model's ``train`` method.
            train_subset_dates: Mapping with ``'initial'`` and ``'final'``
                keys delimiting the training window.
            current_validation_date: Day to validate on. It must support
                ``strftime`` and addition with ``timedelta`` (presumably a
                ``datetime`` at midnight -- confirm with the caller).
            interval_duration: Length of each prediction interval in hours.
                Defaults to 6, the value that used to be hard-coded.

        Returns:
            Whatever ``ProcessData.fill_array`` accumulates; each element
            handed to it is a 1-D object array
            ``[interval_start, interval_end, average_prediction, eval_pts]``.

        Raises:
            ValueError: If ``interval_duration`` is not positive.
            KeyError: If ``model_name`` is not defined in this module.
        """
        if interval_duration <= 0:
            raise ValueError("interval_duration must be a positive number of hours")

        # NOTE(review): the class is resolved by name from module globals, so
        # model_name must come from trusted configuration.
        model_object = globals()[model_name]()
        df_train_subset = ProcessData.filter_by_date(self.df_train_validation,
                                                     self.dataset_dict,
                                                     train_subset_dates['initial'],
                                                     train_subset_dates['final'])
        trained_model = model_object.train(
            df_train_subset,
            self.dataset_dict,
            grid_size,
            week_day=current_validation_date.strftime("%A"),
            region=self.region)
        # Reports how many timestamps the trained model holds.
        print(len(trained_model.data.timestamps))
        # TODO: save trained_model?

        # Validation uses only the rows of the validation day itself, so the
        # same date is passed as both ends of the filter.
        df_validation = ProcessData.filter_by_date(self.df_train_validation,
                                                   self.dataset_dict,
                                                   current_validation_date,
                                                   current_validation_date)
        prediction_date = current_validation_date
        flag_array = True
        prediction_results = np.array([])

        for interval_hour_start in range(0, 24, interval_duration):
            initial_prediction_datetime = prediction_date + timedelta(hours=interval_hour_start)
            final_prediction_datetime = initial_prediction_datetime + timedelta(hours=interval_duration)
            if df_validation.empty:
                # No points (e.g. crimes) were reported on the validation day.
                eval_pts = []
            else:
                # NOTE(review): this conversion does not depend on the loop
                # variable; it could be hoisted if select_timed_points is
                # confirmed not to modify validation_pts.
                validation_pts, _ = ProcessData.get_time_space_points(df_validation,
                                                                      self.dataset_dict)
                eval_pts = ValidateModel.select_timed_points(prediction_date,
                                                             interval_hour_start,
                                                             interval_duration,
                                                             validation_pts)

            prediction_by_hour = ValidateModel.predict_on_interval(initial_prediction_datetime,
                                                                   interval_duration,
                                                                   model_object,
                                                                   trained_model)
            average_prediction = ValidateModel.interval_average_prediction(prediction_by_hour)

            # Build the record slot by slot: np.array([...]) on items of
            # mixed shape is a ragged-array construction, which recent NumPy
            # rejects with ValueError unless dtype=object is explicit.
            element = np.empty(4, dtype=object)
            element[0] = initial_prediction_datetime
            element[1] = final_prediction_datetime
            element[2] = average_prediction
            element[3] = eval_pts
            flag_array, prediction_results = ProcessData.fill_array(flag_array,
                                                                    prediction_results,
                                                                    element)
        return prediction_results
コード例 #2
0
    def test_hit_rate_1(self):
        """Hit rate must be 1 when every real event falls on a hotspot.

        The counting-grid prediction is built from the same points it is
        evaluated against, so every coverage level is expected to score 1.0.
        """
        csv_path = "/Users/anamaria/Desktop/dev/security_project/datasets/deduplicate_siedco_09062020.csv"
        raw_df = pd.read_csv(csv_path)
        data = ProcessData("SIEDCO", csv_path)
        stamped_df = data.add_timestamp(raw_df)

        # Keep a single day of data: same date for both ends of the filter.
        day = '2018-01-01'
        day_df = ProcessData.filter_by_date(stamped_df, data.dataset_dict,
                                            day, day)
        points, region = ProcessData.get_time_space_points(day_df,
                                                           data.dataset_dict)

        kernel = open_cp.naive.CountingGridKernel(grid_width=150,
                                                  region=region)
        kernel.data = points
        prediction = kernel.predict()

        coverages = [2, 4, 6, 8, 10]
        default_rates = prediction_metrics.measure_hit_rates(
            prediction, points, coverages, 'default')
        ground_truth_rates = prediction_metrics.measure_hit_rates(
            prediction, points, coverages, 'ground_truth_coverage')

        expected_default = dict.fromkeys((2, 4, 6, 8, 10), 1.0)
        self.assertEqual(default_rates, expected_default)
        self.assertEqual(ground_truth_rates, {0.46187915216703573: 1.0})
コード例 #3
0
 def test_filter_by_date_case2(self):
     """Case 2 (SIEDCO): filtering must emit a UserWarning.

     Per the original case description, the initial date lies outside the
     available data.
     """
     raw = pd.read_csv(self.my_data.dataset_path)
     stamped = self.my_data.add_timestamp(raw)
     start, end = '2021-01-01', '2021-01-02'
     dataset_dict = self.my_data.dataset_dict
     with self.assertWarns(UserWarning):
         ProcessData.filter_by_date(stamped, dataset_dict, start, end)
コード例 #4
0
 def test_filter_by_date_case1(self):
     """Case 1 (SIEDCO): filtering a single in-range day.

     The filtered frame must have as many rows as a direct comparison of
     the 'FECHA_HECHO' column against that day selects.
     """
     raw = pd.read_csv(self.my_data.dataset_path)
     stamped = self.my_data.add_timestamp(raw)

     day = '2018-01-01'
     filtered = ProcessData.filter_by_date(stamped,
                                           self.my_data.dataset_dict,
                                           day, day)

     same_day_mask = stamped['FECHA_HECHO'] == "2018-01-01"
     expected = stamped.loc[same_day_mask]
     self.assertEqual(len(filtered), len(expected))
コード例 #5
0
 def test_filter_by_date_case5(self):
     """Case 5 (RNMC): filtering a single in-range day.

     The fixture is switched to the RNMC dataset, then the filtered frame
     must have as many rows as a direct comparison of the 'FECHA' column
     against that day selects.
     """
     # Point the fixture at RNMC and rebuild dataset_dict via
     # set_dictionary().
     # NOTE(review): this mutates self.my_data and is not undone here --
     # confirm the fixture is recreated for every test.
     self.my_data.dataset_name = 'RNMC'
     self.my_data.dataset_dict = self.my_data.set_dictionary()

     head_path = '/Users/anamaria/Desktop/dev/security_project/datasets/'
     file_name = '06. verify_enrich_rnmc_12022020.csv'
     df = pd.read_csv(head_path + file_name)
     df = self.my_data.add_timestamp(df)

     initial_date = '2018-01-01'
     final_date = '2018-01-01'
     dataset_dict = self.my_data.dataset_dict
     df_filtered = ProcessData.filter_by_date(df, dataset_dict,
                                              initial_date, final_date)
     df_expected = df.loc[df['FECHA'] == "2018-01-01"]
     self.assertEqual(len(df_filtered), len(df_expected))
コード例 #6
0
    def test_hit_rate_2(self):
        """Hit rate must be 0 when no real event falls on a hotspot.

        The prediction is built from the rows selected with
        LOCALIDAD 'SUBA' and evaluated against the rows selected with
        LOCALIDAD 'BOSA'; every coverage level is expected to score 0.0.
        """
        csv_path = "/Users/anamaria/Desktop/dev/security_project/datasets/deduplicate_siedco_09062020.csv"
        raw_df = pd.read_csv(csv_path)
        data = ProcessData("SIEDCO", csv_path)
        stamped_df = data.add_timestamp(raw_df)

        # Keep a single day of data: same date for both ends of the filter.
        day = '2018-01-01'
        day_df = ProcessData.filter_by_date(stamped_df, data.dataset_dict,
                                            day, day)
        train_df = ProcessData.filter_by_field(day_df, 'LOCALIDAD', 'SUBA')
        eval_df = ProcessData.filter_by_field(day_df, 'LOCALIDAD', 'BOSA')

        train_pts, region = ProcessData.get_time_space_points(
            train_df, data.dataset_dict)
        kernel = open_cp.naive.CountingGridKernel(grid_width=150,
                                                  region=region)
        kernel.data = train_pts
        prediction = kernel.predict()

        coverages = [2, 4, 6, 8, 10]
        eval_pts, _ = ProcessData.get_time_space_points(eval_df,
                                                        data.dataset_dict)
        default_rates = prediction_metrics.measure_hit_rates(
            prediction, eval_pts, coverages, 'default')
        ground_truth_rates = prediction_metrics.measure_hit_rates(
            prediction, eval_pts, coverages, 'ground_truth_coverage')

        expected_default = dict.fromkeys((2, 4, 6, 8, 10), 0.0)
        self.assertEqual(default_rates, expected_default)
        self.assertEqual(ground_truth_rates, {0.6632653061224489: 0.0})