# Assumed imports, following the module aliases used elsewhere in
# e-mission-server (epq = pipeline queries, esta = abstract timeseries):
import logging

import emission.storage.pipeline_queries as epq
import emission.storage.timeseries.abstract_timeseries as esta


def filter_accuracy(user_id):
    time_query = epq.get_time_range_for_accuracy_filtering(user_id)
    timeseries = esta.TimeSeries.get_time_series(user_id)
    try:
        unfiltered_points_df = timeseries.get_data_df("background/location", time_query)
        filtered_from_unfiltered_df = unfiltered_points_df[unfiltered_points_df.accuracy < 200]
        logging.info("filtered %d of %d points" % (len(filtered_from_unfiltered_df), len(unfiltered_points_df)))
        if len(unfiltered_points_df) == 0:
            epq.mark_accuracy_filtering_done(user_id, None) 
        else:        
            for idx, entry in filtered_from_unfiltered_df.iterrows():
                # First, we check to see if this is a duplicate of an existing entry.
                # If so, we will skip it since it is probably generated as a duplicate...
                if check_prior_duplicate(filtered_from_unfiltered_df, idx, entry):
                    logging.info("Found duplicate entry at index %s, id = %s, lat = %s, lng = %s, skipping" % 
                                    (idx, entry._id, entry.latitude, entry.longitude))
                    continue
                # Next, we check to see if there is an existing "background/filtered_location" point that corresponds
                # to this point. If there is, then we don't want to re-insert. This ensures that this step is idempotent
                if check_existing_filtered_location(timeseries, entry):
                    logging.info("Found existing filtered location for entry at index = %s, id = %s, ts = %s, fmt_time = %s, skipping" % (idx, entry._id, entry.ts, entry.fmt_time))
                    continue
                # logging.debug("Inserting %s filtered entry %s into timeseries" % (idx, entry))
                entry_copy = convert_to_filtered(timeseries.get_entry_at_ts(
                                                    "background/location",
                                                    "metadata.write_ts",
                                                    entry.metadata_write_ts))
                timeseries.insert(entry_copy)
            last_entry_processed = unfiltered_points_df.iloc[-1].metadata_write_ts
            epq.mark_accuracy_filtering_done(user_id, last_entry_processed) 
    except:
        logging.exception("Marking accuracy filtering as failed")
        epq.mark_accuracy_filtering_failed(user_id)
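
# filter_accuracy relies on three helpers that are not shown in this listing:
# check_prior_duplicate, check_existing_filtered_location and
# convert_to_filtered. The sketches below reconstruct their behavior from the
# tests further down; the actual e-mission-server implementations may differ.
import copy

def convert_to_filtered_sketch(entry_doc):
    # Deep-copy the raw entry, drop the database id so the copy can be
    # inserted as a new document, and retag it as a filtered location.
    entry_copy = copy.deepcopy(entry_doc)
    del entry_copy["_id"]
    entry_copy["metadata"]["key"] = "background/filtered_location"
    return entry_copy

def check_prior_duplicate_sketch(points_df, idx, entry):
    # A point is a "prior duplicate" if an earlier row in the dataframe has
    # the same timestamp and coordinates; an empty dataframe has no duplicates.
    if len(points_df) == 0:
        return False
    prior_points = points_df.iloc[:idx]
    matches = prior_points[(prior_points.ts == entry.ts) &
                           (prior_points.latitude == entry.latitude) &
                           (prior_points.longitude == entry.longitude)]
    return len(matches) > 0

def check_existing_filtered_location_sketch(timeseries, entry):
    # Idempotency check: a filtered copy of this point already exists if the
    # timeseries has a "background/filtered_location" entry at the same
    # timestamp ("data.ts" is the assumed field path for the point timestamp).
    existing = timeseries.get_entry_at_ts("background/filtered_location",
                                          "data.ts", entry.ts)
    return existing is not None
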
    def testEmptyCallToPriorDuplicate(self):
        time_query = epq.get_time_range_for_accuracy_filtering(self.testUUID)
        unfiltered_points_df = self.ts.get_data_df("background/location", time_query)
        self.assertEqual(len(unfiltered_points_df), 205)

        # Check call to check duplicate with a zero length dataframe
        entry = unfiltered_points_df.iloc[5]
        self.assertEqual(eaicf.check_prior_duplicate(pd.DataFrame(), 0, entry), False)
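
    # The tests in this listing reference self.ts and self.testUUID, which come
    # from a fixture that is not shown; they also assume "import pandas as pd"
    # and that eaicf is an alias for the filter_accuracy module above. A
    # minimal sketch of such a fixture, with the data loader name being an
    # assumption rather than the project's actual helper:
    def setUp(self):
        # Hypothetical loader that registers a test user and inserts the 205
        # sample background/location entries these tests expect.
        self.testUUID = load_sample_location_data()
        self.ts = esta.TimeSeries.get_time_series(self.testUUID)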
    def testConvertToFiltered(self):
        time_query = epq.get_time_range_for_accuracy_filtering(self.testUUID)
        unfiltered_points_df = self.ts.get_data_df("background/location", time_query)
        self.assertEqual(len(unfiltered_points_df), 205)

        entry_from_df = unfiltered_points_df.iloc[5]
        entry_copy = eaicf.convert_to_filtered(
            self.ts.get_entry_at_ts("background/location", "metadata.write_ts",
                                    entry_from_df.metadata_write_ts))
        self.assertNotIn("_id", entry_copy)
        self.assertEqual(entry_copy["metadata"]["key"], "background/filtered_location")
    def testCheckPriorDuplicate(self):
        time_query = epq.get_time_range_for_accuracy_filtering(self.testUUID)
        unfiltered_points_df = self.ts.get_data_df("background/location", time_query)
        self.assertEqual(len(unfiltered_points_df), 205)

        entry = unfiltered_points_df.iloc[5]
        # DataFrame.append was removed in pandas 2.x; pd.concat is the
        # equivalent way to prepend five copies of the entry.
        unfiltered_appended_df = pd.concat([pd.DataFrame([entry] * 5),
                                            unfiltered_points_df]).reset_index()
        logging.debug("unfiltered_appended_df = %s" % unfiltered_appended_df[["fmt_time"]].head())

        self.assertEqual(eaicf.check_prior_duplicate(unfiltered_appended_df, 0, entry), False)
        self.assertEqual(eaicf.check_prior_duplicate(unfiltered_appended_df, 5, entry), True)
        self.assertEqual(eaicf.check_prior_duplicate(unfiltered_points_df, 5, entry), False)
    def testExistingFilteredLocation(self):
        time_query = epq.get_time_range_for_accuracy_filtering(self.testUUID)
        unfiltered_points_df = self.ts.get_data_df("background/location", time_query)
        self.assertEqual(len(unfiltered_points_df), 205)

        entry_from_df = unfiltered_points_df.iloc[5]
        self.assertEqual(eaicf.check_existing_filtered_location(self.ts, entry_from_df), False)

        entry_copy = self.ts.get_entry_at_ts("background/location", "metadata.write_ts",
                                            entry_from_df.metadata_write_ts)
        self.ts.insert(eaicf.convert_to_filtered(entry_copy))
        self.assertEqual(eaicf.check_existing_filtered_location(self.ts, entry_from_df), True)
# This variant of filter_accuracy first checks whether the data in this range
# was collected continuously; if it was not (i.e. this is not a public phone),
# filtered data must already exist, so the stage is marked done and we return
# early. continuous_collection_in_range is not shown in this listing.
def filter_accuracy(user_id):
    time_query = epq.get_time_range_for_accuracy_filtering(user_id)
    timeseries = esta.TimeSeries.get_time_series(user_id)
    if not continuous_collection_in_range(timeseries):
        logging.debug(
            "Not a public phone, must already have filtered data, early return"
        )
        epq.mark_accuracy_filtering_done(user_id, None)
        return

    try:
        unfiltered_points_df = timeseries.get_data_df("background/location",
                                                      time_query)
        if len(unfiltered_points_df) == 0:
            epq.mark_accuracy_filtering_done(user_id, None)
        else:
            filtered_from_unfiltered_df = unfiltered_points_df[
                unfiltered_points_df.accuracy < 200]
            logging.info(
                "filtered %d of %d points" %
                (len(filtered_from_unfiltered_df), len(unfiltered_points_df)))
            for idx, entry in filtered_from_unfiltered_df.iterrows():
                # First, we check to see if this is a duplicate of an existing entry.
                # If so, we will skip it since it is probably generated as a duplicate...
                if check_prior_duplicate(filtered_from_unfiltered_df, idx,
                                         entry):
                    logging.info(
                        "Found duplicate entry at index %s, id = %s, lat = %s, lng = %s, skipping"
                        % (idx, entry._id, entry.latitude, entry.longitude))
                    continue
                # Next, we check to see if there is an existing "background/filtered_location" point that corresponds
                # to this point. If there is, then we don't want to re-insert. This ensures that this step is idempotent
                if check_existing_filtered_location(timeseries, entry):
                    logging.info(
                        "Found existing filtered location for entry at index = %s, id = %s, ts = %s, fmt_time = %s, skipping"
                        % (idx, entry._id, entry.ts, entry.fmt_time))
                    continue
                # logging.debug("Inserting %s filtered entry %s into timeseries" % (idx, entry))
                entry_copy = convert_to_filtered(
                    timeseries.get_entry_at_ts("background/location",
                                               "metadata.write_ts",
                                               entry.metadata_write_ts))
                timeseries.insert(entry_copy)
            last_entry_processed = unfiltered_points_df.iloc[
                -1].metadata_write_ts
            epq.mark_accuracy_filtering_done(user_id, last_entry_processed)
    except:
        logging.exception("Marking accuracy filtering as failed")
        epq.mark_accuracy_filtering_failed(user_id)
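
# A hypothetical driver showing how filter_accuracy might be invoked for a
# batch of users; the actual intake pipeline wires this stage up elsewhere,
# so treat this only as a usage sketch.
def run_accuracy_filtering(user_ids):
    for user_id in user_ids:
        logging.info("Filtering accuracy for user %s" % user_id)
        # filter_accuracy marks its own pipeline state (done or failed) via
        # epq, so the driver only needs to iterate over the users.
        filter_accuracy(user_id)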