    def testResetToPast(self):
        """
        - Load data for both days
        - Run pipelines
        - Verify that all is well
        - Reset to a date before both
        - Verify that analysis data for both days is removed
        - Re-run pipelines
        - Verify that all is well
        """
        # Load all data
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
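        # The .ground_truth files are extended-JSON dumps; the object_hook
        # (bju is assumed to be bson.json_util here) restores BSON types such
        # as ObjectIds, UUIDs and datetimes on load.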
        ground_truth_1 = json.load(open(dataFile_1+".ground_truth"), object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2+".ground_truth"), object_hook=bju.object_hook)

        # Run both pipelines
        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        # Verify that all is well
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)

        # Reset to a date well before the two days
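        # NOTE: arrow 1.0 changed .timestamp from a property to the
        # .timestamp() method, so the property access used throughout these
        # tests assumes a pre-1.0 arrow.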
        reset_ts = arrow.get("2015-07-24").timestamp
        epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

        # Data should be completely deleted
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.assertEqual(api_result, [])

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.assertEqual(api_result, [])

        # Re-run the pipeline
        etc.runIntakePipeline(self.testUUID)
        
        # Should reconstruct everything
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2, start_ld_2)
        self.compare_result(ad.AttrDict({'result': api_result}).result,
                            ad.AttrDict(ground_truth_2).data)

    def testResetToFuture(self):
        """
        - Load data for both days
        - Run pipelines
        - Reset to a date after the two
        - Verify that all is well
        - Re-run pipelines and ensure that there are no errors
        """
        # Load all data
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        ground_truth_1 = json.load(open(dataFile_1 + ".ground_truth"),
                                   object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2 + ".ground_truth"),
                                   object_hook=bju.object_hook)

        # Run both pipelines
        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        # Reset to a date well after the two days
        reset_ts = arrow.get("2017-07-24").timestamp
        epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

        # Data should be untouched because of early return
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1,
                                            start_ld_1)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_2).data)

        # Re-running the pipeline should not affect anything
        etc.runIntakePipeline(self.testUUID)

    def testResetToTsInMiddleOfTrip(self):
        """
        - Load data for both days
        - Run pipelines
        - Verify that all is well
        - Reset to a date between the two
        - Verify that analysis data for the first day is unchanged
        - Verify that analysis data for the second day does not exist
        - Re-run pipelines
        - Verify that all is well
        """

        # Load all data
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        ground_truth_1 = json.load(open(dataFile_1 + ".ground_truth"),
                                   object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2 + ".ground_truth"),
                                   object_hook=bju.object_hook)

        # Run both pipelines
        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        # Check results: so far, so good
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1,
                                            start_ld_1)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_2).data)

        # Reset pipeline to July 24 (UTC midnight), which is actually
        # July 23rd 16:00 PDT.
        # This resets in the middle of the untracked time, which is
        # technically a trip, and lets us exercise the trip-resetting code
        reset_ts = arrow.get("2016-07-24").timestamp
        epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

        # Second day does not exist
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        logging.debug(json.dumps(api_result, indent=4, default=bju.default))
        self.assertEqual(api_result, [])

        # Re-run the pipeline again
        etc.runIntakePipeline(self.testUUID)

        # Should be back to ground truth
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1,
                                            start_ld_1)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_2).data)

    def testResetToTsInMiddleOfPlace(self):
        """
        - Load data for both days
        - Run pipelines
        - Verify that all is well
        - Reset to a date between the two
        - Verify that analysis data for the first day is unchanged
        - Verify that analysis data for the second day does not exist
        - Re-run pipelines
        - Verify that all is well
        """

        # Load all data
        dataFile_1 = "emission/tests/data/real_examples/shankari_2016-07-22"
        dataFile_2 = "emission/tests/data/real_examples/shankari_2016-07-25"
        start_ld_1 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 22})
        start_ld_2 = ecwl.LocalDate({'year': 2016, 'month': 7, 'day': 25})
        cacheKey_1 = "diary/trips-2016-07-22"
        cacheKey_2 = "diary/trips-2016-07-25"
        ground_truth_1 = json.load(open(dataFile_1 + ".ground_truth"),
                                   object_hook=bju.object_hook)
        ground_truth_2 = json.load(open(dataFile_2 + ".ground_truth"),
                                   object_hook=bju.object_hook)

        # Run both pipelines
        etc.setupRealExample(self, dataFile_1)
        etc.runIntakePipeline(self.testUUID)
        self.entries = json.load(open(dataFile_2), object_hook=bju.object_hook)
        etc.setupRealExampleWithEntries(self)
        etc.runIntakePipeline(self.testUUID)

        # Check results: so far, so good
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1,
                                            start_ld_1)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_2).data)

        # Reset pipeline to July 23 (UTC midnight), which is actually
        # July 22nd 16:00 PDT, i.e. partway through the 22nd
        reset_ts = arrow.get("2016-07-23").timestamp
        epr.reset_user_to_ts(self.testUUID, reset_ts, is_dry_run=False)

        # First day is unchanged, except that the last place doesn't have
        # exit data.
        # TODO: Modify ground truth to capture this change
        # Until then, we know that this will fail
        #        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1, start_ld_1)
        #        self.compare_result(ad.AttrDict({'result': api_result}).result,
        #                            ad.AttrDict(ground_truth_1).data)

        # Second day does not exist
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        logging.debug(json.dumps(api_result, indent=4, default=bju.default))
        self.assertEqual(api_result, [])

        # Re-run the pipeline again
        etc.runIntakePipeline(self.testUUID)

        # Should be back to ground truth
        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_1,
                                            start_ld_1)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_1).data)

        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld_2,
                                            start_ld_2)
        self.compare_result(
            ad.AttrDict({
                'result': api_result
            }).result,
            ad.AttrDict(ground_truth_2).data)
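
    # Command-line driver fragment from a separate reset script; the earlier
    # argparse setup (the date, all and user-selection options referenced
    # below via args.date, args.all and _get_user_list) is not shown here.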
    parser.add_argument("-n", "--dry_run", action="store_true", default=False,
                        help="do everything except actually perform the operations")

    args = parser.parse_args()
    print(args)

    # Handle the first row in the table
    if args.date is None:
        if args.all:
            epr.reset_all_users_to_start(args.dry_run)
        else:
            user_list = _get_user_list(args)
            logging.info("received list with %s users" % user_list)
            logging.info("first few entries are %s" % user_list[0:5])
            for user_id in user_list:
                logging.info("resetting user %s to start" % user_id)
                epr.reset_user_to_start(user_id, args.dry_run)
    else:
        # Handle the second row in the table
        day_dt = arrow.get(args.date, "YYYY-MM-DD")
        logging.debug("day_dt is %s" % day_dt)
        day_ts = day_dt.timestamp
        logging.debug("day_ts is %s" % day_ts)
        user_list = _get_user_list(args)
        logging.info("received list with %s users" % user_list)
        logging.info("first few entries are %s" % user_list[0:5])
        for user_id in user_list:
            logging.info("resetting user %s to ts %s" % (user_id, day_ts))
            epr.reset_user_to_ts(user_id, day_ts, args.dry_run)
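
    # Example invocations (hypothetical script name and flag spellings,
    # inferred from the options referenced above -- only -n/--dry_run is
    # actually defined in this fragment):
    #   python bin/reset_pipeline.py --all --dry_run           # dry run: reset all users to start
    #   python bin/reset_pipeline.py --all --date 2016-07-24   # reset users to the 2016-07-24 timestamp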
