class TestAbstractDatasetsUpdate(TestAbstractDatasets):

    def setUp(self):
        """
        These tests use the following dataset configuration:

            d -> dataset
            m -> merged
            a -> aggregated

            d1   d2
             \  /  \
              m1    a1
                \  /
                 m2

        Dependencies flow from top to bottom.
        """
        TestAbstractDatasets.setUp(self)
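
    # In the concrete update tests below, d1/d2 correspond to
    # self.dataset1_id/self.dataset2_id, a1 to self.aggregated_dataset1_id,
    # and m1/m2 to self.merged_dataset1_id/self.merged_dataset2_id.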

    def _create_original_datasets(self):
        self.dataset1_id = self._post_file()
        self.dataset2_id = self._post_file()

    def _add_common_calculations(self):
        self.calculations = Calculations()
        self.calculations.create(self.dataset2_id, 'amount + gps_alt',
                                 'amount plus gps_alt')

    def _verify_dataset(self, dataset_id, fixture_path):
        dframe = Dataset.find_one(dataset_id).dframe()
        with open('%s%s' % (self.FIXTURE_PATH, fixture_path), 'rb') as f:
            expected_dframe = recognize_dates(pickle.load(f))
        self._check_dframes_are_equal(dframe, expected_dframe)
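
    # Note: _post_calculations names each calculation after its formula,
    # falling back to a positional 'calc_<idx>' name when no schema is
    # loaded or when the formula collides with an existing schema key.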
    def _post_calculations(self, formulae=None, group=None):
        # Use None instead of a mutable default argument.
        formulae = formulae or []
        schema = self._load_schema()
        controller = Calculations()

        for idx, formula in enumerate(formulae):
            name = ('calc_%d' % idx
                    if not schema or formula in schema.keys() else formula)

            controller.create(self.dataset_id, formula=formula, name=name,
                              group=group)


def connect_routes(dispatcher):
    """This function takes the dispatcher and attaches the routes.

    :param dispatcher: The CherryPy dispatcher.
    """
    # controller instances map
    controllers = {
        'root': Root(),
        'calculations': Calculations(),
        'datasets': Datasets(),
        'version': Version(),
    }

    # map them into args to dispatcher
    dictify = lambda x: dict(zip(
        ['name', 'conditions', 'route', 'controller', 'action'], x))
    route_case = {
        'conditions': lambda v: dict(method=v),
        'controller': lambda v: controllers[v],
    }
    kwarg_map = lambda d: {
        k: route_case.get(k, lambda v: v)(v) for k, v in d.iteritems()
    }

    routes = [kwarg_map(dictify(route)) for route in ROUTES + options()]

    # attach them
    for route in routes:
        dispatcher.connect(**route)
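

# A minimal wiring sketch (illustrative, not from the original source):
# connect_routes expects a Routes-style dispatcher, so hooking it into
# CherryPy would look roughly like this, assuming the `routes` package
# is installed:
#
#     import cherrypy
#
#     dispatcher = cherrypy.dispatch.RoutesDispatcher()
#     connect_routes(dispatcher)
#     cherrypy.tree.mount(root=None, config={
#         '/': {'request.dispatch': dispatcher},
#     })
#     cherrypy.engine.start()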


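        # aggregations() returns datasets keyed by group; the empty-string
        # key is the aggregation created with no group.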
class TestDatasetsUpdate(TestAbstractDatasetsUpdate):

    def setUp(self):
        TestAbstractDatasetsUpdate.setUp(self)
        self._create_original_datasets()

        # create aggregated datasets
        self.calculations = Calculations()
        self.name1 = 'sum of amount'
        self.formula1 = 'sum(amount)'
        self.calculations.create(self.dataset2_id, self.formula1, self.name1)
        result = json.loads(
            self.controller.aggregations(self.dataset2_id))
        self.aggregated_dataset1_id = result['']

        # create merged datasets
        result = json.loads(self.controller.merge(dataset_ids=json.dumps(
            [self.dataset1_id, self.dataset2_id])))
        self.merged_dataset1_id = result[Dataset.ID]

        result = json.loads(self.controller.merge(dataset_ids=json.dumps(
            [self.merged_dataset1_id, self.aggregated_dataset1_id])))
        self.merged_dataset2_id = result[Dataset.ID]

    def test_setup_datasets(self):
        self._verify_dataset(self.dataset1_id,
                             'updates/originals/dataset1.pkl')
        self._verify_dataset(self.dataset2_id,
                             'updates/originals/dataset2.pkl')
        self._verify_dataset(
            self.aggregated_dataset1_id,
            'updates/originals/linked_dataset1.pkl')
        self._verify_dataset(
            self.merged_dataset1_id,
            'updates/originals/merged_dataset1.pkl')
        self._verify_dataset(
            self.merged_dataset2_id,
            'updates/originals/merged_dataset2.pkl')

    def _test_update1(self):
        for dataset_id in [self.merged_dataset1_id, self.merged_dataset2_id]:
            merged_dataset = Dataset.find_one(dataset_id)
            merged_dframe = merged_dataset.dframe(keep_parent_ids=True)
            for _, row in merged_dframe.iterrows():
                self.assertTrue(PARENT_DATASET_ID in row.keys())

        self._verify_dataset(self.dataset1_id,
                             'updates/update1/dataset1.pkl')
        self._verify_dataset(
            self.merged_dataset1_id,
            'updates/update1/merged_dataset1.pkl')
        self._verify_dataset(
            self.merged_dataset2_id,
            'updates/update1/merged_dataset2.pkl')

    def test_datasets_update1(self):
        self._put_row_updates(self.dataset1_id)
        self._test_update1()

    def test_datasets_update1_and_update2(self):
        self._put_row_updates(self.dataset1_id)
        self._test_update1()
        self._put_row_updates(self.dataset2_id)
        self._verify_dataset(
            self.merged_dataset1_id,
            'updates/update2/merged_dataset1.pkl')
        self._verify_dataset(
            self.aggregated_dataset1_id,
            'updates/update2/linked_dataset1.pkl')
        self._verify_dataset(
            self.merged_dataset2_id,
            'updates/update2/merged_dataset2.pkl')

    def test_datasets_update_merged(self):
        self._put_row_updates(self.merged_dataset1_id)
        self._verify_dataset(
            self.merged_dataset1_id,
            'updates/update_merged/merged_dataset1.pkl')
        self._verify_dataset(
            self.merged_dataset2_id,
            'updates/update_merged/merged_dataset2.pkl')

    def test_datasets_update_aggregated_dataset(self):
        self._put_row_updates(
            dataset_id=self.aggregated_dataset1_id,
            file_name='updates/update_agg/update.json')
        self._verify_dataset(
            self.aggregated_dataset1_id,
            'updates/update_agg/linked_dataset1.pkl')
        self._verify_dataset(
            self.merged_dataset2_id,
            'updates/update_agg/merged_dataset2.pkl')
        self._put_row_updates(self.dataset2_id)
        self._verify_dataset(
            self.dataset2_id,
            'updates/update_agg2/dataset2.pkl')
        self._verify_dataset(
            self.aggregated_dataset1_id,
            'updates/update_agg2/linked_dataset1.pkl')
        self._verify_dataset(
            self.merged_dataset2_id,
            'updates/update_agg2/merged_dataset2.pkl')


class TestCalculations(TestBase):

    def setUp(self):
        TestBase.setUp(self)
        self.controller = Calculations()
        self.dataset_controller = Datasets()
        self.dataset_id = None
        self.formula = 'amount + gps_alt'
        self.name = 'test'

    def test_calculations_controller_options(self):
        controller = Calculations()
        controller.options('dataset_id')

    def __post_formula(self, formula=None, name=None):
        if not formula:
            formula = self.formula
        if not name:
            name = self.name

        if not self.dataset_id:
            self.dataset_id = self._post_file()

        return self.controller.create(self.dataset_id, formula, name)

    def __post_update(self, dataset_id, update):
        return json.loads(self.dataset_controller.update(
            dataset_id=dataset_id, update=json.dumps(update)))

    def __wait_for_calculation_ready(self, dataset_id, name):
        while True:
            calculation = Calculation.find_one(dataset_id, name)

            if calculation.is_ready:
                break

            sleep(self.SLEEP_DELAY)

    def __test_error(self, response, error_text=None):
        response = json.loads(response)

        self.assertTrue(isinstance(response, dict))
        self.assertTrue(self.controller.ERROR in response)

        if not error_text:
            error_text = 'Must provide'

        self.assertTrue(error_text in response[self.controller.ERROR])

    def __test_create_from_json(self, json_filename, non_agg_cols=1, ex_len=1,
                                group=None):
        json_filepath = 'tests/fixtures/%s' % json_filename
        mock_uploaded_file = self._file_mock(json_filepath)
        dataset = Dataset.find_one(self.dataset_id)
        prev_columns = len(dataset.dframe().columns)
        response = json.loads(self.controller.create(
            self.dataset_id, json_file=mock_uploaded_file, group=group))

        self.assertTrue(isinstance(response, dict))
        self.assertTrue(self.controller.SUCCESS in response)
        self.assertTrue(self.dataset_id in response[Dataset.ID])

        self.assertEqual(
            ex_len, len(json.loads(self.controller.show(self.dataset_id))))
        self.assertEqual(
            prev_columns + non_agg_cols,
            len(dataset.reload().dframe().columns))

        return dataset

    def __verify_create(self, response):
        self.assertTrue(isinstance(response, dict))
        self.assertTrue(self.controller.SUCCESS in response)
        self.assertEqual(response[Dataset.ID], self.dataset_id)

        self.__wait_for_calculation_ready(self.dataset_id, self.name)

        dataset = Dataset.find_one(self.dataset_id)
        dframe = dataset.dframe()

        self.assertTrue(self.name in dataset.schema.keys())
        self.assertTrue(self.name in dframe.columns)
        self.assertEqual(TestAbstractDatasets.NUM_ROWS, len(dframe))
        self.assertEqual(TestAbstractDatasets.NUM_ROWS,
                         dataset.info()[Dataset.NUM_ROWS])

    def test_show(self):
        self.__post_formula()
        response = self.controller.show(self.dataset_id)

        self.assertTrue(isinstance(json.loads(response), list))

    def test_create(self):
        response = json.loads(self.__post_formula())
        self.__verify_create(response)

    @requires_async
    def test_create_async_not_ready(self):
        self.dataset_id = self._create_dataset_from_url(
            '%s%s' % (self._local_fixture_prefix(), 'good_eats_huge.csv'))
        response = json.loads(self.__post_formula())
        dataset = Dataset.find_one(self.dataset_id)

        self.assertFalse(dataset.is_ready)
        self.assertTrue(isinstance(response, dict))
        self.assertFalse(DATASET_ID in response)

        self._wait_for_dataset_state(self.dataset_id)

        self.assertFalse(self.name in dataset.schema.keys())

    @requires_async
    def test_create_async_sets_calculation_status(self):
        self.dataset_id = self._create_dataset_from_url(
            '%s%s' % (self._local_fixture_prefix(), 'good_eats_huge.csv'))

        self._wait_for_dataset_state(self.dataset_id)

        response = json.loads(self.__post_formula())

        self.assertTrue(isinstance(response, dict))
        self.assertTrue(self.controller.SUCCESS in response)
        self.assertEqual(response[Dataset.ID], self.dataset_id)

        response = json.loads(self.controller.show(self.dataset_id))[0]

        self.assertTrue(isinstance(response, dict))
        self.assertTrue(Calculation.STATE in response)
        self.assertEqual(response[Calculation.STATE],
                         Calculation.STATE_PENDING)

        self.__wait_for_calculation_ready(self.dataset_id, self.name)

        dataset = Dataset.find_one(self.dataset_id)

        self.assertTrue(self.name in dataset.schema.keys())

    @requires_async
    def test_create_async(self):
        self.dataset_id = self._post_file()

        self._wait_for_dataset_state(self.dataset_id)

        response = json.loads(self.__post_formula())
        self.__verify_create(response)

    def test_create_invalid_formula(self):
        dataset_id = self._post_file()
        result = json.loads(
            self.controller.create(dataset_id, '=NON_EXIST', self.name))

        self.assertTrue(isinstance(result, dict))
        self.assertTrue(Datasets.ERROR in result.keys())

    def test_create_update_summary(self):
        dataset_id = self._post_file()
        Datasets().summary(
            dataset_id,
            select=Datasets.SELECT_ALL_FOR_SUMMARY)
        dataset = Dataset.find_one(dataset_id)

        self.assertTrue(isinstance(dataset.stats, dict))
        self.assertTrue(isinstance(dataset.stats[Dataset.ALL], dict))

        self.__post_formula()

        # stats should have new column for calculation
        dataset = Dataset.find_one(self.dataset_id)
        stats = dataset.stats.get(Dataset.ALL)
        self.assertTrue(self.name in stats.keys())

    def test_delete_nonexistent_calculation(self):
        dataset_id = self._post_file()
        result = json.loads(self.controller.delete(dataset_id, self.name))

        self.assertTrue(Calculations.ERROR in result)

    def test_delete(self):
        self.__post_formula()
        result = json.loads(self.controller.delete(self.dataset_id, self.name))

        self.assertTrue(AbstractController.SUCCESS in result)

        dataset = Dataset.find_one(self.dataset_id)
        self.assertTrue(self.name not in dataset.schema.labels_to_slugs)

    def test_delete_calculation_not_in_dataset(self):
        self.__post_formula()

        # Remove column from dataset
        dataset = Dataset.find_one(self.dataset_id)
        dataset.delete_columns([self.name])

        result = json.loads(self.controller.delete(self.dataset_id, self.name))

        self.assertTrue(AbstractController.SUCCESS in result)

        dataset = Dataset.find_one(self.dataset_id)
        self.assertTrue(self.name not in dataset.schema.labels_to_slugs)

    def test_delete_update_summary(self):
        self.__post_formula()

        dataset = Dataset.find_one(self.dataset_id)
        self.assertTrue(self.name in dataset.stats.get(Dataset.ALL).keys())

        json.loads(self.controller.delete(self.dataset_id, self.name))

        dataset = Dataset.find_one(self.dataset_id)
        self.assertTrue(self.name not in dataset.stats.get(Dataset.ALL).keys())

    def test_show_jsonp(self):
        self.__post_formula()
        results = self.controller.show(self.dataset_id, callback='jsonp')

        self.assertEqual('jsonp(', results[0:6])
        self.assertEqual(')', results[-1])

    def test_create_aggregation(self):
        self.formula = 'sum(amount)'
        self.name = 'test'
        response = json.loads(self.__post_formula())

        self.assertTrue(isinstance(response, dict))
        self.assertTrue(self.controller.SUCCESS in response)
        self.assertEqual(response[Dataset.ID], self.dataset_id)

        dataset = Dataset.find_one(self.dataset_id)

        self.assertTrue('' in dataset.aggregated_datasets_dict.keys())

    def test_delete_aggregation(self):
        self.formula = 'sum(amount)'
        self.name = 'test'
        json.loads(self.__post_formula())

        result = json.loads(
            self.controller.delete(self.dataset_id, self.name, ''))

        self.assertTrue(AbstractController.SUCCESS in result)

        dataset = Dataset.find_one(self.dataset_id)
        agg_dataset = dataset.aggregated_dataset('')

        self.assertTrue(self.name not in agg_dataset.schema.labels_to_slugs)

    def test_error_on_delete_calculation_with_dependency(self):
        self.__post_formula()
        dep_name = self.name
        self.formula = dep_name
        self.name = 'test1'
        response = json.loads(self.__post_formula())

        self.assertTrue(isinstance(response, dict))
        self.assertTrue(self.controller.SUCCESS in response)

        result = json.loads(
            self.controller.delete(self.dataset_id, dep_name, ''))

        self.assertTrue(AbstractController.ERROR in result)
        self.assertTrue('depend' in result[AbstractController.ERROR])

    def test_create_multiple(self):
        self.dataset_id = self._post_file()
        self.__test_create_from_json(
            'good_eats.calculations.json', non_agg_cols=2, ex_len=2)

    def test_create_multiple_ignore_group(self):
        self.dataset_id = self._post_file()
        dataset = self.__test_create_from_json(
            'good_eats.calculations.json', non_agg_cols=2, ex_len=2,
            group='risk_factor')

        self.assertEqual(dataset.aggregated_datasets_dict, {})

    def test_create_json_single(self):
        self.dataset_id = self._post_file()
        self.__test_create_from_json('good_eats_single.calculations.json')

    def test_create_multiple_with_group(self):
        self.dataset_id = self._post_file()
        groups = ['risk_factor', 'risk_factor,food_type', 'food_type']
        dataset = self.__test_create_from_json(
            'good_eats_group.calculations.json', non_agg_cols=2, ex_len=6)

        for group in groups:
            self.assertTrue(group in dataset.aggregated_datasets_dict.keys())
            dframe = dataset.aggregated_dataset(group).dframe()

            for column in Calculation().split_groups(group):
                self.assertTrue(column in dframe.columns)

    def test_create_with_missing_args(self):
        self.dataset_id = self._post_file()
        self.__test_error(self.controller.create(self.dataset_id))
        self.__test_error(
            self.controller.create(self.dataset_id, formula='gps_alt'))
        self.__test_error(
            self.controller.create(self.dataset_id, name='test'))

    def test_create_with_bad_json(self):
        self.dataset_id = self._post_file()
        json_filepath = self._fixture_path_prefix(
            'good_eats_bad.calculations.json')
        mock_uploaded_file = self._file_mock(json_filepath)

        self.__test_error(
            self.controller.create(self.dataset_id,
                                   json_file=mock_uploaded_file),
            error_text='Required')

        # The first create consumed the mock file, so it now reads as empty
        self.__test_error(
            self.controller.create(self.dataset_id,
                                   json_file=mock_uploaded_file),
            error_text='Improper format for JSON')

    def test_create_reserved_name(self):
        name = 'sum'
        response = json.loads(self.__post_formula(None, name))

        self.assertTrue(isinstance(response, dict))
        self.assertTrue(self.controller.SUCCESS in response)
        self.assertEqual(response[Dataset.ID], self.dataset_id)

        dataset = Dataset.find_one(self.dataset_id)
        slug = dataset.schema.labels_to_slugs[name]
        response = json.loads(self.__post_formula('%s + amount' % slug))

        self.assertTrue(isinstance(response, dict))
        self.assertTrue(self.controller.SUCCESS in response)
        self.assertTrue(self.dataset_id in response[Dataset.ID])

    def test_create_with_duplicate_names(self):
        formula_names_to_valid = {
            'water_not_functioning_none': True,   # an already slugged column
            'water_not_functioning/none': False,  # a non-slug column
            'region': False,    # an existing column
            'date': False,      # a reserved key and an existing column
            'sum': True,        # a reserved key
        }

        for formula_name, valid in formula_names_to_valid.items():
            dataset_id = self._post_file('water_points.csv')
            dframe_before = Dataset.find_one(dataset_id).dframe()

            # a calculation
            response = json.loads(self.controller.create(
                dataset_id,
                'water_source_type in ["borehole"]',
                formula_name))

            self.assertTrue(isinstance(response, dict))

            if valid:
                self.assertTrue(self.controller.SUCCESS in response)
            else:
                self.assertTrue(self.controller.ERROR in response)
                self.assertTrue(
                    formula_name in response[self.controller.ERROR])

            dataset = Dataset.find_one(dataset_id)

            if valid:
                name = dataset.calculations()[-1].name

            # an aggregation
            response = json.loads(self.controller.create(
                dataset_id,
                'newest(date_, water_functioning)',
                formula_name))

            self.assertTrue(isinstance(response, dict))
            self.assertTrue(self.controller.SUCCESS in response)

            dframe_after = dataset.dframe()

            # Does not change data
            self.assertEqual(len(dframe_before), len(dframe_after))

            if valid:
                slug = dataset.schema.labels_to_slugs[name]
                self.assertTrue(slug not in dframe_before.columns)
                self.assertTrue(slug in dframe_after.columns)

            if valid:
                # Does change columns
                self.assertEqual(
                    len(dframe_before.columns) + 1, len(dframe_after.columns))
            else:
                # Does not change columns
                self.assertEqual(
                    len(dframe_before.columns), len(dframe_after.columns))

            # check OK on update
            update = {
                'date': '2013-01-05',
                'water_source_type': 'borehole',
            }
            result = self.__post_update(dataset_id, update)
            self.assertTrue(Dataset.ID in result)
            dataset = Dataset.find_one(dataset_id)
            dframe_after_update = dataset.dframe()
            self.assertEqual(len(dframe_after) + 1, len(dframe_after_update))

    def test_cannot_create_aggregations_with_duplicate_names(self):
        dataset_id = self._post_file('water_points.csv')

        formula_name = 'name'

        response = json.loads(self.controller.create(
            dataset_id,
            'newest(date_, water_functioning)',
            formula_name))

        self.assertTrue(self.controller.SUCCESS in response)

        # another with the same name
        response = json.loads(self.controller.create(
            dataset_id,
            'newest(date_, water_functioning)',
            formula_name))

        self.assertTrue(formula_name in response[self.controller.ERROR])

    def test_can_create_aggregations_with_duplicate_as_slug_names(self):
        dataset_id = self._post_file('water_points.csv')

        formula_name = 'name*'

        response = json.loads(self.controller.create(
            dataset_id,
            'newest(date_, water_functioning)',
            formula_name))

        self.assertTrue(self.controller.SUCCESS in response)

        # another with the same name
        response = json.loads(self.controller.create(
            dataset_id,
            'newest(date_, water_functioning)',
            'name_'))

        self.assertTrue(self.controller.SUCCESS in response)

    def test_newest(self):
        expected_dataset = {
            u'wp_functional': {0: u'no', 1: u'yes', 2: u'no', 3: u'yes'},
            u'id': {0: 1, 1: 2, 2: 3, 3: 4}}
        dataset_id = self._post_file('newest_test.csv')
        self.controller.create(dataset_id,
                               'newest(submit_date,functional)',
                               'wp_functional', group='id')
        dataset = Dataset.find_one(dataset_id)
        agg_ds = dataset.aggregated_dataset('id')

        self.assertEqual(expected_dataset, agg_ds.dframe().to_dict())

    def test_update_after_agg(self):
        dataset_id = self._post_file('wp_data.csv')
        results = json.loads(self.controller.create(dataset_id,
                             'newest(submit_date,wp_id)', 'wp_newest'))

        dataset = Dataset.find_one(dataset_id)
        previous_num_rows = dataset.num_rows

        self.assertTrue(self.controller.SUCCESS in results)
        self.assertFalse(dataset.aggregated_dataset('') is None)

        update = {
            'submit_date': '2013-01-05',
            'wp_id': 'D',
            'functional': 'no',
        }
        self.__post_update(dataset_id, update)
        update = {
            'wp_id': 'E',
            'functional': 'no',
        }
        self.__post_update(dataset_id, update)

        dataset = Dataset.find_one(dataset_id)
        current_num_rows = dataset.num_rows
        agg_df = dataset.aggregated_dataset('').dframe()

        self.assertEqual(agg_df.get_value(0, 'wp_newest'), 'D')
        self.assertEqual(current_num_rows, previous_num_rows + 2)

    @requires_async
    def test_update_after_agg_group(self):
        dataset_id = self._post_file('wp_data.csv')
        group = 'wp_id'
        self._wait_for_dataset_state(dataset_id)

        test_calculations = {
            'newest(submit_date,functional)': 'wp_functional',
            'max(submit_date)': 'latest_submit_date',
            'ratio(functional in ["yes"], 1)': 'wp_func_ratio'}

        expected_results = {'wp_id': ['A', 'B', 'C', 'n/a'],
                            'wp_functional': ['yes', 'no', 'yes', 'yes'],
                            'wp_func_ratio': [1.0, 0.0, 1.0, 1.0],
                            'wp_func_ratio_denominator': [1, 1, 1, 1],
                            'wp_func_ratio_numerator': [1.0, 0.0, 1.0, 1.0],
                            'latest_submit_date': [1356998400, 1357084800,
                                                   1357171200, 1357257600]}

        expected_results_after = {
            'wp_id': ['A', 'B', 'C', 'D', 'n/a'],
            'wp_functional': ['no', 'no', 'yes', 'yes'],
            'wp_func_ratio': [0.5, 0.0, 1.0, 1.0, 1.0],
            'wp_func_ratio_denominator': [2.0, 1.0, 1.0, 1.0, 1.0],
            'wp_func_ratio_numerator': [1.0, 0.0, 1.0, 1.0, 1.0],
            'latest_submit_date': [1357603200.0, 1357084800.0,
                                   1357171200.0, 1357257600.0]}

        for formula, name in test_calculations.items():
            results = json.loads(self.controller.create(
                dataset_id, formula, name, group=group))

            self.assertTrue(self.controller.SUCCESS in results)

        dataset = Dataset.find_one(dataset_id)
        previous_num_rows = dataset.num_rows

        while True:
            dataset = Dataset.find_one(dataset_id)

            if dataset.aggregated_dataset(group) and all(
                    [not c.is_pending for c in dataset.calculations()]):
                break
            sleep(self.SLEEP_DELAY)

        agg_dframe = dataset.aggregated_dataset(group).dframe()
        self.assertEqual(set(expected_results.keys()),
                         set(agg_dframe.columns.tolist()))

        for column, results in expected_results.items():
            self.assertEqual(results,
                             agg_dframe[column].tolist())

        update = {
            'wp_id': 'D',
            'functional': 'yes',
        }
        self.__post_update(dataset_id, update)
        update = {
            'submit_date': '2013-01-08',
            'wp_id': 'A',
            'functional': 'no',
        }
        self.__post_update(dataset_id, update)

        while True:
            dataset = Dataset.find_one(dataset_id)
            current_num_rows = dataset.num_rows

            if not len(dataset.pending_updates):
                break

            sleep(self.SLEEP_DELAY)

        dataset = Dataset.find_one(dataset_id)
        agg_dframe = dataset.aggregated_dataset(group).dframe()

        self.assertEqual(current_num_rows, previous_num_rows + 2)
        self.assertEqual(set(expected_results_after.keys()),
                         set(agg_dframe.columns.tolist()))
        for column, results in expected_results_after.items():
            column = [x for x in agg_dframe[column].tolist() if not
                      is_float_nan(x)]
            self.assertEqual(results, column)

    @requires_async
    def test_fail_in_background(self):
        dataset_id = self._post_file('wp_data.csv')
        group = 'wp_id'
        self._wait_for_dataset_state(dataset_id)

        self.controller.create(dataset_id,
                               'newest(submit_date,functional)',
                               'wp_functional',
                               group=group)
        self.controller.create(dataset_id,
                               'max(submit_date)',
                               'latest_submit_date',
                               group=group)

        # Rename the calculation so its pending state never clears; the
        # resulting retries are abandoned and marked failed after 10 attempts.
        calc = Calculation.find_one(dataset_id, 'latest_submit_date', group)
        calc.update({calc.NAME: 'another_name'})

        update = {
            'wp_id': 'D',
            'functional': 'yes',
        }
        self.__post_update(dataset_id, update)
        update = {
            'submit_date': '2013-01-08',
            'wp_id': 'A',
            'functional': 'no',
        }
        self.__post_update(dataset_id, update)

        while True:
            dataset = Dataset.find_one(dataset_id)
            calcs_not_pending = [
                c.state != c.STATE_PENDING for c in dataset.calculations()]

            if not len(dataset.pending_updates) and all(calcs_not_pending):
                break

            sleep(self.SLEEP_DELAY)

        for c in dataset.calculations():
            self.assertEqual(c.STATE_FAILED, c.state)
            self.assertTrue('Traceback' in c.error_message)

    def test_fail_then_create(self):
        response = json.loads(self.__post_formula())
        self.__verify_create(response)

        # Overwrite as failed
        calc = Calculation.find_one(self.dataset_id, self.name)
        calc.update({calc.STATE: calc.STATE_FAILED})

        # Test we can still add a calculation
        self.name = 'test2'
        response = json.loads(self.__post_formula())
        self.__verify_create(response)
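
# These suites are normally run through the project's own test runner; a
# generic invocation sketch (assumes standard unittest discovery from the
# repository root):
#
#     python -m unittest discover tests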