Ejemplo n.º 1
0
class TestCalculator(TestBase):
    """Shared fixture and driver for formula-calculation tests.

    ``_test_calculator`` iterates ``self.calculations`` and calls
    ``self._test_calculation_results``; neither is defined in this class,
    so both have to be provided elsewhere (e.g. by a subclass).
    """

    def setUp(self):
        """Store the calculations fixture dataset and set test defaults."""
        TestBase.setUp(self)

        fixture = 'good_eats_with_calculations.csv'
        self.dataset = Dataset()
        self.dataset.save(self.test_dataset_ids[fixture])
        self.dataset.save_observations(
            recognize_dates(self.get_data(fixture)))

        self.group = None
        self.parser = Parser(self.dataset)
        self.places = 5

    def _equal_msg(self, calculated, stored, formula):
        """Return the message describing a calculated/stored mismatch."""
        comparison = '(calculated %s) %s != (stored %s) %s ' % (
            type(calculated), calculated, type(stored), stored)
        context = '(within %s places), formula: %s' % (self.places, formula)
        return comparison + context

    def _test_calculator(self):
        """Validate, save and calculate each formula, then check its result.

        Records the starting column count and the label/slug lists on
        ``self`` before looping over ``self.calculations``.
        """
        self.dframe = self.dataset.dframe()
        first_row = self.dframe.irow(0)

        self.start_num_cols = len(self.dframe.columns.tolist())
        self.added_num_cols = 0

        # Map each schema column's label to its column name.
        slug_by_label = {}
        for slug, attrs in self.dataset.schema.items():
            slug_by_label[attrs[Dataset.LABEL]] = slug

        # Split the mapping into two parallel lists (labels, column names).
        labels, slugs = zip(*slug_by_label.items())
        self.label_list = list(labels)
        self.slugified_key_list = list(slugs)

        for index, formula in enumerate(self.calculations):
            calc_name = 'test-%s' % index
            self.parser.validate_formula(formula, first_row)

            calculator = Calculator(self.dataset)

            # The return value is not used here; the call itself is kept.
            self.dataset.split_groups(self.group)

            calculation = Calculation()
            calculation.save(self.dataset, formula, calc_name, self.group)
            calculator.calculate_columns([calculation])

            self.column_labels_to_slugs = self.dataset.schema.labels_to_slugs

            self._test_calculation_results(calc_name, formula)
Ejemplo n.º 2
0
    def test_dframe(self):
        """Check the DataFrame read back from a freshly saved dataset.

        Asserts the result is a ``DataFrame``, that none of
        ``MONGO_RESERVED_KEY_STRS`` appears among its columns, and that the
        first ``submit_date`` value is a ``datetime``.
        """
        fixture = 'good_eats.csv'

        dataset = Dataset()
        dataset.save(self.test_dataset_ids[fixture])
        dataset.save_observations(recognize_dates(self.get_data(fixture)))

        # The observations are fetched and consumed, but not inspected.
        list(Observation.find(dataset))

        dframe = dataset.dframe()
        self.assertTrue(isinstance(dframe, DataFrame))

        expected = self.get_data(fixture).reindex(columns=dframe.columns)
        # NOTE(review): all() over a DataFrame iterates its column labels,
        # not the element-wise comparison results, so this assertion looks
        # like it cannot fail for non-empty labels -- confirm the intent.
        self.assertTrue(all(expected.eq(dframe)))

        column_names = dframe.columns
        # ensure no reserved keys
        for reserved in MONGO_RESERVED_KEY_STRS:
            self.assertFalse(reserved in column_names)

        # ensure date is converted
        first_submit_date = dframe.submit_date[0]
        self.assertTrue(isinstance(first_submit_date, datetime))
Ejemplo n.º 3
0
class TestCalculator(TestBase):
    """Shared fixture and driver for formula-calculation tests.

    ``_test_calculator`` iterates ``self.calculations`` and calls
    ``self._test_calculation_results``; neither is defined in this class,
    so both have to be provided elsewhere (e.g. by a subclass).
    """

    def setUp(self):
        """Store the calculations fixture dataset and set test defaults."""
        TestBase.setUp(self)

        fixture = 'good_eats_with_calculations.csv'
        self.dataset = Dataset()
        self.dataset.save(self.test_dataset_ids[fixture])
        self.dataset.save_observations(
            recognize_dates(self.get_data(fixture)))

        self.group = None
        self.places = 5

    def _equal_msg(self, calculated, stored, formula):
        """Return the message describing a calculated/stored mismatch."""
        comparison = '(calculated %s) %s != (stored %s) %s ' % (
            type(calculated), calculated, type(stored), stored)
        context = '(within %s places), formula: %s' % (self.places, formula)
        return comparison + context

    def _test_calculator(self):
        """Validate, save and calculate each formula, then check its result.

        Records the starting column count and the label/slug lists on
        ``self`` before looping over ``self.calculations``; ``self.now`` is
        refreshed right before each calculation is run.
        """
        self.dframe = self.dataset.dframe()

        self.start_num_cols = len(self.dframe.columns.tolist())
        self.added_num_cols = 0

        # Map each schema column's label to its column name.
        slug_by_label = {}
        for slug, attrs in self.dataset.schema.items():
            slug_by_label[attrs[Dataset.LABEL]] = slug

        # Split the mapping into two parallel lists (labels, column names).
        labels, slugs = zip(*slug_by_label.items())
        self.label_list = list(labels)
        self.slugified_key_list = list(slugs)

        for index, formula in enumerate(self.calculations):
            calc_name = 'test-%s' % index

            Parser.validate_formula(formula, self.dataset)

            calculation = Calculation()
            calculation.save(self.dataset, formula, calc_name, self.group)

            self.now = now()
            calculate_columns(self.dataset, [calculation])

            self.column_labels_to_slugs = self.dataset.schema.labels_to_slugs

            self._test_calculation_results(calc_name, formula)
Ejemplo n.º 4
0
class TestCalculation(TestBase):
    """Tests for saving, finding and deleting ``Calculation`` records."""

    def setUp(self):
        """Save the dataset record used by every test and set defaults."""
        TestBase.setUp(self)
        self.dataset = Dataset()
        self.dataset.save(self.test_dataset_ids['good_eats.csv'])
        self.formula = 'rating'
        self.name = 'test'

    def _save_calculation(self, formula):
        """Save ``formula`` under ``self.name`` and return the result.

        A falsy ``formula`` falls back to ``self.formula``.  This uses
        ``self.calculation``, which is only assigned by
        ``_save_observations_and_calculation``, so that must run first.
        """
        if not formula:
            formula = self.formula
        return self.calculation.save(self.dataset, formula, self.name)

    def _save_observations(self):
        """Save the ``good_eats.csv`` fixture data as observations."""
        self.dataset.save_observations(self.get_data('good_eats.csv'))

    def _save_observations_and_calculation(self, formula=None):
        """Save observations, then save a calculation on a new instance."""
        self._save_observations()
        self.calculation = Calculation()
        return self._save_calculation(formula)

    def test_save(self):
        """Saving returns a dict record and the stored state is ready."""
        record = self._save_observations_and_calculation()
        self.assertIsInstance(record, dict)
        self.assertIn(Calculation.FORMULA, record)
        self.assertIn(Calculation.STATE, record)

        record = Calculation.find(self.dataset)[0].record
        self.assertEqual(record[Calculation.STATE], Calculation.STATE_READY)
        self.assertTrue(Calculation(record).is_ready)

    def test_save_set_status(self):
        """Saving returns a dict record containing the formula key."""
        record = self._save_observations_and_calculation()
        self.assertIsInstance(record, dict)
        self.assertIn(Calculation.FORMULA, record)

    def test_save_set_aggregation(self):
        """A ``max(...)`` formula is stored with aggregation ``'max'``."""
        self._save_observations_and_calculation('max(amount)')
        calculation = Calculation.find(self.dataset)[0]
        self.assertEqual('max', calculation.aggregation)

    def test_save_set_aggregation_id(self):
        """The aggregation id matches the dataset's ``''`` aggregate entry."""
        self._save_observations_and_calculation('max(amount)')
        agg_id = self.dataset.aggregated_datasets_dict['']
        calculation = Calculation.find(self.dataset)[0]
        self.assertEqual(agg_id, calculation.aggregation_id)

    def test_save_improper_formula(self):
        """An unknown column raises ``ParseError`` mentioning the column."""
        assert_raises(ParseError, self._save_observations_and_calculation,
                      'NON_EXISTENT_COLUMN')
        # The call is repeated so the exception message can be inspected.
        try:
            self._save_observations_and_calculation('NON_EXISTENT_COLUMN')
        except ParseError as e:
            self.assertIn('Missing column', str(e))

    def test_save_unparsable_formula(self):
        """An unparsable formula raises ``ParseError`` ('Parse Failure')."""
        assert_raises(ParseError, self._save_observations_and_calculation,
                      '=NON_EXISTENT_COLUMN')
        # The call is repeated so the exception message can be inspected.
        try:
            self._save_observations_and_calculation(
                '=NON_EXISTENT_COLUMN')
        except ParseError as e:
            self.assertIn('Parse Failure', str(e))

    def test_save_improper_formula_no_data(self):
        """Without saved observations the error mentions 'No schema'."""
        assert_raises(ParseError, Calculation().save, self.dataset,
                      'NON_EXISTENT_COLUMN', self.name)
        # The call is repeated so the exception message can be inspected.
        try:
            Calculation().save(self.dataset, 'NON_EXISTENT_COLUMN',
                               self.name)
        except ParseError as e:
            self.assertIn('No schema', str(e))

    def test_save_unparsable_formula_no_data(self):
        """Without saved observations an unparsable formula still fails."""
        assert_raises(ParseError, Calculation().save, self.dataset,
                      '=NON_EXISTENT_COLUMN', self.name)
        # The call is repeated so the exception message can be inspected.
        try:
            Calculation().save(self.dataset, '=NON_EXISTENT_COLUMN',
                               self.name)
        except ParseError as e:
            self.assertIn('Parse Failure', str(e))

    def test_save_non_existent_group(self):
        """An unknown group raises ``ParseError`` mentioning 'Group'."""
        self._save_observations()
        assert_raises(ParseError, Calculation().save, self.dataset,
                      self.formula, self.name, group_str='NON_EXISTENT_GROUP')
        # The call is repeated so the exception message can be inspected.
        try:
            Calculation().save(self.dataset, self.formula, self.name,
                               group_str='NON_EXISTENT_GROUP')
        except ParseError as e:
            self.assertIn('Group', str(e))

    def test_find(self):
        """The first found calculation has the ready state."""
        self._save_observations_and_calculation()
        rows = Calculation.find(self.dataset)
        new_record = rows[0].record
        status = new_record.pop(Calculation.STATE)
        self.assertEqual(status, Calculation.STATE_READY)

    def test_sets_dependent_calculations(self):
        """Saving 'test1' with formula 'test' registers it on 'test'."""
        self._save_observations_and_calculation()
        self.name = 'test1'
        self._save_calculation('test')
        calculation = Calculation.find_one(self.dataset.dataset_id, 'test')
        self.assertEqual(calculation.dependent_calculations, ['test1'])

    def test_removes_dependent_calculations(self):
        """Deleting 'test1' removes it from the dependents of 'test'."""
        self._save_observations_and_calculation()
        self.name = 'test1'
        self._save_calculation('test')
        calculation = Calculation.find_one(self.dataset.dataset_id, 'test')
        self.assertEqual(calculation.dependent_calculations, ['test1'])

        calculation = Calculation.find_one(self.dataset.dataset_id, 'test1')
        calculation.delete(self.dataset)

        calculation = Calculation.find_one(self.dataset.dataset_id, 'test')
        self.assertEqual(calculation.dependent_calculations, [])

    def test_disallow_delete_dependent_calculation(self):
        """Deleting 'test' while 'test1' depends on it raises an error."""
        self._save_observations_and_calculation()
        self.name = 'test1'
        self._save_calculation('test')
        calculation = Calculation.find_one(self.dataset.dataset_id, 'test')
        self.assertEqual(calculation.dependent_calculations, ['test1'])

        calculation = Calculation.find_one(self.dataset.dataset_id, 'test')
        assert_raises(DependencyError, calculation.delete, self.dataset)
class TestCalculation(TestBase):
    """Tests for creating, finding and deleting ``Calculation`` records."""

    def setUp(self):
        """Save the dataset record used by every test and set defaults."""
        TestBase.setUp(self)
        self.dataset = Dataset()
        self.dataset.save(self.test_dataset_ids['good_eats.csv'])
        self.formula = 'rating'
        self.name = 'test'

    def _save_calculation(self, formula):
        """Create a calculation named ``self.name`` and return it.

        A falsy ``formula`` falls back to ``self.formula``.
        """
        if not formula:
            formula = self.formula
        return Calculation.create(self.dataset, formula, self.name)

    def _save_observations(self):
        """Save the ``good_eats.csv`` fixture data as observations."""
        self.dataset.save_observations(self.get_data('good_eats.csv'))

    def _save_observations_and_calculation(self, formula=None):
        """Save observations, then create and return a calculation."""
        self._save_observations()
        return self._save_calculation(formula)

    def test_save(self):
        """Creating returns a ``Calculation`` whose stored state is ready."""
        calculation = self._save_observations_and_calculation()

        self.assertIsInstance(calculation, Calculation)

        record = calculation.record

        self.assertIsInstance(record, dict)
        self.assertIn(Calculation.FORMULA, record)
        self.assertIn(Calculation.STATE, record)

        record = Calculation.find(self.dataset)[0].record

        self.assertEqual(record[Calculation.STATE], Calculation.STATE_READY)
        self.assertTrue(Calculation(record).is_ready)

    def test_save_set_status(self):
        """The created calculation's record is a dict with the formula key."""
        record = self._save_observations_and_calculation().record

        self.assertIsInstance(record, dict)
        self.assertIn(Calculation.FORMULA, record)

    def test_save_set_aggregation(self):
        """A ``max(...)`` formula is created with aggregation ``'max'``."""
        calculation = self._save_observations_and_calculation('max(amount)')

        self.assertEqual('max', calculation.aggregation)

    def test_save_set_aggregation_id(self):
        """The aggregation id matches the dataset's ``''`` aggregate entry."""
        calculation = self._save_observations_and_calculation('max(amount)')
        agg_id = self.dataset.aggregated_datasets_dict['']

        self.assertEqual(agg_id, calculation.aggregation_id)

    def test_save_improper_formula(self):
        """An unknown column raises ``ParseError`` mentioning the column."""
        assert_raises(ParseError, self._save_observations_and_calculation,
                      'NON_EXISTENT_COLUMN')
        # The call is repeated so the exception message can be inspected.
        try:
            self._save_observations_and_calculation('NON_EXISTENT_COLUMN')
        except ParseError as e:
            self.assertIn('Missing column', str(e))

    def test_save_unparsable_formula(self):
        """An unparsable formula raises ``ParseError`` ('Parse Failure')."""
        assert_raises(ParseError, self._save_observations_and_calculation,
                      '=NON_EXISTENT_COLUMN')
        # The call is repeated so the exception message can be inspected.
        try:
            self._save_observations_and_calculation('=NON_EXISTENT_COLUMN')
        except ParseError as e:
            self.assertIn('Parse Failure', str(e))

    def test_save_improper_formula_no_data(self):
        """Without saved observations the error mentions 'No schema'."""
        assert_raises(ParseError,
                      Calculation().save, self.dataset, 'NON_EXISTENT_COLUMN',
                      self.name)
        # The call is repeated so the exception message can be inspected.
        try:
            Calculation().save(self.dataset, 'NON_EXISTENT_COLUMN', self.name)
        except ParseError as e:
            self.assertIn('No schema', str(e))

    def test_save_unparsable_formula_no_data(self):
        """Without saved observations an unparsable formula still fails."""
        assert_raises(ParseError,
                      Calculation().save, self.dataset, '=NON_EXISTENT_COLUMN',
                      self.name)
        # The call is repeated so the exception message can be inspected.
        try:
            Calculation().save(self.dataset, '=NON_EXISTENT_COLUMN', self.name)
        except ParseError as e:
            self.assertIn('Parse Failure', str(e))

    def test_save_non_existent_group(self):
        """An unknown group raises ``ParseError`` mentioning 'Group'."""
        self._save_observations()
        assert_raises(ParseError,
                      Calculation().save,
                      self.dataset,
                      self.formula,
                      self.name,
                      group_str='NON_EXISTENT_GROUP')
        # The call is repeated so the exception message can be inspected.
        try:
            Calculation().save(self.dataset,
                               self.formula,
                               self.name,
                               group_str='NON_EXISTENT_GROUP')
        except ParseError as e:
            self.assertIn('Group', str(e))

    def test_find(self):
        """The first found calculation has the ready state."""
        self._save_observations_and_calculation()
        rows = Calculation.find(self.dataset)
        new_record = rows[0].record
        status = new_record.pop(Calculation.STATE)
        self.assertEqual(status, Calculation.STATE_READY)

    def test_sets_dependent_calculations(self):
        """Creating 'test1' with formula 'test' registers it on 'test'."""
        self._save_observations_and_calculation()
        self.name = 'test1'
        self._save_calculation('test')
        calculation = Calculation.find_one(self.dataset.dataset_id, 'test')
        self.assertEqual(calculation.dependent_calculations, ['test1'])

    def test_removes_dependent_calculations(self):
        """Deleting 'test1' removes it from the dependents of 'test'."""
        self._save_observations_and_calculation()
        self.name = 'test1'
        self._save_calculation('test')
        calculation = Calculation.find_one(self.dataset.dataset_id, 'test')
        self.assertEqual(calculation.dependent_calculations, ['test1'])

        calculation = Calculation.find_one(self.dataset.dataset_id, 'test1')
        calculation.delete(self.dataset)

        calculation = Calculation.find_one(self.dataset.dataset_id, 'test')
        self.assertEqual(calculation.dependent_calculations, [])

    def test_disallow_delete_dependent_calculation(self):
        """Deleting 'test' while 'test1' depends on it raises an error."""
        self._save_observations_and_calculation()
        self.name = 'test1'
        self._save_calculation('test')
        calculation = Calculation.find_one(self.dataset.dataset_id, 'test')
        self.assertEqual(calculation.dependent_calculations, ['test1'])

        calculation = Calculation.find_one(self.dataset.dataset_id, 'test')
        assert_raises(DependencyError, calculation.delete, self.dataset)