Ejemplo n.º 1
0
class TestCalculator(TestBase):
    """Shared fixture and driver for formula-calculation tests.

    ``_test_calculator`` reads ``self.calculations`` and calls
    ``self._test_calculation_results``; neither is defined in this class, so
    they are presumably provided elsewhere (e.g. by subclasses) -- TODO
    confirm.
    """

    def setUp(self):
        """Save the calculations fixture as a dataset and build a parser."""
        TestBase.setUp(self)
        fixture_name = 'good_eats_with_calculations.csv'

        self.dataset = Dataset()
        self.dataset.save(self.test_dataset_ids[fixture_name])

        raw_frame = self.get_data(fixture_name)
        self.dataset.save_observations(recognize_dates(raw_frame))

        self.group = None
        self.parser = Parser(self.dataset)
        # Number of places quoted in the message built by _equal_msg.
        self.places = 5

    def _equal_msg(self, calculated, stored, formula):
        """Build the failure message for a calculated/stored mismatch."""
        values_part = '(calculated %s) %s != (stored %s) %s ' % (
            type(calculated), calculated, type(stored), stored)
        context_part = '(within %s places), formula: %s' % (
            self.places, formula)
        return values_part + context_part

    def _test_calculator(self):
        """Validate, save and compute each formula, then check its results."""
        self.dframe = self.dataset.dframe()
        first_row = self.dframe.irow(0)

        self.start_num_cols = len(self.dframe.columns.tolist())
        self.added_num_cols = 0

        # Map each schema column's label to its column name, then split the
        # mapping into two parallel lists.
        slugs_by_label = dict(
            (attrs[Dataset.LABEL], slug)
            for slug, attrs in self.dataset.schema.items()
        )
        labels, slugs = zip(*slugs_by_label.items())
        self.label_list = list(labels)
        self.slugified_key_list = list(slugs)

        for number, formula in enumerate(self.calculations):
            calc_name = 'test-%s' % number
            self.parser.validate_formula(formula, first_row)

            calculator = Calculator(self.dataset)

            # The return value is not used here; the call itself is kept.
            self.dataset.split_groups(self.group)
            calculation = Calculation()
            calculation.save(self.dataset, formula, calc_name, self.group)
            calculator.calculate_columns([calculation])

            self.column_labels_to_slugs = self.dataset.schema.labels_to_slugs

            self._test_calculation_results(calc_name, formula)
Ejemplo n.º 2
0
    def test_dframe(self):
        # Save the good_eats.csv fixture as observations, then check the
        # DataFrame that the dataset returns for it.
        dataset = Dataset()
        dataset.save(self.test_dataset_ids['good_eats.csv'])
        dataset.save_observations(
            recognize_dates(self.get_data('good_eats.csv')))
        dframe = dataset.dframe()

        self.assertIsInstance(dframe, DataFrame)
        # NOTE(review): all() over a DataFrame iterates its column labels,
        # not its cells, so this assertion only checks that every label is
        # truthy. A cell-wise comparison would need to account for NaN and
        # for any columns changed by recognize_dates -- confirm the intended
        # check before tightening it.
        self.assertTrue(all(self.get_data('good_eats.csv').reindex(
                        columns=dframe.columns).eq(dframe)))
        columns = dframe.columns
        # ensure no reserved keys
        for key in MONGO_RESERVED_KEY_STRS:
            self.assertNotIn(key, columns)
        # ensure date is converted
        self.assertIsInstance(dframe.submit_date[0], datetime)
Ejemplo n.º 3
0
class TestCalculator(TestBase):
    """Shared fixture and driver for formula-calculation tests.

    ``_test_calculator`` reads ``self.calculations`` and calls
    ``self._test_calculation_results``; neither is defined in this class, so
    they are presumably provided elsewhere (e.g. by subclasses) -- TODO
    confirm.
    """

    def setUp(self):
        """Save the calculations fixture as a dataset with observations."""
        TestBase.setUp(self)
        csv_name = 'good_eats_with_calculations.csv'

        self.dataset = Dataset()
        self.dataset.save(self.test_dataset_ids[csv_name])
        self.dataset.save_observations(
            recognize_dates(self.get_data(csv_name)))

        self.group = None
        # Number of places quoted in the message built by _equal_msg.
        self.places = 5

    def _equal_msg(self, calculated, stored, formula):
        """Build the failure message for a calculated/stored mismatch."""
        mismatch = '(calculated %s) %s != (stored %s) %s ' % (
            type(calculated), calculated, type(stored), stored)
        details = '(within %s places), formula: %s' % (self.places, formula)
        return mismatch + details

    def _test_calculator(self):
        """Validate, save and compute each formula, then check its results."""
        self.dframe = self.dataset.dframe()

        self.start_num_cols = len(self.dframe.columns.tolist())
        self.added_num_cols = 0

        # Map each schema column's label to its column name, then split the
        # mapping into two parallel lists.
        slugs_by_label = dict(
            (attrs[Dataset.LABEL], slug)
            for slug, attrs in self.dataset.schema.items()
        )
        labels, slugs = zip(*slugs_by_label.items())
        self.label_list = list(labels)
        self.slugified_key_list = list(slugs)

        for number, formula in enumerate(self.calculations):
            calc_name = 'test-%s' % number

            Parser.validate_formula(formula, self.dataset)

            calculation = Calculation()
            calculation.save(self.dataset, formula, calc_name, self.group)
            # Timestamp is recorded after saving and before computing.
            self.now = now()
            calculate_columns(self.dataset, [calculation])

            self.column_labels_to_slugs = self.dataset.schema.labels_to_slugs

            self._test_calculation_results(calc_name, formula)
Ejemplo n.º 4
0
class TestObservation(TestBase):
    """Tests for saving, finding, encoding and deleting observations."""

    def setUp(self):
        """Create the good_eats dataset and a default query on rating."""
        TestBase.setUp(self)
        self.dataset = Dataset()
        self.dataset.save(self.test_dataset_ids['good_eats.csv'])
        self.query_args = QueryArgs({"rating": "delectible"})

    def __save_records(self):
        """Save the good_eats.csv fixture and return the records found.

        Also asserts that the records come back as a list whose first item
        is a dict containing an '_id' key.
        """
        Observation.save(self.get_data('good_eats.csv'),
                         self.dataset)
        records = Observation.find(self.dataset)
        self.assertIsInstance(records, list)
        self.assertIsInstance(records[0], dict)
        self.assertIn('_id', records[0])

        return records

    def __decode(self, row):
        """Pass *row* through Observation.encode using the decoding map."""
        return Observation.encode(row,
                                  encoding=Observation.decoding(self.dataset))

    def test_encoding(self):
        self.__save_records()
        encoding = Observation.encoding(self.dataset)

        for column in self.dataset.dframe().columns:
            # The Mongo id column is looked up under its encoded name.
            expected_key = MONGO_ID_ENCODED if column == MONGO_ID else column

            self.assertIn(expected_key, encoding)

        for v in encoding.values():
            # int(v) raises if the encoded value is not integer-like.
            self.assertIsInstance(int(v), int)

    def test_encode_no_dataset(self):
        records = self.__save_records()

        # Without an encoding argument, encode is expected to leave each
        # record's JSON dump unchanged.
        for record in records:
            encoded = Observation.encode(record)
            self.assertEqual(dump_mongo_json(encoded), dump_mongo_json(record))

    def test_save(self):
        records = self.__save_records()
        self.assertEqual(len(records), 19)

    def test_save_over_bulk(self):
        Observation.save(self.get_data('good_eats_large.csv'),
                         self.dataset)
        records = Observation.find(self.dataset)

        self.assertEqual(len(records), 1001)

    def test_find(self):
        self.__save_records()
        rows = Observation.find(self.dataset)

        self.assertIsInstance(rows, list)

    def test_find_with_query(self):
        self.__save_records()
        rows = Observation.find(self.dataset, self.query_args)

        self.assertIsInstance(rows, list)

    def test_find_with_select(self):
        self.__save_records()
        query_args = QueryArgs(select={"rating": 1})
        rows = Observation.find(self.dataset, query_args)

        self.assertIsInstance(rows, list)

        row = self.__decode(rows[0])

        # assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(sorted(row.keys()), ['_id', 'rating'])

    def test_find_with_select_and_query(self):
        self.__save_records()
        self.query_args.select = {"rating": 1}
        rows = Observation.find(self.dataset, self.query_args)
        self.assertIsInstance(rows, list)

        row = self.__decode(rows[0])

        self.assertEqual(sorted(row.keys()), ['_id', 'rating'])

    def test_delete_all(self):
        self.__save_records()
        records = Observation.find(self.dataset)
        self.assertNotEqual(records, [])
        Observation.delete_all(self.dataset)
        records = Observation.find(self.dataset)

        self.assertEqual(records, [])

    def test_delete_one(self):
        self.__save_records()
        records = Observation.find(self.dataset)
        self.assertNotEqual(records, [])

        row = self.__decode(records[0])

        Observation.delete(self.dataset, row[INDEX])
        new_records = Observation.find(self.dataset)

        # Dump to avoid problems with nan != nan.
        self.assertEqual(dump_mongo_json(records[1:]),
                         dump_mongo_json(new_records))

    def test_delete_encoding(self):
        self.__save_records()
        encoding = Observation.encoding(self.dataset)

        self.assertIsInstance(encoding, dict)

        Observation.delete_encoding(self.dataset)
        encoding = Observation.encoding(self.dataset)

        self.assertIsNone(encoding)