Ejemplo n.º 1
0
    def test_build_schema(self):
        """Check the schema stored by ``Dataset.build_schema`` per test dataset.

        For every name in ``self.TEST_DATASETS`` a dataset is saved, a schema
        is built from the matching test data, and the dataset is loaded again
        through ``Dataset.find_one``.  The reloaded record must contain the
        created-at, schema and updated-at keys, and every schema entry must:

        * have a name not seen before in the same schema,
        * have a name free of characters matched by ``\\W``,
        * carry the ``SIMPLETYPE``, ``OLAP_TYPE`` and ``Dataset.LABEL``
          attributes,
        * have a label equal to one not-yet-claimed column of the source
          data, and
        * not be named like one of ``MONGO_RESERVED_KEY_STRS``.

        Finally, every column of the source data must have been claimed by
        exactly one schema entry.
        """
        illegal_col_regex = re.compile(r'\W')

        for dataset_name in self.TEST_DATASETS:
            dataset_id = self.test_dataset_ids[dataset_name]

            dataset = Dataset()
            dataset.save(dataset_id)
            dataset.build_schema(self.get_data(dataset_name))

            # get dataset with new schema
            dataset = Dataset.find_one(dataset_id)

            record_keys = dataset.record.keys()
            for key in (
                    Dataset.CREATED_AT, Dataset.SCHEMA, Dataset.UPDATED_AT):
                self.assertIn(key, record_keys)

            # Fetched again rather than reusing the frame handed to
            # build_schema, so the comparison uses untouched column names.
            # NOTE(review): assumes get_data returns equivalent data on
            # every call -- confirm against the test base class.
            df_columns = self.get_data(dataset_name).columns.tolist()
            seen_columns = set()

            for column_name, column_attributes in dataset.schema.items():
                # check column_name is unique
                self.assertNotIn(column_name, seen_columns)
                seen_columns.add(column_name)

                # check column name is only legal chars; search() returns
                # None exactly when no illegal character is present
                self.assertIsNone(illegal_col_regex.search(column_name))

                # check has required attributes
                for attribute in (SIMPLETYPE, OLAP_TYPE, Dataset.LABEL):
                    self.assertIn(attribute, column_attributes)

                # check label is an original column; removing it afterwards
                # makes a label reused by a second schema entry fail here
                label = column_attributes[Dataset.LABEL]
                self.assertIn(label, df_columns)
                df_columns.remove(label)

                # check not reserved key
                self.assertNotIn(column_name, MONGO_RESERVED_KEY_STRS)

            # ensure all columns in df_columns have stored columns; comparing
            # against [] makes a failure list the unclaimed columns
            self.assertEqual(df_columns, [])