def test_build_schema(self):
    illegal_col_regex = re.compile(r'\W|[A-Z]')

    for dataset_name in self.TEST_DATASETS:
        dataset = Dataset.create(self.test_dataset_ids[dataset_name])
        Dataset.build_schema(dataset, self.test_data[dataset_name].dtypes)

        # get dataset with new schema
        dataset = Dataset.find_one(self.test_dataset_ids[dataset_name])

        for key in [CREATED_AT, SCHEMA, UPDATED_AT]:
            self.assertTrue(key in dataset.keys())

        df_columns = self.test_data[dataset_name].columns.tolist()
        seen_columns = []

        for column_name, column_attributes in dataset[SCHEMA].items():
            # check column_name is unique
            self.assertFalse(column_name in seen_columns)
            seen_columns.append(column_name)

            # check column name contains only legal chars
            self.assertFalse(illegal_col_regex.search(column_name))

            # check required attributes are present
            self.assertTrue(SIMPLETYPE in column_attributes)
            self.assertTrue(OLAP_TYPE in column_attributes)
            self.assertTrue(LABEL in column_attributes)

            # check label is an original column
            self.assertTrue(column_attributes[LABEL] in df_columns)
            df_columns.remove(column_attributes[LABEL])

        # ensure every original column has a stored schema column
        self.assertTrue(len(df_columns) == 0)
def test_update(self):
    for dataset_name in self.TEST_DATASETS:
        dataset = Dataset.create(self.test_dataset_ids[dataset_name])
        self.assertFalse('field' in dataset)

        Dataset.update(dataset, {'field': {'key': 'value'}})
        dataset = Dataset.find_one(self.test_dataset_ids[dataset_name])

        self.assertTrue('field' in dataset)
        self.assertEqual(dataset['field'], {'key': 'value'})
def create_dataset_from_csv(csv_file):
    """
    Create a dataset from the uploaded .csv file.
    """
    dataset_id = uuid.uuid4().hex
    dataset = Dataset.create(dataset_id)

    # write out to a named tempfile so pandas read_csv
    # has a real file path to open
    with tempfile.NamedTemporaryFile() as tmpfile:
        tmpfile.write(read_uploaded_file(csv_file))
        # flush buffered data so read_csv sees the complete file
        tmpfile.flush()
        import_dataset(tmpfile.name, dataset)

    return {ID: dataset_id}
def create_dataset_from_url(url, allow_local_file=False):
    """
    Load a URL, read from a CSV, create a dataset and return the unique ID.
    """
    _file = None

    try:
        _file = open_data_file(url, allow_local_file)
    except (IOError, urllib2.HTTPError):
        # error reading file/url; _file stays None and we fall
        # through to the error return below
        pass

    if not _file:
        # could not get a file handle
        return {ERROR: 'could not get a filehandle for: %s' % url}

    dataset_id = uuid.uuid4().hex
    dataset = Dataset.create(dataset_id)

    import_dataset(_file, dataset)

    return {ID: dataset_id}
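A minimal usage sketch for the two helpers above, assuming they and the ID/ERROR constants are importable from the surrounding module (the example URL and the print-based handling are illustrative assumptions, not taken from the source):

# hypothetical caller: fetch a remote CSV and report the new dataset id
result = create_dataset_from_url('http://example.com/data.csv')

if ERROR in result:
    # open_data_file failed or returned nothing; result carries the message
    print(result[ERROR])
else:
    # on success the helper returns the hex uuid under the ID key
    print('created dataset %s' % result[ID])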
def test_create(self):
    for dataset_name in self.TEST_DATASETS:
        dataset = Dataset.create(self.test_dataset_ids[dataset_name])
        self.assertTrue(isinstance(dataset, dict))