Example #1
    def test_build_schema(self):
        illegal_col_regex = re.compile(r'\W|[A-Z]')

        for dataset_name in self.TEST_DATASETS:
            dataset = Dataset.create(self.test_dataset_ids[dataset_name])
            Dataset.build_schema(dataset,
                    self.test_data[dataset_name].dtypes)

            # get dataset with new schema
            dataset = Dataset.find_one(self.test_dataset_ids[dataset_name])

            for key in [CREATED_AT, SCHEMA, UPDATED_AT]:
                self.assertTrue(key in dataset.keys())

            df_columns = self.test_data[dataset_name].columns.tolist()
            seen_columns = []

            for column_name, column_attributes in dataset[SCHEMA].items():
                # check column_name is unique
                self.assertFalse(column_name in seen_columns)
                seen_columns.append(column_name)

                # check column name contains only legal characters
                self.assertFalse(illegal_col_regex.search(column_name))
                # check it has the required attributes
                self.assertTrue(SIMPLETYPE in column_attributes)
                self.assertTrue(OLAP_TYPE in column_attributes)
                self.assertTrue(LABEL in column_attributes)

                # check label is an original column
                self.assertTrue(column_attributes[LABEL] in df_columns)
                df_columns.remove(column_attributes[LABEL])

            # ensure every original column was mapped to a stored column
            self.assertTrue(len(df_columns) == 0)
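
For reference, a minimal sketch of what the illegal_col_regex in the test above accepts and rejects: any non-word character or any uppercase letter makes a column name illegal, so stored column names are expected to be lowercase, underscore-style slugs (the sample names below are illustrative only).

import re

illegal_col_regex = re.compile(r'\W|[A-Z]')

assert illegal_col_regex.search('Total Amount')      # space and uppercase: rejected
assert illegal_col_regex.search('rate%')             # '%' is a non-word character: rejected
assert not illegal_col_regex.search('total_amount')  # lowercase slug: accepted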
Example #2
    def test_update(self):
        for dataset_name in self.TEST_DATASETS:
            dataset = Dataset.create(self.test_dataset_ids[dataset_name])
            self.assertFalse('field' in dataset)
            Dataset.update(dataset, {'field': {'key': 'value'}})
            dataset = Dataset.find_one(self.test_dataset_ids[dataset_name])
            self.assertTrue('field' in dataset)
            self.assertEqual(dataset['field'], {'key': 'value'})
Example #3
File: io.py Project: asseym/bamboo
def create_dataset_from_csv(csv_file):
    """
    Create a dataset from the uploaded .csv file.
    """
    dataset_id = uuid.uuid4().hex
    dataset = Dataset.create(dataset_id)

    # write out to a named tempfile so that pandas read_csv
    # has a real file path to open
    with tempfile.NamedTemporaryFile() as tmpfile:
        tmpfile.write(read_uploaded_file(csv_file))
        import_dataset(tmpfile.name, dataset)

    return {ID: dataset_id}
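
The comment above refers to spilling an in-memory upload to disk so pandas can read it by path. A minimal standalone sketch of that pattern, assuming the upload is already available as a string of bytes (read_uploaded_file and import_dataset are bamboo helpers and are not reproduced here):

import tempfile

import pandas as pd


def read_csv_via_tempfile(raw_bytes):
    # pandas read_csv needs a path or file handle; a NamedTemporaryFile
    # provides a real filesystem path for the uploaded contents
    with tempfile.NamedTemporaryFile(suffix='.csv') as tmpfile:
        tmpfile.write(raw_bytes)
        tmpfile.flush()  # ensure the bytes are on disk before pandas reads them
        return pd.read_csv(tmpfile.name)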
Example #4
File: io.py Project: asseym/bamboo
def create_dataset_from_url(url, allow_local_file=False):
    """
    Load a URL, read from a CSV, create a dataset and return the unique ID.
    """
    _file = None

    try:
        _file = open_data_file(url, allow_local_file)
    except (IOError, urllib2.HTTPError):
        # could not read the file/URL; fall through to the error return below
        pass

    if not _file:
        # could not get a file handle
        return {ERROR: 'could not get a filehandle for: %s' % url}

    dataset_id = uuid.uuid4().hex
    dataset = Dataset.create(dataset_id)
    import_dataset(_file, dataset)

    return {ID: dataset_id}
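
A hypothetical call to the function above; the URL is illustrative only, and ERROR and ID are the module constants the function already uses.

result = create_dataset_from_url('http://example.com/data.csv')

if ID in result:
    dataset_id = result[ID]        # success: hex ID of the new dataset
else:
    error_message = result[ERROR]  # failure: human-readable message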
Example #5
    def test_create(self):
        for dataset_name in self.TEST_DATASETS:
            dataset = Dataset.create(self.test_dataset_ids[dataset_name])
            self.assertTrue(isinstance(dataset, dict))