Beispiel #1
0
class TestFileServer(unittest.TestCase):
    def setUp(self):
        """Create an empty file server repository."""
        # Drop project descriptor directory
        if os.path.isdir(SERVER_DIR):
            shutil.rmtree(SERVER_DIR)
        # Setup project repository
        self.db = DefaultFileServer(SERVER_DIR)

    def tearDown(self):
        """Clean-up by dropping file server directory.
        """
        shutil.rmtree(SERVER_DIR)

    def test_delete_file(self):
        """Test delete file method."""
        f = self.db.upload_file(CSV_FILE)
        f = self.db.get_file(f.identifier)
        self.assertIsNotNone(f)
        self.assertTrue(self.db.delete_file(f.identifier))
        f = self.db.get_file(f.identifier)
        self.assertIsNone(f)

    def test_get_file(self):
        """Test file get method."""
        f = self.db.upload_file(CSV_FILE)
        f = self.db.get_file(f.identifier)
        self.assertEquals(f.columns, 3)
        self.assertEquals(f.rows, 2)
        self.assertEquals(f.name, 'dataset.csv')
        # Ensure that the file parses as a zipped TSV file
        with f.open() as csvfile:
            rows = 0
            for row in csv.reader(csvfile, delimiter=f.delimiter):
                self.assertEquals(len(row), f.columns)
                rows += 1
        self.assertEquals(rows - 1, f.rows)

    def test_list_file(self):
        """Test upload of different file types and the list files method."""
        fh = self.db.upload_file(CSV_FILE)
        self.assertFalse(fh.compressed)
        self.assertEquals(fh.delimiter, ',')
        fh = self.db.upload_file(GZIP_CSV_FILE)
        self.assertTrue(fh.compressed)
        self.assertEquals(fh.delimiter, ',')
        fh = self.db.upload_file(TSV_FILE)
        self.assertFalse(fh.compressed)
        self.assertEquals(fh.delimiter, '\t')
        fh = self.db.upload_file(GZIP_TSV_FILE)
        self.assertTrue(fh.compressed)
        self.assertEquals(fh.delimiter, '\t')
        files = self.db.list_files()
        self.assertEquals(len(files), 4)
        # Ensure that each of the files parses as a zipped TSV file
        for f in files:
            with f.open() as csvfile:
                rows = 0
                for row in csv.reader(csvfile, delimiter=f.delimiter):
                    self.assertEquals(len(row), f.columns)
                    rows += 1
            self.assertEquals(rows - 1, f.rows)

    def test_rename_file(self):
        """Test rename file method."""
        f = self.db.upload_file(CSV_FILE)
        f = self.db.get_file(f.identifier)
        f = self.db.rename_file(f.identifier, 'somename')
        self.assertEquals(f.name, 'somename')
        f = self.db.get_file(f.identifier)
        f = self.db.rename_file(f.identifier, 'somename')
        self.assertEquals(f.name, 'somename')
        f = self.db.rename_file(f.identifier, 'somename')
        self.assertEquals(f.name, 'somename')

    def test_upload_file(self):
        """Test file upload."""
        f = self.db.upload_file(CSV_FILE)
        self.assertEquals(f.columns, 3)
        self.assertEquals(f.rows, 2)
        self.assertEquals(f.name, 'dataset.csv')
Beispiel #2
0
class TestLoadMimirDataset(unittest.TestCase):

    def setUp(self):
        """Create an empty work trails repository."""
        # Cleanup first
        self.cleanUp()
        self.datastore = MimirDataStore(DATASTORE_DIR)
        self.fileserver = DefaultFileServer(FILESERVER_DIR)
        self.vizual = MimirVizualEngine(self.datastore, self.fileserver)

    def tearDown(self):
        """Clean-up by deleting directories.
        """
        self.cleanUp()

    def cleanUp(self):
        """Remove datastore and fileserver directory."""
        # Delete directories
        for d in [DATASTORE_DIR, FILESERVER_DIR]:
            if os.path.isdir(d):
                shutil.rmtree(d)

    def test_load(self):
        """Run workflow with default configuration."""
        mimir.initialize()
        self.update_cell(CSV_FILE, 2, 0, 'int', 10)
        self.update_cell(CSV_FILE, 2, 0, 'int', 10.3, result_type='real')
        self.update_cell(CSV_FILE, 2, 0, 'int', None)
        self.update_cell(CSV_FILE, 3, 0, 'real', 10.3)
        self.update_cell(CSV_FILE, 3, 0, 'real', 10, result_value=10.0)
        self.update_cell(CSV_FILE, 3, 0, 'real', 'A', result_type='varchar')
        self.update_cell(CSV_FILE, 3, 0, 'real', None)
        self.update_cell(CSV_FILE, 4, 0, 'varchar', 'A')
        self.update_cell(CSV_FILE, 4, 0, 'varchar', 10, result_value='10')
        self.update_cell(CSV_FILE, 4, 0, 'varchar', 10.87, result_value='10.87')
        self.update_cell(CSV_FILE, 4, 0, 'varchar', None)
        self.update_cell(CSV_FILE, 8, 0, 'bool', 'False', result_value=False)
        self.update_cell(CSV_FILE, 8, 0, 'bool', '0', result_value=False)
        self.update_cell(CSV_FILE, 8, 0, 'bool', None)
        self.update_cell(CSV_FILE, 8, 1, 'bool', True, result_value=True)
        self.update_cell(CSV_FILE, 8, 1, 'bool', '1', result_value=True)
        self.update_cell(CSV_FILE, 8, 1, 'bool', 'A', result_value='A', result_type='varchar')
        self.update_cell(CSV_FILE, 8, 1, 'bool', 10.87, result_value='10.87', result_type='varchar')
        self.update_cell(CSV_FILE_DT, 1, 0, 'date', '2018-05-09')
        self.update_cell(CSV_FILE_DT, 1, 0, 'date', '20180509', result_value='20180509', result_type='varchar')
        self.update_cell(CSV_FILE_DT, 1, 0, 'date', None)
        self.update_cell(CSV_FILE_DT, 0, 0, 'datetime', '2018-05-09 12:03:22.0000')
        self.update_cell(CSV_FILE_DT, 0, 0, 'datetime', 'ABC', result_value='ABC', result_type='varchar')
        self.update_cell(CSV_FILE_DT, 0, 0, 'datetime', None)
        mimir.finalize()

    def update_cell(self, filename, col, row, data_type, value, result_value=None, result_type=None):
        """Update the value of the given cell. The column data type is expected
        to match the given datatype. The optional result value is the expected
        value of the cell in the modified dataset.
        """
        f_handle = self.fileserver.upload_file(filename)
        ds = self.datastore.load_dataset(f_handle)
        #print [c.name_in_rdb + ' AS ' + c.name + '(' + c.data_type + ')' for c in ds.columns]
        self.assertEquals(ds.columns[col].data_type, data_type)
        rows = ds.fetch_rows()
        self.assertNotEquals(rows[row].values[col], value)
        _, ds_id = self.vizual.update_cell(ds.identifier, col, row, value)
        ds = self.datastore.get_dataset(ds_id)
        #print [c.name_in_rdb + ' AS ' + c.name + '(' + c.data_type + ')' for c in ds.columns]
        if result_type is None:
            self.assertEquals(ds.columns[col].data_type, data_type)
        else:
            self.assertEquals(ds.columns[col].data_type, result_type)
        rows = ds.fetch_rows()
        if result_value is None:
            self.assertEquals(rows[row].values[col], value)
        else:
            self.assertEquals(rows[row].values[col], result_value)
        self.fileserver.delete_file(f_handle.identifier)