def test_partition_2(self):
    """Create partitions from every contiguous combination of name
    components and verify each one can be found again by vid."""
    bundle = Bundle()
    bundle.clean()
    bundle.pre_prepare()
    bundle.prepare()
    bundle.post_prepare()

    first_table = self.bundle.schema.tables[0]

    # Doubled so slices of length 1..4 can start at any of the four
    # components and wrap around.
    components = (('time', 'time2'), ('space', 'space3'),
                  ('table', first_table.name), ('grain', 'grain4')) * 2

    pids = {}
    for start in range(4):
        for length in range(1, 5):
            pid = self.bundle.identity.as_partition(
                **dict(components[start:start + length]))
            pids[pid.fqname] = pid

    for pid in pids.values():
        partition = bundle.partitions.new_db_partition(**pid.dict)
        partition.create()

        found = bundle.partitions._find_orm(
            PartitionNameQuery(vid=pid.vid)).all()
        self.assertIn(pid.sname, [orm_p.name for orm_p in found])
def x_test_rewrite(self):
    """Disabled test ('x_' prefix): verify that prepare() rewrites
    bundle.yaml, adding the computed 'names' section.

    Swaps a minimal hand-written config in place of the real
    bundle.yaml, runs the prepare cycle, and checks that the reloaded
    config carries both the original identity values and the derived
    fqname. The original file is restored in the finally block.

    NOTE(review): uses a Python 2 print statement; this module predates
    Python 3.
    """
    from testbundle.bundle import Bundle
    import json
    from ambry.run import get_runconfig

    # Prepare to rewrite the bundle.yaml file.
    bundle = Bundle()

    orig = os.path.join(bundle.bundle_dir, 'bundle.yaml')
    save = os.path.join(bundle.bundle_dir, 'bundle.yaml.save')

    try:
        # Set the real config aside, then write the minimal replacement.
        # (YAML is a superset of JSON, so json.dumps output is valid here.)
        os.rename(orig, save)

        print 'Write to ', orig

        with open(orig, 'w') as f:
            f.write(json.dumps(
                {
                    "identity": {
                        "dataset": "dataset1",
                        "id": "dfoo",
                        "revision": 100,
                        "source": "source1",
                        "subset": "subset1",
                        "variation": "variation1",
                        "version": "1.0.1",
                        "vid": "dfob001",
                    },
                    "about": {
                        "author": "*****@*****.**"
                    }
                }
            ))

        get_runconfig.clear()  # clear config cache.

        bundle = Bundle()
        bundle.clean()
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()  # Does the rewrite, adding the 'names'

        # Need to clear and reload one more time for the 'names' to appear
        get_runconfig.clear()  # clear config cache.

        bundle = Bundle()
        bundle.exit_on_fatal = False

        self.assertEquals('dataset1', bundle.config.identity.dataset)
        self.assertEquals('dfoo', bundle.config.identity.id)
        self.assertEquals(100, bundle.config.identity.revision)
        # The fqname folds revision 100 into the version string.
        self.assertEquals(
            "source1-dataset1-subset1-variation1-1.0.100~dfoo01C",
            bundle.config.names.fqname)
        self.assertEquals("*****@*****.**", bundle.config.about.author)

    finally:
        # Always put the real bundle.yaml back, then drop build state.
        os.rename(save, orig)
        self.delete_bundle()
def test_build_bundle_hdf(self):
    """Run a complete prepare/build cycle using the HDF builder."""
    # Discard any leftover state from a prior run.
    Bundle().clean()

    bundle = Bundle()
    bundle.exit_on_fatal = False

    for phase in ('pre_prepare', 'prepare', 'post_prepare',
                  'pre_build', 'build_hdf', 'post_build'):
        getattr(bundle, phase)()
def test_simple_build(self):
    """Restore the editable schema file, then run a full clean build."""
    import shutil

    setup_bundle = Bundle()
    shutil.copyfile(
        setup_bundle.filesystem.path('meta', 'schema-edit-me.csv'),
        setup_bundle.filesystem.path('meta', 'schema.csv'))
    setup_bundle.clean()

    bundle = Bundle()
    bundle.exit_on_fatal = False
    for phase in ('pre_prepare', 'prepare', 'post_prepare',
                  'pre_build', 'build', 'post_build'):
        getattr(bundle, phase)()
def test_simple_build(self):
    """Reset schema.csv from its editable copy, then do a clean full build."""
    import shutil

    scratch = Bundle()
    src = scratch.filesystem.path('meta', 'schema-edit-me.csv')
    dst = scratch.filesystem.path('meta', 'schema.csv')
    shutil.copyfile(src, dst)
    scratch.clean()

    bundle = Bundle()
    bundle.exit_on_fatal = False

    bundle.pre_prepare()
    bundle.prepare()
    bundle.post_prepare()

    bundle.pre_build()
    bundle.build()
    bundle.post_build()
def test_build_bundle(self):
    """Build twice with the code-generating inserter, then restore the schema.

    The second build pass picks up schema changes made during the first
    (type errors in the 'coding' table force column-type rewrites). The
    finally block restores meta/schema.csv and re-runs a normal build so
    that later tests start from a good state.

    Fix over the original: the original had a leftover ``#try:`` and a
    dead ``try: pass finally:`` around the cleanup, so a failure in
    either build pass skipped the schema restore entirely. The builds
    are now inside the try, guaranteeing cleanup.
    """
    import shutil

    bundle = Bundle()
    shutil.copyfile(
        bundle.filesystem.path('meta', 'schema-edit-me.csv'),
        bundle.filesystem.path('meta', 'schema.csv'))

    try:
        bundle.database.enable_delete = True

        # First pass: build with the inserter that records coding errors.
        bundle.clean()
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()
        bundle.pre_build()
        bundle.build_db_inserter_codes()
        bundle.post_build()
        bundle.close()

        # The second run will use the changes to the schema made in the
        # first run, due to the types errors in the 'coding' table.
        bundle.clean()
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()
        bundle.pre_build()
        bundle.build_db_inserter_codes()
        bundle.post_build()
        bundle.close()
    finally:
        # Need to clean up to ensure that we're back to a good state.
        # This runs the normal build, which will be used by the other
        # tests.
        shutil.copyfile(
            bundle.filesystem.path('meta', 'schema-edit-me.csv'),
            bundle.filesystem.path('meta', 'schema.csv'))

        bundle.clean()
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()
        bundle.pre_build()
        bundle.build()
        bundle.post_build()
def test_build_bundle(self):
    """Run the inserter-codes build twice, then restore the edited schema.

    Pass two depends on schema changes recorded during pass one (type
    errors in the 'coding' table). Afterwards the editable schema file
    is copied back and a normal build is run so later tests see a
    consistent bundle.

    Fix over the original: the cleanup sat under a dead
    ``try: pass finally:`` (with a stray ``#try:`` above the builds),
    so any build failure skipped the restore. The build passes are now
    guarded by the try so the finally always restores state.
    """
    import shutil

    bundle = Bundle()
    shutil.copyfile(bundle.filesystem.path('meta', 'schema-edit-me.csv'),
                    bundle.filesystem.path('meta', 'schema.csv'))

    try:
        bundle.database.enable_delete = True

        # First pass.
        bundle.clean()
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()
        bundle.pre_build()
        bundle.build_db_inserter_codes()
        bundle.post_build()
        bundle.close()

        # The second run will use the changes to the schema made in the
        # first run, due to the types errors in the 'coding' table.
        bundle.clean()
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()
        bundle.pre_build()
        bundle.build_db_inserter_codes()
        bundle.post_build()
        bundle.close()
    finally:
        # Need to clean up to ensure that we're back to a good state.
        # This runs the normal build, which will be used by the other
        # tests.
        shutil.copyfile(
            bundle.filesystem.path('meta', 'schema-edit-me.csv'),
            bundle.filesystem.path('meta', 'schema.csv'))

        bundle.clean()
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()
        bundle.pre_build()
        bundle.build()
        bundle.post_build()
def test_partition_2(self):
    """Exercise partition creation for all contiguous slices of the
    four partition-name components, then look each one up by vid."""
    bundle = Bundle()
    bundle.clean()
    bundle.pre_prepare()
    bundle.prepare()
    bundle.post_prepare()

    base_table = self.bundle.schema.tables[0]
    pairs = (('time', 'time2'), ('space', 'space3'),
             ('table', base_table.name), ('grain', 'grain4'))
    # Concatenate with itself so every slice below stays in bounds.
    pairs = pairs + pairs

    pids = {}
    for offset in range(4):
        for extra in range(4):
            window = dict(pairs[offset:offset + extra + 1])
            pid = self.bundle.identity.as_partition(**window)
            pids[pid.fqname] = pid

    for pid in pids.values():
        created = bundle.partitions.new_db_partition(**pid.dict)
        created.create()

        matches = bundle.partitions._find_orm(
            PartitionNameQuery(vid=pid.vid)).all()
        names = [match.name for match in matches]
        self.assertIn(pid.sname, names)
def test_bundle_build(self):
    """End-to-end partition-creation test at three API levels.

    Section 1 uses the low-level _new_orm_partition (inside an explicit
    session), section 2 the mid-level _new_partition, and section 3 the
    high-level new_db_partition / find_or_new, checking naming, conflict
    detection, and the PartitionNameQuery ANY/NONE wildcards at each
    level. Finishes with a full build and identity checks.

    All partitions here carry table='tone', so vnames include the table.
    """
    from ambry.dbexceptions import ConflictError

    bundle = Bundle()

    # Need to clear the library, or the Bundle's pre_prepare
    # will cancel the build if this version is already installed
    bundle.library.purge()

    bundle.exit_on_fatal = False
    bundle.clean()
    bundle.database.create()
    bundle.prepare()

    bp = bundle.partitions

    # Section 1: low-level ORM creation — six partitions, two times x
    # (two spaces + no space).
    with bundle.session:
        bp._new_orm_partition(PartialPartitionName(table = 'tone', time = 't1', space='s1'))
        bp._new_orm_partition(PartialPartitionName(table = 'tone', time = 't1', space='s2'))
        bp._new_orm_partition(PartialPartitionName(table = 'tone', time = 't1', space=None))
        bp._new_orm_partition(PartialPartitionName(table = 'tone', time = 't2', space='s1'))
        bp._new_orm_partition(PartialPartitionName(table = 'tone', time = 't2', space='s2'))
        bp._new_orm_partition(PartialPartitionName(table = 'tone', time = 't2', space=None))

    # Re-creating an existing partition name must raise.
    with self.assertRaises(ConflictError):
        with bundle.session:
            bp._new_orm_partition(PartialPartitionName(table = 'tone', time = 't1', space='s1'))

    # ANY matches only partitions where the component is set.
    pnq = PartitionNameQuery(table = 'tone', time=NameQuery.ANY, space='s1')
    names = [p.vname for p in bp._find_orm(pnq).all()]
    self.assertEqual({u'source-dataset-subset-variation-tone-t1-s1-0.0.1',
                      u'source-dataset-subset-variation-tone-t2-s1-0.0.1'},
                     set(names))

    names = [p.vname for p in bp._find_orm(PartitionNameQuery(space=NameQuery.ANY)).all()]
    self.assertEqual(6, len(names))

    names = [p.vname for p in bp._find_orm(
        PartitionNameQuery(table = 'tone', time='t1', space=NameQuery.ANY)).all()]
    self.assertEqual({'source-dataset-subset-variation-tone-t1-s2-0.0.1',
                      'source-dataset-subset-variation-tone-t1-0.0.1',
                      'source-dataset-subset-variation-tone-t1-s1-0.0.1'},
                     set(names))

    # NONE matches only the partition with no space component.
    names = [p.vname for p in bp._find_orm(
        PartitionNameQuery(table = 'tone', time='t1', space=NameQuery.NONE)).all()]
    self.assertEqual({'source-dataset-subset-variation-tone-t1-0.0.1'}, set(names))

    # Start over, use a higher level function to create the partitions
    bundle.close()  # Or you'll get an OperationalError

    bundle = Bundle()
    bundle.exit_on_fatal = False
    bundle.clean()
    bundle.database.create()
    bundle.prepare()

    bp = bundle.partitions

    # Section 2: same six partitions through _new_partition, checking
    # the count as we go.
    bp._new_partition(PartialPartitionName(table = 'tone', time = 't1', space='s1'))
    self.assertEquals(1, len(bp.all))
    bp._new_partition(PartialPartitionName(table = 'tone', time = 't1', space='s2'))
    self.assertEquals(2, len(bp.all))
    bp._new_partition(PartialPartitionName(table = 'tone', time = 't1', space=None))
    bp._new_partition(PartialPartitionName(table = 'tone', time = 't2', space='s1'))
    bp._new_partition(PartialPartitionName(table = 'tone', time = 't2', space='s2'))
    bp._new_partition(PartialPartitionName(table = 'tone', time = 't2', space=None))
    self.assertEquals(6, len(bp.all))

    names = [p.vname for p in bp._find_orm(
        PartitionNameQuery(time='t1', space=NameQuery.ANY)).all()]
    self.assertEqual({'source-dataset-subset-variation-tone-t1-s2-0.0.1',
                      'source-dataset-subset-variation-tone-t1-0.0.1',
                      'source-dataset-subset-variation-tone-t1-s1-0.0.1'},
                     set(names))

    # Start over, use a higher level function to create the partitions
    bundle.close()

    bundle = Bundle()
    bundle.exit_on_fatal = False
    bundle.clean()
    bundle.database.create()
    bundle.prepare()

    bp = bundle.partitions

    # Section 3: high-level creation; fqnames embed sequential vids.
    p = bp.new_db_partition(table = 'tone', time = 't1', space='s1')
    self.assertEquals('source-dataset-subset-variation-tone-t1-s1-0.0.1~piEGPXmDC8001001',
                      p.identity.fqname)

    p = bp.find_or_new(table = 'tone', time = 't1', space='s2')
    self.assertEquals('source-dataset-subset-variation-tone-t1-s2-0.0.1~piEGPXmDC8002001',
                      p.identity.fqname)

    # Duplicate — find_or_new must return the existing partition, not a new vid.
    p = bp.find_or_new(table = 'tone', time = 't1', space='s2')
    self.assertEquals('source-dataset-subset-variation-tone-t1-s2-0.0.1~piEGPXmDC8002001',
                      p.identity.fqname)

    # Ok! Build!
    bundle.close()

    bundle = Bundle()
    bundle.exit_on_fatal = False
    bundle.clean()
    bundle.pre_prepare()
    bundle.prepare()
    bundle.post_prepare()
    bundle.pre_build()
    bundle.build_db_inserter_codes()
    bundle.post_build()

    # Identity is stable across the build.
    self.assertEquals('diEGPXmDC8001', bundle.identity.vid)
    self.assertEquals('source-dataset-subset-variation', bundle.identity.sname)
    self.assertEquals('source-dataset-subset-variation-0.0.1', bundle.identity.vname)
    self.assertEquals('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                      bundle.identity.fqname)
class Test(TestBase):
    """Bundle tests: build/db bundle equivalence, schema manipulation,
    column transforms, validators, partitions, run config, builds, and
    session handling. Operates on the shared test bundle created in
    setUp."""

    def setUp(self):
        # Build (or copy a cached copy of) the test bundle once per test.
        super(Test, self).setUp()

        self.copy_or_build_bundle()

        self.bundle = Bundle()
        self.bundle_dir = self.bundle.bundle_dir

    def test_db_bundle(self):
        """A DbBundle opened on a BuildBundle's database reports the
        same identity names."""
        from ambry.bundle import BuildBundle, DbBundle

        b = BuildBundle(self.bundle_dir)
        b.clean()

        self.assertTrue(b.identity.id_ is not None)
        self.assertEquals('source-dataset-subset-variation', b.identity.sname)
        self.assertEquals('source-dataset-subset-variation-0.0.1', b.identity.vname)

        b.database.create()

        db_path = b.database.path

        dbb = DbBundle(db_path)

        self.assertEqual("source-dataset-subset-variation", dbb.identity.sname)
        self.assertEqual("source-dataset-subset-variation-0.0.1", dbb.identity.vname)

    def test_paths(self):
        """Test that a build bundle and a db bundle both produce the
        same paths."""
        from ambry.bundle import DbBundle

        b = self.bundle
        db = DbBundle(b.database.path)

        self.assertEqual(b.path, db.path)
        self.assertTrue(os.path.exists(b.path))

        self.assertEqual(b.database.path, db.database.path)
        self.assertTrue(os.path.exists(b.database.path))

        self.assertEqual(b.identity.path, db.identity.path)

        # Compare the partitions pairwise between the two bundle views.
        for p in zip(b.partitions, db.partitions):
            self.assertTrue(bool(p[0].path))
            self.assertEqual(p[0].path, p[1].path)
            # NOTE(review): repeats the first assert — probably meant
            # p[1].path; confirm before changing.
            self.assertTrue(bool(p[0].path))

    def test_schema_direct(self):
        '''Test adding tables directly to the schema'''

        # If we don't explicitly set the id_, it will change for every
        # run.
        self.bundle.metadata.identity.id = 'aTest'

        self.bundle.schema.clean()

        with self.bundle.session:
            s = self.bundle.schema
            s.add_table('table 1', altname='alt name a')
            s.add_table('table 2', altname='alt name b')

            # NOTE(review): this passes the 1-tuple ('table 1',) as the
            # table-name argument — confirm the duplicate-name check is
            # really meant to receive a tuple here.
            self.assertRaises(Exception, s.add_table, ('table 1', ))

            t = s.add_table('table 3', altname='alt name')

            s.add_column(t, 'col 1', altname='altname1')
            s.add_column(t, 'col 2', altname='altname2')
            s.add_column(t, 'col 3', altname='altname3')

        #print self.bundle.schema.as_csv()

        # Table ids derive from the dataset vid and are stable because
        # the identity id was pinned above.
        self.assertIn('tiEGPXmDC801', [t.id_ for t in self.bundle.schema.tables])
        self.assertIn('tiEGPXmDC802', [t.id_ for t in self.bundle.schema.tables])
        self.assertNotIn('cTest03', [t.id_ for t in self.bundle.schema.tables])

        t = self.bundle.schema.table('table_3')

        self.assertIn('ciEGPXmDC803001', [c.id_ for c in t.columns])
        self.assertIn('ciEGPXmDC803002', [c.id_ for c in t.columns])
        self.assertIn('ciEGPXmDC803003', [c.id_ for c in t.columns])

        # Try with a nested session, b/c we need to test it somewhere ...
        with self.bundle.session:
            with self.bundle.session:
                t = s.add_table('table 4', altname='alt name')

                s.add_column(t, 'col 1', altname='altname1')
                s.add_column(t, 'col 2', altname='altname2')
                s.add_column(t, 'col 3', altname='altname3')

    def x_test_generate_schema(self):
        '''Uses the generateSchema method in the bundle'''
        from ambry.orm import Column

        with self.bundle.session:
            s = self.bundle.schema
            s.clean()

            t1 = s.add_table('table1')

            s.add_column(t1, name='col1', datatype=Column.DATATYPE_REAL)
            s.add_column(t1, name='col2', datatype=Column.DATATYPE_INTEGER)
            s.add_column(t1, name='col3', datatype=Column.DATATYPE_TEXT)

            t2 = s.add_table('table2')
            s.add_column(t2, name='col1')
            s.add_column(t2, name='col2')
            s.add_column(t2, name='col3')

            t3 = s.add_table('table3')
            s.add_column(t3, name='col1', datatype=Column.DATATYPE_REAL)
            s.add_column(t3, name='col2', datatype=Column.DATATYPE_INTEGER)
            s.add_column(t3, name='col3', datatype=Column.DATATYPE_TEXT)

    def test_column_processor(self):
        """Check BasicTransform and CensusTransform value coding on an
        integer column with a default and an illegal value."""
        from ambry.orm import Column
        from ambry.transform import BasicTransform, CensusTransform

        self.bundle.schema.clean()

        with self.bundle.session:
            s = self.bundle.schema
            t = s.add_table('table3')

            s.add_column(t, name='col1', datatype=Column.DATATYPE_INTEGER,
                         default=-1, illegal_value='999')
            s.add_column(t, name='col2', datatype=Column.DATATYPE_TEXT)
            s.add_column(t, name='col3', datatype=Column.DATATYPE_REAL)

        c1 = t.column('col1')

        self.assertEquals(1, BasicTransform(c1)({'col1': ' 1 '}))

        # A non-numeric value on an integer column must raise.
        with self.assertRaises(ValueError):
            print "PROCESSOR '{}'".format(
                CensusTransform(c1)({'col1': ' B '}))

        # Census coding: illegal value -> default; '#' and '!' map to
        # the sentinel codes asserted below.
        self.assertEquals(1, CensusTransform(c1)({'col1': ' 1 '}))
        self.assertEquals(-1, CensusTransform(c1)({'col1': ' 999 '}))
        self.assertEquals(-3, CensusTransform(c1)({'col1': ' # '}))
        self.assertEquals(-2, CensusTransform(c1)({'col1': ' ! '}))

    def test_validator(self):
        """Row validators: AND-joined (default) and OR-joined column
        checks, plus row hashing."""
        #
        # Validators
        #

        tests = [
            ('tone', True, (None, 'VALUE', 0, 0)),
            ('tone', True, (None, 'VALUE', -1, 0)),
            ('tone', False, (None, 'DEFAULT', 0, 0)),
            ('tone', False, (None, 'DEFAULT', -1, 0)),
            ('ttwo', True, (None, 'DEFAULT', 0, 0)),
            ('ttwo', True, (None, 'DEFAULT', 0, 3.14)),
            ('ttwo', False, (None, 'DEFAULT', -1, 0)),
            ('tthree', True, (None, 'DEFAULT', 0, 0)),
            ('tthree', True, (None, 'DEFAULT', 0, 3.14)),
            ('all', True, (None, 'text1', 'text2', 1, 2, 3, 3.14)),
            ('all', False, (None, 'text1', 'text2', -1, -1, 3, 3.14)),
            ('all', False, (None, 'text1', 'text2', -1, 2, 3, 3.14)),
            ('all', False, (None, 'text1', 'text2', 1, -1, 3, 3.14)),
        ]

        for i, test in enumerate(tests):
            table_name, truth, row = test
            table = self.bundle.schema.table(table_name)
            vd = table._get_validator()
            if truth:
                self.assertTrue(
                    vd(row),
                    "Test {} not 'true' for table '{}': {}".format(
                        i + 1, table_name, row))
            else:
                self.assertFalse(
                    vd(row),
                    "Test {} not 'false' for table '{}': {}".format(
                        i + 1, table_name, row))

        # Testing the "OR" join of multiple columns.
        tests = [
            ('tone', True, (None, 'VALUE', 0, 0)),  #1
            ('tone', True, (None, 'VALUE', -1, 0)),
            ('tone', False, (None, 'DEFAULT', 0, 0)),
            ('tone', False, (None, 'DEFAULT', -1, 0)),
            ('ttwo', True, (None, 'DEFAULT', 0, 0)),  #5
            ('ttwo', True, (None, 'DEFAULT', 0, 3.14)),
            ('ttwo', False, (None, 'DEFAULT', -1, 0)),
            ('tthree', True, (None, 'DEFAULT', 0, 0)),  #8
            ('tthree', True, (None, 'DEFAULT', 0, 3.14)),
            ('all', True, (None, 'text1', 'text2', 1, 2, 3, 3.14)),  #10
            ('all', False, (None, 'text1', 'text2', -1, -1, 3, 3.14)),  #11
            ('all', True, (None, 'text1', 'text2', -1, 2, 3, 3.14)),  #12
            ('all', True, (None, 'text1', 'text2', 1, -1, 3, 3.14)),  #13
        ]

        for i, test in enumerate(tests):
            table_name, truth, row = test
            table = self.bundle.schema.table(table_name)
            vd = table._get_validator(and_join=False)
            if truth:
                self.assertTrue(
                    vd(row),
                    "Test {} not 'true' for table '{}': {}".format(
                        i + 1, table_name, row))
            else:
                self.assertFalse(
                    vd(row),
                    "Test {} not 'false' for table '{}': {}".format(
                        i + 1, table_name, row))

        # Test the hash functions. This test depends on the d_test
        # values in geoschema.csv
        tests = [('tone', 'A|1|', (None, 'A', 1, 2)),
                 ('ttwo', '1|2|', (None, 'B', 1, 2)),
                 ('tthree', 'C|2|', (None, 'C', 1, 2))]

        import hashlib

        for i, test in enumerate(tests):
            table_name, hashed_str, row = test
            table = self.bundle.schema.table(table_name)

            # row_hash is the first 14 hex digits of the md5 of the
            # hashed columns, as an int.
            m = hashlib.md5()
            m.update(hashed_str)

            self.assertEquals(int(m.hexdigest()[:14], 16), table.row_hash(row))

    def test_partition(self):
        """Partition creation, conflict detection, lookup by query and
        kwargs, data persistence across sessions, then cleanup."""
        from ambry.dbexceptions import ConflictError
        from ambry.identity import PartitionNameQuery
        from ambry.partition.csv import CsvPartition

        self.bundle.clean()
        self.bundle.prepare()

        p = self.bundle.partitions.new_db_partition(
            time=10, space=10, data={'pid': 'pid1'})

        # Creating the same name again must raise.
        with self.assertRaises(ConflictError):
            self.bundle.partitions.new_db_partition(
                time=10, space=10, data={'pid': 'pid1'})

        self.assertEqual(1, len(self.bundle.partitions.all))

        p = self.bundle.partitions.find_or_new(time=10, space=10)
        p.database.create()  # Find will go to the library if the database doesn't exist.

        self.assertEqual(1, len(self.bundle.partitions.all))
        self.assertEquals('pid1', p.data['pid'])

        # Lookup by query object and by kwargs both find the same record.
        p = self.bundle.partitions.find(PartitionNameQuery(time=10, space=10))
        self.assertEquals('pid1', p.data['pid'])

        p = self.bundle.partitions.find(time=10, space=10)
        self.assertEquals('pid1', p.data['pid'])

        # Mutate partition data in a session, then verify through a
        # fresh Bundle that the change was persisted.
        pnq3 = PartitionNameQuery(space=10)

        with self.bundle.session as s:
            p = self.bundle.partitions._find_orm(pnq3).first()
            p.data['foo'] = 'bar'
            s.add(p)

        bundle = Bundle()
        p = bundle.partitions.find(pnq3)
        self.assertEquals('bar', p.data['foo'])

        #p = self.bundle.partitions.find(PartitionNameQuery(name='source-dataset-subset-variation-30-hdf'))
        #self.assertTrue(p is not None)
        #self.assertEquals('source-dataset-subset-variation-30-hdf', p.identity.sname)

        #
        # Create all possible combinations of partition names
        #
        table = self.bundle.schema.tables[0]

        p = (('time', 'time2'), ('space', 'space3'),
             ('table', table.name), ('grain', 'grain4'))
        p += p

        pids = {}
        for i in range(4):
            for j in range(4):
                pid = self.bundle.identity.as_partition(**dict(p[i:i + j + 1]))
                pids[pid.fqname] = pid

        with self.bundle.session as s:
            s.commit()

            # These two deletey bits clear out all of the old
            # partitions, to avoid a conflict with the next section. We also have
            # to delete the files, since create() adds a partition record to the database,
            # and if one already exists, it will throw an Integrity Error.
            for p in self.bundle.partitions:
                if os.path.exists(p.database.path):
                    os.remove(p.database.path)

            for p in self.bundle.dataset.partitions:
                # Using SQL instead of s.delete() because we want to avoid the cascade to stored_partitions, since
                # that table doesn't exist in the bundle, only in the library
                s.execute("DELETE FROM partitions WHERE p_vid = :vid", {'vid': p.vid})
                #s.delete(p)

    def test_partition_2(self):
        """Create partitions for every combination of name components
        and verify each can be found again by vid."""
        bundle = Bundle()
        bundle.clean()
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()

        table = self.bundle.schema.tables[0]

        # Doubled so slices of length 1..4 can start at any component.
        p = (('time', 'time2'), ('space', 'space3'),
             ('table', table.name), ('grain', 'grain4'))
        p += p

        pids = {}
        for i in range(4):
            for j in range(4):
                pid = self.bundle.identity.as_partition(**dict(p[i:i + j + 1]))
                pids[pid.fqname] = pid

        for pid in pids.values():
            part = bundle.partitions.new_db_partition(**pid.dict)
            part.create()

            parts = bundle.partitions._find_orm(
                PartitionNameQuery(vid=pid.vid)).all()
            self.assertIn(pid.sname, [p.name for p in parts])

    def test_runconfig(self):
        """Check that the RunConfig expands the library configuration."""
        from ambry.run import get_runconfig, RunConfig

        rc = get_runconfig((os.path.join(self.bundle_dir, 'test-run-config.yaml'),
                            RunConfig.USER_CONFIG, RunConfig.USER_ACCOUNTS))

        l = rc.library('library1')

        # Each nested '_name' records which config object was expanded
        # at that level of the upstream chain.
        self.assertEquals('database1', l['database']['_name'])
        self.assertEquals('filesystem1', l['filesystem']['_name'])
        self.assertEquals('filesystem2', l['filesystem']['upstream']['_name'])
        self.assertEquals('filesystem3', l['filesystem']['upstream']['upstream']['_name'])
        self.assertEquals(
            'devtest.sandiegodata.org',
            l['filesystem']['upstream']['upstream']['account']['_name'])

    def test_build_bundle(self):
        """Build twice with the code-generating inserter, then restore
        the schema and run a normal build for later tests."""
        import shutil

        bundle = Bundle()
        shutil.copyfile(bundle.filesystem.path('meta', 'schema-edit-me.csv'),
                        bundle.filesystem.path('meta', 'schema.csv'))

        #try:
        bundle.database.enable_delete = True
        bundle.clean()
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()
        bundle.pre_build()
        bundle.build_db_inserter_codes()
        bundle.post_build()
        bundle.close()

        # The second run will use the changes to the schema made in the
        # first run, due to the types errors in the 'coding' table.
        bundle.clean()
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()
        bundle.pre_build()
        bundle.build_db_inserter_codes()
        bundle.post_build()
        bundle.close()

        # NOTE(review): this try guards nothing — together with the
        # commented-out '#try:' above it looks like the builds were
        # meant to be inside it, so a build failure would still restore
        # the schema. Confirm and move the builds under the try.
        try:
            pass
        finally:
            # Need to clean up to ensure that we're back to a good state.
            # This runs the normal build, which will be used by the other
            # tests.
            shutil.copyfile(
                bundle.filesystem.path('meta', 'schema-edit-me.csv'),
                bundle.filesystem.path('meta', 'schema.csv'))

            bundle.clean()
            bundle = Bundle()
            bundle.exit_on_fatal = False
            bundle.pre_prepare()
            bundle.prepare()
            bundle.post_prepare()
            bundle.pre_build()
            bundle.build()
            bundle.post_build()

    def test_simple_build(self):
        """Restore the editable schema, then run a full clean build."""
        import shutil

        bundle = Bundle()
        shutil.copyfile(bundle.filesystem.path('meta', 'schema-edit-me.csv'),
                        bundle.filesystem.path('meta', 'schema.csv'))

        bundle.clean()
        bundle = Bundle()
        bundle.exit_on_fatal = False

        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()

        bundle.pre_build()
        bundle.build()
        bundle.post_build()

    def test_config_update(self):
        # Smoke test: update_configuration must run without error.
        bundle = Bundle()
        bundle.update_configuration()

    def test_session(self):
        """Values set inside nested sessions are visible after the
        sessions close, including across a bundle close/reopen."""
        import uuid

        b = self.bundle

        uv = str(uuid.uuid4())

        with b.session as s1:
            with b.session as s2:
                b.set_value('test', 'uuid', uv)

        b.close()

        self.assertEqual(uv, b.get_value('test', 'uuid').value)

        uv2 = str(uuid.uuid4())
        self.assertNotEqual(uv, uv2)

        with b.session as s1:
            with b.session as s2:
                b.set_value('test', 'uuid', uv2)

        self.assertEqual(uv2, b.get_value('test', 'uuid').value)

        b.set_value('test', 'uuid', uv2)
def test_bundle_build(self):
    """Partition creation at three API levels, without a table component.

    Section 1 uses the low-level _new_orm_partition inside a session,
    section 2 the mid-level _new_partition, and section 3 the
    high-level new_db_partition / find_or_new / find_or_new_geo,
    checking naming, conflict detection and the ANY/NONE query
    wildcards. Ends with a full build and identity assertions.
    """
    from ambry.dbexceptions import ConflictError

    bundle = Bundle()

    # Need to clear the library, or the Bundle's pre_prepare
    # will cancel the build if this version is already installed
    bundle.library.purge()

    bundle.exit_on_fatal = False
    bundle.clean()
    bundle.database.create()

    bp = bundle.partitions

    # Section 1: six partitions — two times x (two spaces + no space).
    with bundle.session:
        bp._new_orm_partition(PartialPartitionName(time = 't1', space='s1'))
        bp._new_orm_partition(PartialPartitionName(time = 't1', space='s2'))
        bp._new_orm_partition(PartialPartitionName(time = 't1', space=None))
        bp._new_orm_partition(PartialPartitionName(time = 't2', space='s1'))
        bp._new_orm_partition(PartialPartitionName(time = 't2', space='s2'))
        bp._new_orm_partition(PartialPartitionName(time = 't2', space=None))

    # Re-creating an existing partition name must raise.
    with self.assertRaises(ConflictError):
        with bundle.session:
            bp._new_orm_partition(PartialPartitionName(time = 't1', space='s1'))

    # ANY matches only partitions where the component is set.
    pnq = PartitionNameQuery(time=NameQuery.ANY, space='s1')
    names = [p.vname for p in bp._find_orm(pnq).all()]
    self.assertEqual({u'source-dataset-subset-variation-t2-s1-0.0.1',
                      u'source-dataset-subset-variation-t1-s1-0.0.1'},
                     set(names))

    names = [p.vname for p in bp._find_orm(PartitionNameQuery(space=NameQuery.ANY)).all()]
    self.assertEqual(6, len(names))

    names = [p.vname for p in bp._find_orm(
        PartitionNameQuery(time='t1', space=NameQuery.ANY)).all()]
    self.assertEqual({'source-dataset-subset-variation-t1-s2-0.0.1',
                      'source-dataset-subset-variation-t1-0.0.1',
                      'source-dataset-subset-variation-t1-s1-0.0.1'},
                     set(names))

    # NONE matches only the partition with no space component.
    names = [p.vname for p in bp._find_orm(
        PartitionNameQuery(time='t1', space=NameQuery.NONE)).all()]
    self.assertEqual({'source-dataset-subset-variation-t1-0.0.1'}, set(names))

    # Start over, use a higher level function to create the partitions
    bundle.close()  # Or you'll get an OperationalError

    bundle = Bundle()
    bundle.exit_on_fatal = False
    bundle.clean()
    bundle.database.create()

    bp = bundle.partitions

    # Section 2: same six partitions through _new_partition, checking
    # counts as we go.
    bp._new_partition(PartialPartitionName(time = 't1', space='s1'))
    self.assertEquals(1, len(bp.all))
    bp._new_partition(PartialPartitionName(time = 't1', space='s2'))
    self.assertEquals(2, len(bp.all))
    bp._new_partition(PartialPartitionName(time = 't1', space=None))
    bp._new_partition(PartialPartitionName(time = 't2', space='s1'))
    bp._new_partition(PartialPartitionName(time = 't2', space='s2'))
    bp._new_partition(PartialPartitionName(time = 't2', space=None))
    self.assertEquals(6, len(bp.all))

    names = [p.vname for p in bp._find_orm(
        PartitionNameQuery(time='t1', space=NameQuery.ANY)).all()]
    self.assertEqual({'source-dataset-subset-variation-t1-s2-0.0.1',
                      'source-dataset-subset-variation-t1-0.0.1',
                      'source-dataset-subset-variation-t1-s1-0.0.1'},
                     set(names))

    # Start over, use a higher level function to create the partitions
    bundle.close()

    bundle = Bundle()
    bundle.exit_on_fatal = False
    bundle.clean()
    bundle.database.create()

    bp = bundle.partitions

    # Section 3: high-level creation; fqnames embed sequential vids.
    p = bp.new_db_partition(time = 't1', space='s1')
    self.assertEquals('source-dataset-subset-variation-t1-s1-0.0.1~piEGPXmDC8001001',
                      p.identity.fqname)

    p = bp.find_or_new(time = 't1', space='s2')
    self.assertEquals('source-dataset-subset-variation-t1-s2-0.0.1~piEGPXmDC8002001',
                      p.identity.fqname)

    # Duplicate — find_or_new must return the existing partition.
    p = bp.find_or_new(time = 't1', space='s2')
    self.assertEquals('source-dataset-subset-variation-t1-s2-0.0.1~piEGPXmDC8002001',
                      p.identity.fqname)

    p = bp.find_or_new_geo(time = 't2', space='s1')

    # Which it is depends on whether GDAL is installed.
    self.assertIn(p.identity.fqname, [
        'source-dataset-subset-variation-t2-s1-geo-0.0.1~piEGPXmDC8003001',
        'source-dataset-subset-variation-t2-s1-0.0.1~piEGPXmDC8003001'
    ])

    # Ok! Build!
    bundle.close()

    bundle = Bundle()
    bundle.exit_on_fatal = False
    bundle.clean()
    bundle.pre_prepare()
    bundle.prepare()
    bundle.post_prepare()
    bundle.pre_build()
    bundle.build_db_inserter_codes()
    bundle.post_build()

    # Identity is stable across the build.
    self.assertEquals('diEGPXmDC8001', bundle.identity.vid)
    self.assertEquals('source-dataset-subset-variation', bundle.identity.sname)
    self.assertEquals('source-dataset-subset-variation-0.0.1', bundle.identity.vname)
    self.assertEquals('source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
                      bundle.identity.fqname)
class Test(TestBase): def setUp(self): super(Test, self).setUp() self.copy_or_build_bundle() self.bundle = Bundle() self.bundle_dir = self.bundle.bundle_dir def test_db_bundle(self): from ambry.bundle import BuildBundle, DbBundle b = BuildBundle(self.bundle_dir) b.clean() self.assertTrue(b.identity.id_ is not None) self.assertEquals('source-dataset-subset-variation', b.identity.sname) self.assertEquals('source-dataset-subset-variation-0.0.1', b.identity.vname) b.database.create() db_path = b.database.path dbb = DbBundle(db_path) self.assertEqual("source-dataset-subset-variation", dbb.identity.sname) self.assertEqual("source-dataset-subset-variation-0.0.1", dbb.identity.vname) def test_paths(self): """ Test that a build bundle and a db bundle both produce the same paths. """ from ambry.bundle import DbBundle b = self.bundle db = DbBundle(b.database.path) self.assertEqual(b.path, db.path) self.assertTrue(os.path.exists(b.path)) self.assertEqual(b.database.path, db.database.path) self.assertTrue(os.path.exists(b.database.path)) self.assertEqual(b.identity.path, db.identity.path) for p in zip(b.partitions, db.partitions): self.assertTrue(bool(p[0].path)) self.assertEqual(p[0].path, p[1].path) self.assertTrue(bool(p[0].path)) def test_schema_direct(self): """Test adding tables directly to the schema""" # If we don't explicitly set the id_, it will change for every run. 
self.bundle.metadata.identity.id = 'aTest' self.bundle.schema.clean() with self.bundle.session: s = self.bundle.schema s.add_table('table 1', altname='alt name a') s.add_table('table 2', altname='alt name b') self.assertRaises(Exception, s.add_table, ('table 1', )) t = s.add_table('table 3', altname='alt name') s.add_column(t, 'col 1', altname='altname1') s.add_column(t, 'col 2', altname='altname2') s.add_column(t, 'col 3', altname='altname3') # print self.bundle.schema.as_csv() self.assertIn('tiEGPXmDC801', [t.id_ for t in self.bundle.schema.tables]) self.assertIn('tiEGPXmDC802', [t.id_ for t in self.bundle.schema.tables]) self.assertNotIn('cTest03', [t.id_ for t in self.bundle.schema.tables]) t = self.bundle.schema.table('table_3') self.assertIn('ciEGPXmDC803001', [c.id_ for c in t.columns]) self.assertIn('ciEGPXmDC803002', [c.id_ for c in t.columns]) self.assertIn('ciEGPXmDC803003', [c.id_ for c in t.columns]) # Try with a nested session, b/c we need to test it somewhere ... with self.bundle.session: with self.bundle.session: t = s.add_table('table 4', altname='alt name') s.add_column(t, 'col 1', altname='altname1') s.add_column(t, 'col 2', altname='altname2') s.add_column(t, 'col 3', altname='altname3') def x_test_generate_schema(self): """Uses the generateSchema method in the bundle""" from ambry.orm import Column with self.bundle.session: s = self.bundle.schema s.clean() t1 = s.add_table('table1') s.add_column(t1, name='col1', datatype=Column.DATATYPE_REAL) s.add_column(t1, name='col2', datatype=Column.DATATYPE_INTEGER) s.add_column(t1, name='col3', datatype=Column.DATATYPE_TEXT) t2 = s.add_table('table2') s.add_column(t2, name='col1') s.add_column(t2, name='col2') s.add_column(t2, name='col3') t3 = s.add_table('table3') s.add_column(t3, name='col1', datatype=Column.DATATYPE_REAL) s.add_column(t3, name='col2', datatype=Column.DATATYPE_INTEGER) s.add_column(t3, name='col3', datatype=Column.DATATYPE_TEXT) def test_column_processor(self): from ambry.orm import 
Column from ambry.transform import BasicTransform, CensusTransform self.bundle.schema.clean() with self.bundle.session: s = self.bundle.schema t = s.add_table('table3') s.add_column(t, name='col1', datatype=Column.DATATYPE_INTEGER, default=-1, illegal_value='999') s.add_column(t, name='col2', datatype=Column.DATATYPE_TEXT) s.add_column(t, name='col3', datatype=Column.DATATYPE_REAL) c1 = t.column('col1') self.assertEquals(1, BasicTransform(c1)({'col1': ' 1 '})) with self.assertRaises(ValueError): print "PROCESSOR '{}'".format(CensusTransform(c1)({'col1': ' B '})) self.assertEquals(1, CensusTransform(c1)({'col1': ' 1 '})) self.assertEquals(-1, CensusTransform(c1)({'col1': ' 999 '})) self.assertEquals(-3, CensusTransform(c1)({'col1': ' # '})) self.assertEquals(-2, CensusTransform(c1)({'col1': ' ! '})) def test_validator(self): # # Validators # tests = [ ('tone', True, (None, 'VALUE', 0, 0)), ('tone', True, (None, 'VALUE', -1, 0)), ('tone', False, (None, 'DEFAULT', 0, 0)), ('tone', False, (None, 'DEFAULT', -1, 0)), ('ttwo', True, (None, 'DEFAULT', 0, 0)), ('ttwo', True, (None, 'DEFAULT', 0, 3.14)), ('ttwo', False, (None, 'DEFAULT', -1, 0)), ('tthree', True, (None, 'DEFAULT', 0, 0)), ('tthree', True, (None, 'DEFAULT', 0, 3.14)), ('all', True, (None, 'text1', 'text2', 1, 2, 3, 3.14)), ('all', False, (None, 'text1', 'text2', -1, -1, 3, 3.14)), ('all', False, (None, 'text1', 'text2', -1, 2, 3, 3.14)), ('all', False, (None, 'text1', 'text2', 1, -1, 3, 3.14)), ] for i, test in enumerate(tests): table_name, truth, row = test table = self.bundle.schema.table(table_name) vd = table._get_validator() if truth: self.assertTrue(vd(row), "Test {} not 'true' for table '{}': {}".format(i + 1, table_name, row)) else: self.assertFalse(vd(row), "Test {} not 'false' for table '{}': {}".format(i + 1, table_name, row)) # Testing the "OR" join of multiple columns. 
tests = [ ('tone', True, (None, 'VALUE', 0, 0)), # 1 ('tone', True, (None, 'VALUE', -1, 0)), ('tone', False, (None, 'DEFAULT', 0, 0)), ('tone', False, (None, 'DEFAULT', -1, 0)), ('ttwo', True, (None, 'DEFAULT', 0, 0)), # 5 ('ttwo', True, (None, 'DEFAULT', 0, 3.14)), ('ttwo', False, (None, 'DEFAULT', -1, 0)), ('tthree', True, (None, 'DEFAULT', 0, 0)), # 8 ('tthree', True, (None, 'DEFAULT', 0, 3.14)), ('all', True, (None, 'text1', 'text2', 1, 2, 3, 3.14)), # 10 ('all', False, (None, 'text1', 'text2', -1, -1, 3, 3.14)), # 11 ('all', True, (None, 'text1', 'text2', -1, 2, 3, 3.14)), # 12 ('all', True, (None, 'text1', 'text2', 1, -1, 3, 3.14)), # 13 ] for i, test in enumerate(tests): table_name, truth, row = test table = self.bundle.schema.table(table_name) vd = table._get_validator(and_join=False) if truth: self.assertTrue(vd(row), "Test {} not 'true' for table '{}': {}".format(i + 1, table_name, row)) else: self.assertFalse(vd(row), "Test {} not 'false' for table '{}': {}".format(i + 1, table_name, row)) # Test the hash functions. 
This test depends on the d_test values in geoschema.csv tests = [ ('tone', 'A|1|', (None, 'A', 1, 2)), ('ttwo', '1|2|', (None, 'B', 1, 2)), ('tthree', 'C|2|', (None, 'C', 1, 2))] import hashlib for i, test in enumerate(tests): table_name, hashed_str, row = test table = self.bundle.schema.table(table_name) m = hashlib.md5() m.update(hashed_str) self.assertEquals(int(m.hexdigest()[:14], 16), table.row_hash(row)) def test_partition(self): from ambry.dbexceptions import ConflictError from ambry.identity import PartitionNameQuery self.bundle.clean() self.bundle.prepare() p = self.bundle.partitions.new_db_partition(table='tone', time=10, space=10, data={'pid':'pid1'}) with self.assertRaises(ConflictError): self.bundle.partitions.new_db_partition(table='tone',time=10, space=10, data={'pid':'pid1'}) self.assertEqual(1, len(self.bundle.partitions.all)) p = self.bundle.partitions.find_or_new(table='tone',time=10, space=10) p.database.create() # Find will go to the library if the database doesn't exist. 
self.assertEqual(1, len(self.bundle.partitions.all)) self.assertEquals('pid1', p.data['pid']) p = self.bundle.partitions.find(PartitionNameQuery(table='tone',time=10, space=10)) self.assertEquals('pid1',p.data['pid'] ) p = self.bundle.partitions.find(table='tone',time=10, space=10) self.assertEquals('pid1', p.data['pid']) pnq3 = PartitionNameQuery(space=10) with self.bundle.session as s: p = self.bundle.partitions._find_orm(pnq3).first() p.data['foo'] = 'bar' s.add(p) bundle = Bundle() p = bundle.partitions.find(pnq3) self.assertEquals('bar', p.data['foo']) # p = self.bundle.partitions.find(PartitionNameQuery(name='source-dataset-subset-variation-30-hdf')) # self.assertTrue(p is not None) # self.assertEquals('source-dataset-subset-variation-30-hdf', p.identity.sname) # # Create all possible combinations of partition names # table = self.bundle.schema.tables[0] p = (('time', 'time2'), ('space', 'space3'), ('table', table.name), ('grain', 'grain4')) p += p pids = {} for i in range(4): for j in range(4): pid = self.bundle.identity.as_partition(**dict(p[i:i + j + 1])) pids[pid.fqname] = pid with self.bundle.session as s: s.commit() # These two deletey bits clear out all of the old # partitions, to avoid a conflict with the next section. We also have # to delete the files, since create() adds a partition record to the database, # and if one already exists, it will throw an Integrity Error. 
for p in self.bundle.partitions: if os.path.exists(p.database.path): os.remove(p.database.path) for p in self.bundle.dataset.partitions: # Using SQL instead of s.delete() because we want to avoid the cascade to stored_partitions, since # that table doesn't exist in the bundle, only in the library s.execute("DELETE FROM partitions WHERE p_vid = :vid", {'vid': p.vid}) # s.delete(p) def test_runconfig(self): """Check the the RunConfig expands the library configuration""" from ambry.run import get_runconfig, RunConfig rc = get_runconfig( (os.path.join(self.bundle_dir, 'test-run-config.yaml'), RunConfig.USER_CONFIG, RunConfig.USER_ACCOUNTS)) l = rc.library('library1') self.assertEquals('database1', l['database']['_name']) self.assertEquals('filesystem1', l['filesystem']['_name']) self.assertEquals('filesystem2', l['filesystem']['upstream']['_name']) self.assertEquals('filesystem3', l['filesystem']['upstream']['upstream']['_name']) def test_build_bundle(self): import shutil bundle = Bundle() shutil.copyfile( bundle.filesystem.path('meta', 'schema-edit-me.csv'), bundle.filesystem.path('meta', 'schema.csv')) # try: bundle.database.enable_delete = True bundle.clean() bundle = Bundle() bundle.exit_on_fatal = False bundle.pre_prepare() bundle.prepare() bundle.post_prepare() bundle.pre_build() bundle.build_db_inserter_codes() bundle.post_build() bundle.close() # The second run will use the changes to the schema made in the # first run, due to the types errors in the 'coding' table. bundle.clean() bundle = Bundle() bundle.exit_on_fatal = False bundle.pre_prepare() bundle.prepare() bundle.post_prepare() bundle.pre_build() bundle.build_db_inserter_codes() bundle.post_build() bundle.close() try: pass finally: # Need to clean up to ensure that we're back to a good state. # This runs the normal build, which will be used by the other # tests. 
shutil.copyfile( bundle.filesystem.path('meta', 'schema-edit-me.csv'), bundle.filesystem.path('meta', 'schema.csv')) bundle.clean() bundle = Bundle() bundle.exit_on_fatal = False bundle.pre_prepare() bundle.prepare() bundle.post_prepare() bundle.pre_build() bundle.build() bundle.post_build() def test_simple_build(self): import shutil bundle = Bundle() shutil.copyfile( bundle.filesystem.path('meta', 'schema-edit-me.csv'), bundle.filesystem.path('meta', 'schema.csv')) bundle.clean() bundle = Bundle() bundle.exit_on_fatal = False bundle.pre_prepare() bundle.prepare() bundle.post_prepare() bundle.pre_build() bundle.build() bundle.post_build() def test_config_update(self): bundle = Bundle() bundle.update_configuration() def test_session(self): import uuid b = self.bundle uv = str(uuid.uuid4()) with b.session as s1: with b.session as s2: b.set_value('test', 'uuid', uv) b.close() self.assertEqual(uv, b.get_value('test', 'uuid').value) uv2 = str(uuid.uuid4()) self.assertNotEqual(uv, uv2) with b.session as s1: with b.session as s2: b.set_value('test', 'uuid', uv2) self.assertEqual(uv2, b.get_value('test', 'uuid').value) b.set_value('test', 'uuid', uv2)
def test_partition(self): from ambry.dbexceptions import ConflictError from ambry.identity import PartitionIdentity, PartitionNameQuery from ambry.partition.csv import CsvPartition from ambry.partition.hdf import HdfPartition self.bundle.clean() self.bundle.prepare() p = self.bundle.partitions.new_db_partition(time=10, space=10, data={'pid':'pid1'}) p = self.bundle.partitions.new_csv_partition(time=20, space=20, data={'pid':'pid2'}) self.assertIsInstance(p, CsvPartition ) p = self.bundle.partitions.find_or_new_csv(time=20, space=20) self.assertIsInstance(p, CsvPartition) p = self.bundle.partitions.new_hdf_partition(space=30, data={'pid':'pid3'}) self.assertIsInstance(p, HdfPartition) p = self.bundle.partitions.find_or_new_hdf(space=30) self.assertIsInstance(p, HdfPartition) with self.assertRaises(ConflictError): self.bundle.partitions.new_db_partition(time=10, space=10, data={'pid':'pid1'}) with self.assertRaises(ConflictError): self.bundle.partitions.new_csv_partition(time=20, space=20, data={'pid':'pid21'}) with self.assertRaises(ConflictError): self.bundle.partitions.new_hdf_partition(space=30, data={'pid':'pid31'}) self.assertEqual(3, len(self.bundle.partitions.all)) p = self.bundle.partitions.find_or_new(time=10, space=10) p.database.create() # Find will go to the library if the database doesn't exist. 
self.assertEqual(3, len(self.bundle.partitions.all)) self.assertEquals('pid1',p.data['pid'] ) p = self.bundle.partitions.find_or_new_csv(time=20, space=20) p.database.create() self.assertEquals('pid2',p.data['pid'] ) p = self.bundle.partitions.find_or_new_hdf(space=30) self.assertEquals('pid3',p.data['pid'] ) p = self.bundle.partitions.find(PartitionNameQuery(time=10, space=10)) self.assertEquals('pid1',p.data['pid'] ) p = self.bundle.partitions.find(time=10, space=10) self.assertEquals('pid1', p.data['pid']) p = self.bundle.partitions.find(PartitionNameQuery(time=20, space=20)) self.assertEquals('pid2',p.data['pid'] ) p = self.bundle.partitions.find(time=20, space=20) self.assertEquals('pid2',p.data['pid'] ) pnq3 = PartitionNameQuery(space=30) p = self.bundle.partitions.find(pnq3) self.assertEquals('pid3',p.data['pid'] ) with self.bundle.session as s: p = self.bundle.partitions._find_orm(pnq3).first() p.data['foo'] = 'bar' s.add(p) bundle = Bundle() p = bundle.partitions.find(pnq3) print p.data self.assertEquals('bar',p.data['foo'] ) p = self.bundle.partitions.find(PartitionNameQuery(name='source-dataset-subset-variation-30-hdf')) self.assertTrue(p is not None) self.assertEquals('source-dataset-subset-variation-30-hdf', p.identity.sname) # # Create all possible combinations of partition names # table = self.bundle.schema.tables[0] p = (('time','time2'),('space','space3'),('table',table.name),('grain','grain4')) p += p pids = {} for i in range(4): for j in range(4): pid = self.bundle.identity.as_partition(**dict(p[i:i+j+1])) pids[pid.fqname] = pid with self.bundle.session as s: # These two deletely bits clear out all of the old # partitions, to avoid a conflict with the next section. We also have # to delete the files, since create() adds a partition record to the database, # and if one already exists, it will throw an Integrity Error. 
for p in self.bundle.partitions: if os.path.exists(p.database.path): os.remove(p.database.path) for p in self.bundle.dataset.partitions: s.delete(p) import pprint pprint.pprint(sorted([ pid.fqname for pid in pids.values()])) bundle = Bundle() bundle.clean() bundle.prepare() for pid in pids.values(): part = bundle.partitions.new_db_partition(**pid.dict) part.create() parts = bundle.partitions._find_orm(PartitionNameQuery(vid=pid.vid)).all() self.assertIn(pid.sname, [p.name for p in parts])