예제 #1
0
    def test_partition_2(self):
        """Create a db partition for every distinct partition identity built
        from windows of name components, and check each one can be found
        again through a vid query."""

        bundle = Bundle()
        bundle.clean()
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()

        first_table = self.bundle.schema.tables[0]

        # The four name components, repeated once so that a window starting
        # at any of the first four positions never runs off the end.
        components = (
            ('time', 'time2'),
            ('space', 'space3'),
            ('table', first_table.name),
            ('grain', 'grain4'),
        ) * 2

        # Keyed by fqname, so windows that produce the same identity collapse.
        identities = {}
        for start in range(4):
            for width in range(1, 5):
                name_parts = dict(components[start:start + width])
                identity = self.bundle.identity.as_partition(**name_parts)
                identities[identity.fqname] = identity

        for identity in identities.values():
            partition = bundle.partitions.new_db_partition(**identity.dict)
            partition.create()

            query = PartitionNameQuery(vid=identity.vid)
            found = bundle.partitions._find_orm(query).all()
            self.assertIn(identity.sname, [row.name for row in found])
예제 #2
0
    def copy_or_build_bundle(self):
        """Put the build directory into a known state.

        The build directory is always replaced with a copy of the save
        directory at the end. The save directory itself is only (re)created,
        by running a complete build, when both the build marker file and the
        save directory are missing.

        Raises:
            Exception: if the freshly prepared bundle is not version 0.0.1.
        """

        bundle, marker, build_dir, save_dir = self.bundle_dirs()

        if str(bundle.identity.name.version) != "0.0.1":
            # The test_library.py:test_versions script didn't reset the
            # bundle at the end, so throw both directories away.
            from ambry.util import rm_rf

            for leftover in (build_dir, save_dir):
                rm_rf(leftover)

        # Round-trip the stored identity through Identity and write the
        # resulting identity and names back to the metadata on disk.
        identity = Identity.from_dict(dict(bundle.metadata.identity))

        bundle.metadata.identity = identity.ident_dict
        bundle.metadata.names = identity.names_dict

        bundle.metadata.write_to_dir()

        marker_present = os.path.exists(marker)
        if not marker_present:
            global_logger.info("Build dir marker ({}) is missing".format(marker))

            # Clean through the existing instance, then carry on with a new
            # one. (The original author notes there is a reason for the
            # separate instance, but did not record it.)
            bundle.clean()
            bundle = Bundle()

            save_dir_present = os.path.exists(save_dir)
            if not save_dir_present:
                global_logger.info("Save dir is missing; re-build bundle. ")

                for phase in ('pre_prepare', 'prepare', 'post_prepare'):
                    getattr(bundle, phase)()

                if str(bundle.identity.name.version) != '0.0.1':
                    message = "Can only save bundle if version is 0.0.1. This one is version: {} ".format(
                        bundle.identity.name.version)
                    raise Exception(message)

                for phase in ('pre_build', 'build', 'post_build', 'close'):
                    getattr(bundle, phase)()

                # Record the time of the build in the marker file.
                with open(marker, 'w') as marker_file:
                    marker_file.write(str(time.time()))

                # Copy the newly built bundle to the save directory.
                save_command = "rm -rf {1}; rsync -arv {0} {1} > /dev/null ".format(build_dir, save_dir)
                os.system(save_command)

        # Always copy back from the save directory, just to be safe.
        restore_command = "rm -rf {0}; rsync -arv {1} {0}  > /dev/null ".format(build_dir, save_dir)
        os.system(restore_command)
예제 #3
0
    def x_test_rewrite(self):
        """Check that preparing a bundle rewrites a minimal bundle.yaml.

        The bundle's bundle.yaml is temporarily replaced by a JSON document
        that holds only 'identity' and 'about' sections. After a prepare
        cycle and a reload, the configuration is expected to expose the
        identity values and a 'names' section. The original file is put back
        and the bundle deleted when the test finishes, pass or fail.
        """
        from testbundle.bundle import Bundle
        import json
        from ambry.run import get_runconfig

        # Prepare to rewrite the bundle.yaml file.
        bundle = Bundle()
        orig = os.path.join(bundle.bundle_dir, 'bundle.yaml')
        save = os.path.join(bundle.bundle_dir, 'bundle.yaml.save')

        # Move the real file aside *before* entering the try block. If this
        # rename fails there is nothing to restore, and running the restore
        # in ``finally`` would only raise a second error that hides this one.
        os.rename(orig, save)

        try:
            # A single pre-formatted argument prints the same text under both
            # the Python 2 print statement and the Python 3 print function.
            print('Write to  {}'.format(orig))
            with open(orig, 'w') as f:
                f.write(json.dumps(
                    {
                        "identity": {
                            "dataset": "dataset1",
                            "id": "dfoo",
                            "revision": 100,
                            "source": "source1",
                            "subset": "subset1",
                            "variation": "variation1",
                            "version": "1.0.1",
                            "vid": "dfob001",
                        },
                        "about": {
                            "author": "*****@*****.**"
                        }
                    }
                ))

            get_runconfig.clear()  # clear config cache.
            bundle = Bundle()
            bundle.clean()
            bundle.pre_prepare()
            bundle.prepare()
            bundle.post_prepare()  # Does the rewrite, adding the 'names'

            # Need to clear and reload one more time for the 'names' to appear
            get_runconfig.clear()  # clear config cache.
            bundle = Bundle()
            bundle.exit_on_fatal = False

            # assertEqual rather than the deprecated assertEquals alias.
            self.assertEqual('dataset1', bundle.config.identity.dataset)
            self.assertEqual('dfoo', bundle.config.identity.id)
            self.assertEqual(100, bundle.config.identity.revision)

            self.assertEqual("source1-dataset1-subset1-variation1-1.0.100~dfoo01C", bundle.config.names.fqname)

            self.assertEqual("*****@*****.**", bundle.config.about.author)

        finally:
            # Put the original bundle.yaml back, then remove the bundle.
            os.rename(save, orig)
            self.delete_bundle()
예제 #4
0
파일: test_bundle.py 프로젝트: kball/ambry
    def test_build_bundle_hdf(self):
        """Clean the bundle, then run the prepare and build phases on a new
        instance, using build_hdf as the build step."""

        bundle = Bundle()
        bundle.clean()

        # Continue with a new instance after the clean.
        bundle = Bundle()
        bundle.exit_on_fatal = False

        phases = ('pre_prepare', 'prepare', 'post_prepare',
                  'pre_build', 'build_hdf', 'post_build')
        for phase in phases:
            getattr(bundle, phase)()
예제 #5
0
    def test_simple_build(self):
        """Reset meta/schema.csv from meta/schema-edit-me.csv, clean the
        bundle, and run the full prepare/build sequence on a new instance."""
        import shutil

        bundle = Bundle()

        # Overwrite the schema with the 'edit-me' copy before building.
        template_path = bundle.filesystem.path('meta', 'schema-edit-me.csv')
        schema_path = bundle.filesystem.path('meta', 'schema.csv')
        shutil.copyfile(template_path, schema_path)

        bundle.clean()

        bundle = Bundle()
        bundle.exit_on_fatal = False

        for phase in ('pre_prepare', 'prepare', 'post_prepare',
                      'pre_build', 'build', 'post_build'):
            getattr(bundle, phase)()
예제 #6
0
    def test_simple_build(self):
        """Copy the 'edit-me' schema over meta/schema.csv, clean, then
        prepare and build the bundle with a new instance."""
        import shutil

        def run_phases(target, *phase_names):
            # Call each named, argument-less method on the target, in order.
            for phase_name in phase_names:
                getattr(target, phase_name)()

        bundle = Bundle()

        source_csv = bundle.filesystem.path('meta', 'schema-edit-me.csv')
        target_csv = bundle.filesystem.path('meta', 'schema.csv')
        shutil.copyfile(source_csv, target_csv)

        bundle.clean()

        bundle = Bundle()
        bundle.exit_on_fatal = False
        run_phases(bundle, 'pre_prepare', 'prepare', 'post_prepare')
        run_phases(bundle, 'pre_build', 'build', 'post_build')
예제 #7
0
    def test_build_bundle(self):
        """Build the bundle twice with build_db_inserter_codes, then reset
        the schema and run a normal build for the benefit of other tests."""
        import shutil

        def reset_schema(current):
            # Overwrite meta/schema.csv with the 'edit-me' copy.
            shutil.copyfile(
                current.filesystem.path('meta', 'schema-edit-me.csv'),
                current.filesystem.path('meta', 'schema.csv'))

        def run_pipeline(current, build_step):
            # Prepare and build, using the named method as the build step.
            current.exit_on_fatal = False
            for phase in ('pre_prepare', 'prepare', 'post_prepare',
                          'pre_build', build_step, 'post_build'):
                getattr(current, phase)()

        bundle = Bundle()
        reset_schema(bundle)

        bundle.database.enable_delete = True

        # First run.
        bundle.clean()
        bundle = Bundle()
        run_pipeline(bundle, 'build_db_inserter_codes')
        bundle.close()

        # The second run will use the changes to the schema made in the
        # first run, due to the types errors in the 'coding' table.
        bundle.clean()
        bundle = Bundle()
        run_pipeline(bundle, 'build_db_inserter_codes')
        bundle.close()

        # Need to clean up to ensure that we're back to a good state.
        # This runs the normal build, which will be used by the other
        # tests.
        # NOTE(review): this cleanup is not guarded by try/finally, so it is
        # skipped if either run above raises -- confirm that is intended.
        reset_schema(bundle)

        bundle.clean()
        bundle = Bundle()
        run_pipeline(bundle, 'build')
예제 #8
0
    def test_build_bundle(self):
        """Run two consecutive build_db_inserter_codes builds, followed by a
        regular build from a freshly reset schema."""
        import shutil

        prepare_phases = ('pre_prepare', 'prepare', 'post_prepare')

        def prepare_and_build(target, build_name):
            # Run the prepare phases, then pre_build / <build_name> /
            # post_build, on the given bundle.
            target.exit_on_fatal = False
            for name in prepare_phases + ('pre_build', build_name, 'post_build'):
                getattr(target, name)()

        bundle = Bundle()

        edit_me = bundle.filesystem.path('meta', 'schema-edit-me.csv')
        schema = bundle.filesystem.path('meta', 'schema.csv')
        shutil.copyfile(edit_me, schema)

        bundle.database.enable_delete = True
        bundle.clean()
        bundle = Bundle()
        prepare_and_build(bundle, 'build_db_inserter_codes')
        bundle.close()

        # The second run will use the changes to the schema made in the
        # first run, due to the types errors in the 'coding' table.

        bundle.clean()
        bundle = Bundle()
        prepare_and_build(bundle, 'build_db_inserter_codes')
        bundle.close()

        # Need to clean up to ensure that we're back to a good state.
        # This runs the normal build, which will be used by the other
        # tests. The paths are looked up again on the current instance.
        # NOTE(review): nothing guards this cleanup; it does not run when one
        # of the builds above fails -- confirm that is intended.

        edit_me = bundle.filesystem.path('meta', 'schema-edit-me.csv')
        schema = bundle.filesystem.path('meta', 'schema.csv')
        shutil.copyfile(edit_me, schema)

        bundle.clean()
        bundle = Bundle()
        prepare_and_build(bundle, 'build')
예제 #9
0
    def test_partition_2(self):
        """For each distinct partition identity derived from sliding windows
        over the name components, create the db partition and verify that a
        vid query returns a record with the identity's sname."""

        bundle = Bundle()
        for phase in ('clean', 'pre_prepare', 'prepare', 'post_prepare'):
            getattr(bundle, phase)()

        table = self.bundle.schema.tables[0]

        pairs = (('time', 'time2'), ('space', 'space3'),
                 ('table', table.name), ('grain', 'grain4'))
        # Doubled so every window below stays inside the tuple.
        pairs = pairs + pairs

        # All windows of length 1..4 starting at positions 0..3.
        windows = [pairs[lo:lo + span + 1]
                   for lo in range(4)
                   for span in range(4)]

        # Identities with the same fqname overwrite each other.
        unique_pids = {}
        for window in windows:
            pid = self.bundle.identity.as_partition(**dict(window))
            unique_pids[pid.fqname] = pid

        for pid in unique_pids.values():
            new_part = bundle.partitions.new_db_partition(**pid.dict)
            new_part.create()

            matches = bundle.partitions._find_orm(PartitionNameQuery(vid=pid.vid)).all()
            self.assertIn(pid.sname, [match.name for match in matches])
예제 #10
0
    def test_bundle_build(self):
        """Exercise partition creation and name queries, then a full build.

        Partitions for table 'tone' are created three ways, each on a freshly
        cleaned bundle: through ``_new_orm_partition``, through
        ``_new_partition``, and through ``new_db_partition`` /
        ``find_or_new``. Finally a complete prepare/build cycle is run and
        the bundle identity is checked.
        """

        from ambry.dbexceptions import ConflictError

        bundle = Bundle()

        # Need to clear the library, or the Bundle's pre_prepare
        # will cancel the build if this version is already installed
        bundle.library.purge()

        bundle.exit_on_fatal = False
        bundle.clean()
        bundle.database.create()
        bundle.prepare()

        bp = bundle.partitions

        # Two times, each with spaces 's1', 's2' and no space at all.
        with bundle.session:
            bp._new_orm_partition(PartialPartitionName(table='tone', time='t1', space='s1'))
            bp._new_orm_partition(PartialPartitionName(table='tone', time='t1', space='s2'))
            bp._new_orm_partition(PartialPartitionName(table='tone', time='t1', space=None))
            bp._new_orm_partition(PartialPartitionName(table='tone', time='t2', space='s1'))
            bp._new_orm_partition(PartialPartitionName(table='tone', time='t2', space='s2'))
            bp._new_orm_partition(PartialPartitionName(table='tone', time='t2', space=None))

        # Creating the same partition name a second time must be rejected.
        with self.assertRaises(ConflictError):
            with bundle.session:
                bp._new_orm_partition(PartialPartitionName(table='tone', time='t1', space='s1'))

        pnq = PartitionNameQuery(table='tone', time=NameQuery.ANY, space='s1')

        # The comprehension variable is deliberately not called ``p``: under
        # Python 2 it leaks into the method scope.
        names = [part.vname
                 for part in bp._find_orm(pnq).all()]

        self.assertEqual({u'source-dataset-subset-variation-tone-t1-s1-0.0.1',
                          u'source-dataset-subset-variation-tone-t2-s1-0.0.1'},
                         set(names))

        names = [part.vname
                 for part in bp._find_orm(PartitionNameQuery(space=NameQuery.ANY)).all()]

        self.assertEqual(6, len(names))

        # space=ANY is expected to match the partition without a space too.
        names = [part.vname
                 for part in bp._find_orm(
                     PartitionNameQuery(table='tone', time='t1', space=NameQuery.ANY)).all()]

        self.assertEqual({'source-dataset-subset-variation-tone-t1-s2-0.0.1',
                          'source-dataset-subset-variation-tone-t1-0.0.1',
                          'source-dataset-subset-variation-tone-t1-s1-0.0.1'},
                         set(names))

        # space=NONE is expected to match only the partition without a space.
        names = [part.vname
                 for part in bp._find_orm(
                     PartitionNameQuery(table='tone', time='t1', space=NameQuery.NONE)).all()]

        self.assertEqual({'source-dataset-subset-variation-tone-t1-0.0.1'},
                         set(names))

        # Start over, use a higher level function to create the partitions

        bundle.close()  # Or you'll get an OperationalError
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.clean()
        bundle.database.create()
        bundle.prepare()
        bp = bundle.partitions

        # assertEqual throughout: assertEquals is a deprecated alias.
        bp._new_partition(PartialPartitionName(table='tone', time='t1', space='s1'))
        self.assertEqual(1, len(bp.all))

        bp._new_partition(PartialPartitionName(table='tone', time='t1', space='s2'))
        self.assertEqual(2, len(bp.all))

        bp._new_partition(PartialPartitionName(table='tone', time='t1', space=None))
        bp._new_partition(PartialPartitionName(table='tone', time='t2', space='s1'))
        bp._new_partition(PartialPartitionName(table='tone', time='t2', space='s2'))
        bp._new_partition(PartialPartitionName(table='tone', time='t2', space=None))
        self.assertEqual(6, len(bp.all))

        names = [part.vname
                 for part in bp._find_orm(PartitionNameQuery(time='t1', space=NameQuery.ANY)).all()]

        self.assertEqual({'source-dataset-subset-variation-tone-t1-s2-0.0.1',
                          'source-dataset-subset-variation-tone-t1-0.0.1',
                          'source-dataset-subset-variation-tone-t1-s1-0.0.1'},
                         set(names))

        # Start over again, this time through new_db_partition / find_or_new
        bundle.close()
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.clean()
        bundle.database.create()
        bundle.prepare()
        bp = bundle.partitions

        p = bp.new_db_partition(table='tone', time='t1', space='s1')
        self.assertEqual('source-dataset-subset-variation-tone-t1-s1-0.0.1~piEGPXmDC8001001', p.identity.fqname)

        p = bp.find_or_new(table='tone', time='t1', space='s2')
        self.assertEqual('source-dataset-subset-variation-tone-t1-s2-0.0.1~piEGPXmDC8002001', p.identity.fqname)

        # Duplicate: asking again must return the same partition identity.
        p = bp.find_or_new(table='tone', time='t1', space='s2')
        self.assertEqual('source-dataset-subset-variation-tone-t1-s2-0.0.1~piEGPXmDC8002001', p.identity.fqname)

        # Ok! Build!
        bundle.close()
        bundle = Bundle()
        bundle.exit_on_fatal = False

        bundle.clean()
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()
        bundle.pre_build()
        bundle.build_db_inserter_codes()
        bundle.post_build()

        self.assertEqual('diEGPXmDC8001', bundle.identity.vid)
        self.assertEqual('source-dataset-subset-variation', bundle.identity.sname)
        self.assertEqual('source-dataset-subset-variation-0.0.1', bundle.identity.vname)
        self.assertEqual('source-dataset-subset-variation-0.0.1~diEGPXmDC8001', bundle.identity.fqname)
예제 #11
0
    def test_bundle_build(self):
        """Exercise partition creation and name queries, then a full build.

        Partitions identified by time and space are created three ways, each
        on a freshly cleaned bundle: through ``_new_orm_partition``, through
        ``_new_partition``, and through ``new_db_partition`` /
        ``find_or_new`` / ``find_or_new_geo``. Finally a complete
        prepare/build cycle is run and the bundle identity is checked.
        """

        from ambry.dbexceptions import ConflictError

        bundle = Bundle()

        # Need to clear the library, or the Bundle's pre_prepare
        # will cancel the build if this version is already installed
        bundle.library.purge()

        bundle.exit_on_fatal = False
        bundle.clean()
        bundle.database.create()

        bp = bundle.partitions

        # Two times, each with spaces 's1', 's2' and no space at all.
        with bundle.session:
            bp._new_orm_partition(PartialPartitionName(time='t1', space='s1'))
            bp._new_orm_partition(PartialPartitionName(time='t1', space='s2'))
            bp._new_orm_partition(PartialPartitionName(time='t1', space=None))
            bp._new_orm_partition(PartialPartitionName(time='t2', space='s1'))
            bp._new_orm_partition(PartialPartitionName(time='t2', space='s2'))
            bp._new_orm_partition(PartialPartitionName(time='t2', space=None))

        # Creating the same partition name a second time must be rejected.
        with self.assertRaises(ConflictError):
            with bundle.session:
                bp._new_orm_partition(PartialPartitionName(time='t1', space='s1'))

        pnq = PartitionNameQuery(time=NameQuery.ANY, space='s1')

        # The comprehension variable is deliberately not called ``p``: under
        # Python 2 it leaks into the method scope.
        names = [part.vname
                 for part in bp._find_orm(pnq).all()]

        self.assertEqual({u'source-dataset-subset-variation-t2-s1-0.0.1',
                          u'source-dataset-subset-variation-t1-s1-0.0.1'},
                         set(names))

        names = [part.vname
                 for part in bp._find_orm(PartitionNameQuery(space=NameQuery.ANY)).all()]

        self.assertEqual(6, len(names))

        # space=ANY is expected to match the partition without a space too.
        names = [part.vname
                 for part in bp._find_orm(PartitionNameQuery(time='t1', space=NameQuery.ANY)).all()]

        self.assertEqual({'source-dataset-subset-variation-t1-s2-0.0.1',
                          'source-dataset-subset-variation-t1-0.0.1',
                          'source-dataset-subset-variation-t1-s1-0.0.1'},
                         set(names))

        # space=NONE is expected to match only the partition without a space.
        names = [part.vname
                 for part in bp._find_orm(PartitionNameQuery(time='t1', space=NameQuery.NONE)).all()]

        self.assertEqual({'source-dataset-subset-variation-t1-0.0.1'},
                         set(names))

        # Start over, use a higher level function to create the partitions

        bundle.close()  # Or you'll get an OperationalError
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.clean()
        bundle.database.create()
        bp = bundle.partitions

        # assertEqual throughout: assertEquals is a deprecated alias.
        bp._new_partition(PartialPartitionName(time='t1', space='s1'))
        self.assertEqual(1, len(bp.all))

        bp._new_partition(PartialPartitionName(time='t1', space='s2'))
        self.assertEqual(2, len(bp.all))

        bp._new_partition(PartialPartitionName(time='t1', space=None))
        bp._new_partition(PartialPartitionName(time='t2', space='s1'))
        bp._new_partition(PartialPartitionName(time='t2', space='s2'))
        bp._new_partition(PartialPartitionName(time='t2', space=None))
        self.assertEqual(6, len(bp.all))

        names = [part.vname
                 for part in bp._find_orm(PartitionNameQuery(time='t1', space=NameQuery.ANY)).all()]

        self.assertEqual({'source-dataset-subset-variation-t1-s2-0.0.1',
                          'source-dataset-subset-variation-t1-0.0.1',
                          'source-dataset-subset-variation-t1-s1-0.0.1'},
                         set(names))

        # Start over again, this time through new_db_partition / find_or_new
        bundle.close()
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.clean()
        bundle.database.create()
        bp = bundle.partitions

        p = bp.new_db_partition(time='t1', space='s1')
        self.assertEqual('source-dataset-subset-variation-t1-s1-0.0.1~piEGPXmDC8001001', p.identity.fqname)

        p = bp.find_or_new(time='t1', space='s2')
        self.assertEqual('source-dataset-subset-variation-t1-s2-0.0.1~piEGPXmDC8002001', p.identity.fqname)

        # Duplicate: asking again must return the same partition identity.
        p = bp.find_or_new(time='t1', space='s2')
        self.assertEqual('source-dataset-subset-variation-t1-s2-0.0.1~piEGPXmDC8002001', p.identity.fqname)

        p = bp.find_or_new_geo(time='t2', space='s1')

        # Which it is depends on whether GDAL is installed.
        self.assertIn(p.identity.fqname, [
            'source-dataset-subset-variation-t2-s1-geo-0.0.1~piEGPXmDC8003001',
            'source-dataset-subset-variation-t2-s1-0.0.1~piEGPXmDC8003001']
        )

        # Ok! Build!

        bundle.close()
        bundle = Bundle()
        bundle.exit_on_fatal = False

        bundle.clean()
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()
        bundle.pre_build()
        bundle.build_db_inserter_codes()
        bundle.post_build()

        self.assertEqual('diEGPXmDC8001', bundle.identity.vid)
        self.assertEqual('source-dataset-subset-variation', bundle.identity.sname)
        self.assertEqual('source-dataset-subset-variation-0.0.1', bundle.identity.vname)
        self.assertEqual('source-dataset-subset-variation-0.0.1~diEGPXmDC8001', bundle.identity.fqname)
예제 #12
0
    def copy_or_build_bundle(self):
        """Reset the build directory from the saved bundle directory,
        building and saving the bundle first when no saved copy exists.

        A build only happens when the marker file and the save directory are
        both absent; the copy from the save directory into the build
        directory happens on every call.

        Raises:
            Exception: if the bundle to be saved is not version 0.0.1.
        """

        bundle, marker, build_dir, save_dir = self.bundle_dirs()

        current_version = str(bundle.identity.name.version)
        if current_version != "0.0.1":
            # Rebuild the bundle if the test_library.py:test_versions
            # script didn't reset the bundle at the end
            from ambry.util import rm_rf
            rm_rf(build_dir)
            rm_rf(save_dir)

        # Normalise the stored identity by passing it through Identity, and
        # persist the resulting identity and names dictionaries.
        normalised = Identity.from_dict(dict(bundle.metadata.identity))
        bundle.metadata.identity = normalised.ident_dict
        bundle.metadata.names = normalised.names_dict
        bundle.metadata.write_to_dir()

        if os.path.exists(marker):
            must_build = False
        else:
            global_logger.info(
                "Build dir marker ({}) is missing".format(marker))

            # The clean runs on the existing instance and everything after
            # it on a new one; the reason for the separate instance was not
            # recorded by the original author.
            bundle.clean()
            bundle = Bundle()

            # Only checked after the clean and the re-instantiation above.
            must_build = not os.path.exists(save_dir)

        if must_build:
            global_logger.info("Save dir is missing; re-build bundle. ")

            bundle.pre_prepare()
            bundle.prepare()
            bundle.post_prepare()

            if str(bundle.identity.name.version) != '0.0.1':
                template = "Can only save bundle if version is 0.0.1. This one is version: {} "
                raise Exception(template.format(bundle.identity.name.version))

            bundle.pre_build()
            bundle.build()
            bundle.post_build()

            bundle.close()

            # Record the time of the build in the marker file.
            with open(marker, 'w') as handle:
                handle.write(str(time.time()))

            # Copy the newly built bundle to the save directory
            to_save = "rm -rf {1}; rsync -arv {0} {1} > /dev/null ".format(
                build_dir, save_dir)
            os.system(to_save)

        # Always copy, just to be safe.
        from_save = "rm -rf {0}; rsync -arv {1} {0}  > /dev/null ".format(
            build_dir, save_dir)
        os.system(from_save)