Example #1
import numpy as np

# Assumed imports: in the coverage_model package, ParameterDictionary,
# ParameterContext, QuantityType, AxisTypeEnum, CRS, GridDomain, GridShape,
# MutabilityEnum, SimplexCoverage and create_guid are exposed at the package
# level; verify against your installation.
from coverage_model import *

def benchmark_value_setting(num_params=10, num_insertions=100, repeat=1, bulk_ts_insert=False):
    # Instantiate a ParameterDictionary
    pdict = ParameterDictionary()

    # Create a set of ParameterContext objects to define the parameters in the coverage, add each to the ParameterDictionary
    t_ctxt = ParameterContext('time', param_type=QuantityType(value_encoding=np.dtype('int64')))
    t_ctxt.axis = AxisTypeEnum.TIME
    t_ctxt.uom = 'seconds since 01-01-1970'
    pdict.add_context(t_ctxt)

    for i in xrange(num_params-1):
        pdict.add_context(ParameterContext('param_{0}'.format(i)))

    # Construct temporal and spatial Coordinate Reference System objects
    tcrs = CRS([AxisTypeEnum.TIME])
    scrs = CRS([AxisTypeEnum.LON, AxisTypeEnum.LAT])

    # Construct temporal and spatial Domain objects
    tdom = GridDomain(GridShape('temporal', [0]), tcrs, MutabilityEnum.EXTENSIBLE) # 1d (timeline)
    sdom = GridDomain(GridShape('spatial', [0]), scrs, MutabilityEnum.IMMUTABLE) # 0d spatial topology (station/trajectory)

    import time
    counter = 1
    insert_times = []
    per_rep_times = []
    full_time = time.time()
    for r in xrange(repeat):
        # Instantiate the SimplexCoverage providing the ParameterDictionary, spatial Domain and temporal Domain
        cov = SimplexCoverage('test_data', create_guid(), 'empty sample coverage_model', parameter_dictionary=pdict, temporal_domain=tdom, spatial_domain=sdom)

        rep_time = time.time()
        if bulk_ts_insert:
            cov.insert_timesteps(num_insertions)
        for x in xrange(num_insertions):
            in_time = time.time()
            if not bulk_ts_insert:
                cov.insert_timesteps(1)
            slice_ = slice(cov.num_timesteps - 1, None)
            cov.set_parameter_values('time', 1, tdoa=slice_)
            for i in xrange(num_params-1):
                cov.set_parameter_values('param_{0}'.format(i), 1.1, tdoa=slice_)

            in_time = time.time() - in_time
            insert_times.append(in_time)
            counter += 1
        rep_time = time.time() - rep_time
        per_rep_times.append(rep_time)

        cov.close()

    print 'Average Value Insertion Time (%s repetitions): %s' % (repeat, sum(insert_times) / len(insert_times))
    print 'Average Total Expansion Time (%s repetitions): %s' % (repeat, sum(per_rep_times) / len(per_rep_times))
    print 'Full Time (includes cov creation/closing): %s' % (time.time() - full_time)

    return cov
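
A minimal driver for the benchmark above might look like the following sketch. The `benchmarks` module name is an assumption (import the function from wherever it actually lives); the driver simply contrasts per-step domain expansion against a single bulk expansion.

from benchmarks import benchmark_value_setting  # hypothetical module name

if __name__ == '__main__':
    # One insert_timesteps(1) call per sample: repeated domain expansion
    benchmark_value_setting(num_params=10, num_insertions=100, repeat=3,
                            bulk_ts_insert=False)
    # One insert_timesteps(num_insertions) call up front: same data volume,
    # but only a single expansion per repetition
    benchmark_value_setting(num_params=10, num_insertions=100, repeat=3,
                            bulk_ts_insert=True)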
Example #2
    def repair(
        self,
        backup=True,
        copy_over=True,
        keep_temp=False,
        reanalyze=False,
        analyze_bricks=False,
        detailed_analysis=False,
    ):
        """
        Heavy repair tool that recreates a blank persisted Coverage from the broken coverage's
        original construction parameters, then reconstructs the Master and Parameter metadata
        files by inspection of the ION objects and "valid" brick files.
        @return:
        """
        if self._ar is None or reanalyze:
            self._ar = self._do_analysis(analyze_bricks=analyze_bricks, detailed_analysis=detailed_analysis)

        if self._ar.is_corrupt:
            if len(self._ar.get_brick_corruptions()) > 0:
                raise NotImplementedError("Brick corruption.  Cannot repair at this time!!!")
            else:
                # Repair the Master and Parameter metadata files

                # Need the ParameterDictionary, TemporalDomain and SpatialDomain
                pdict = ParameterDictionary.load(self._dso.parameter_dictionary)
                tdom = GridDomain.load(self._dso.temporal_domain)
                sdom = GridDomain.load(self._dso.spatial_domain)

                # Set up the working directory for the recovered coverage
                tempcov_dir = tempfile.mkdtemp("covs")

                # Create the temporary Coverage
                tempcov = SimplexCoverage(
                    root_dir=tempcov_dir,
                    persistence_guid=self._guid,
                    name=self._guid,
                    parameter_dictionary=pdict,
                    spatial_domain=sdom,
                    temporal_domain=tdom,
                )
                # Handle to persistence layer for tempcov
                pl = tempcov._persistence_layer

                # Set up the original and temporary coverage path strings
                orig_dir = os.path.join(self.cov_pth, self._guid)
                temp_dir = os.path.join(tempcov.persistence_dir, tempcov.persistence_guid)

                # Insert same number of timesteps into temporary coverage as in broken coverage
                brick_domains_new, new_brick_list, brick_list_spans, tD, bD, min_data_bound, max_data_bound = self.inspect_bricks(
                    self.cov_pth, self._guid, "time"
                )
                # If None, there are no brick files --> no timesteps, empty coverage!
                empty_cov = brick_list_spans is None
                if not empty_cov:
                    bls = [s.value for s in brick_list_spans]
                    maxes = [sum(b[3]) for b in new_brick_list.values()]
                    # Expand the temporal domain to match the broken coverage
                    tempcov.insert_timesteps(sum(maxes))

                    # Replace metadata in the Master file
                    pl.master_manager.brick_domains = brick_domains_new
                    pl.master_manager.brick_list = new_brick_list

                    # Repair ExternalLinks to brick files
                    with HDFLockingFile(pl.master_manager.file_path, "r+") as f:
                        for param_name in pdict.keys():
                            del f[param_name]
                            f.create_group(param_name)
                    for param_name in pdict.keys():
                        for brick in bls:
                            link_path = "/{0}/{1}".format(param_name, brick[0])
                            brick_file_name = "{0}.hdf5".format(brick[0])
                            brick_rel_path = os.path.join(
                                pl.parameter_metadata[param_name].root_dir.replace(tempcov.persistence_dir, "."),
                                brick_file_name,
                            )
                            log.debug("link_path: %s", link_path)
                            log.debug("brick_rel_path: %s", brick_rel_path)
                            pl.master_manager.add_external_link(link_path, brick_rel_path, brick[0])

                pl.flush_values()
                pl.flush()
                tempcov.close()

                # Remove 'rtree' dataset from Master file if it already exists (post domain expansion)
                # to make way for reconstruction
                with HDFLockingFile(pl.master_manager.file_path, "r+") as f:
                    if "rtree" in f.keys():
                        del f["rtree"]

                # Reconstruct 'rtree' dataset
                # Open temporary Coverage and PersistenceLayer objects
                fixed_cov = AbstractCoverage.load(tempcov.persistence_dir, mode="r+")
                pl_fixed = fixed_cov._persistence_layer

                # Call update_rtree for each brick using PersistenceLayer builtin
                brick_count = 0

                if not empty_cov:
                    for brick in bls:
                        rtree_extents, brick_extents, brick_active_size = pl_fixed.calculate_extents(
                            brick[1][1], bD, tD
                        )
                        pl_fixed.master_manager.update_rtree(brick_count, rtree_extents, obj=brick[0])
                        brick_count += 1

                # Update parameter_bounds property based on each parameter's brick data using deep inspection
                valid_bounds_types = ["BooleanType", "ConstantType", "QuantityType", "ConstantRangeType"]

                if not empty_cov:
                    for param in pdict.keys():
                        if pdict.get_context(param).param_type.__class__.__name__ in valid_bounds_types:
                            brick_domains_new, new_brick_list, brick_list_spans, tD, bD, min_data_bound, max_data_bound = self.inspect_bricks(
                                self.cov_pth, self._guid, param
                            )
                            # Update the metadata
                            pl_fixed.update_parameter_bounds(param, [min_data_bound, max_data_bound])
                pl_fixed.flush()
                fixed_cov.close()

                # Create backup copy of original Master and Parameter files
                if backup:
                    import datetime

                    orig_master_file = os.path.join(self.cov_pth, "{0}_master.hdf5".format(self._guid))

                    # Generate the timestamp
                    tstamp_format = "%Y%m%d%H%M%S"
                    tstamp = datetime.datetime.now().strftime(tstamp_format)

                    backup_master_file = os.path.join(self.cov_pth, "{0}_master.{1}.hdf5".format(self._guid, tstamp))

                    shutil.copy2(orig_master_file, backup_master_file)

                    for param in pdict.keys():
                        param_orig = os.path.join(orig_dir, param, "{0}.hdf5".format(param))
                        param_backup = os.path.join(orig_dir, param, "{0}.{1}.hdf5".format(param, tstamp))
                        shutil.copy2(param_orig, param_backup)

                # Copy Master and Parameter metadata files back to original/broken coverage (cov_pth) location
                if copy_over:
                    shutil.copy2(
                        os.path.join(tempcov.persistence_dir, "{0}_master.hdf5".format(self._guid)),
                        os.path.join(self.cov_pth, "{0}_master.hdf5".format(self._guid)),
                    )
                    for param in pdict.keys():
                        shutil.copy2(
                            os.path.join(temp_dir, param, "{0}.hdf5".format(param)),
                            os.path.join(orig_dir, param, "{0}.hdf5".format(param)),
                        )

                # Reanalyze the repaired coverage
                self._ar = self._do_analysis(analyze_bricks=True)

                # Verify repair worked, clean up if not
                if self._ar.is_corrupt:
                    # If the files were backed up then revert
                    if backup:
                        # Remove backed up files and clean up the repair attempt
                        log.info("Repair attempt failed.  Reverting to pre-repair state.")
                        # Use backup copy to replace post-repair file.
                        shutil.copy2(backup_master_file, orig_master_file)
                        # Delete the backup
                        os.remove(backup_master_file)

                        # Iterate over parameters and revert to pre-repair state
                        for param in pdict.keys():
                            param_orig = os.path.join(orig_dir, param, "{0}.hdf5".format(param))
                            param_backup = os.path.join(orig_dir, param, "{0}.{1}.hdf5".format(param, tstamp))
                            # Use backup copy to replace post-repair file.
                            shutil.copy2(param_backup, param_orig)
                            # Delete the backup
                            os.remove(param_backup)

                    raise ValueError("Coverage repair failed! Revert to stored backup version, if possible.")

                # Remove temporary coverage
                if not keep_temp:
                    shutil.rmtree(tempcov_dir)
                else:
                    return tempcov_dir
        else:
            log.info("Coverage is not corrupt, nothing to repair!")
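
Both repair variants rebuild the Master file's per-parameter groups as HDF5 external links pointing at the brick files; `add_external_link` presumably wraps h5py's `ExternalLink` support. A self-contained sketch of that mechanism, with made-up file and group names:

import h5py
import numpy as np

# Create a stand-in 'brick' file (names here are illustrative only)
with h5py.File('brick_0.hdf5', 'w') as brick:
    brick.create_dataset('data', data=np.arange(10))

# Link '/time/brick_0' in the master file to the brick file's root group,
# using a relative path just as the repair code does with brick_rel_path
with h5py.File('master.hdf5', 'w') as master:
    grp = master.create_group('time')
    grp['brick_0'] = h5py.ExternalLink('./brick_0.hdf5', '/')

# Reading through the link transparently opens brick_0.hdf5
with h5py.File('master.hdf5', 'r') as master:
    print(master['/time/brick_0/data'][:])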
Example #3
    def repair(self,
               backup=True,
               copy_over=True,
               keep_temp=False,
               reanalyze=False,
               analyze_bricks=False,
               detailed_analysis=False):
        """
        Heavy repair tool that recreates a blank persisted Coverage from the broken coverage's
        original construction parameters, then reconstructs the Master and Parameter metadata
        files by inspection of the ION objects and "valid" brick files.
        @return:
        """
        if self._ar is None or reanalyze:
            self._ar = self._do_analysis(analyze_bricks=analyze_bricks,
                                         detailed_analysis=detailed_analysis)

        if self._ar.is_corrupt:
            if len(self._ar.get_brick_corruptions()) > 0:
                raise NotImplementedError(
                    'Brick corruption.  Cannot repair at this time!!!')
            else:
                # Repair the Master and Parameter metadata files

                # Need the ParameterDictionary, TemporalDomain and SpatialDomain
                pdict = ParameterDictionary.load(
                    self._dso.parameter_dictionary)
                tdom = GridDomain.load(self._dso.temporal_domain)
                sdom = GridDomain.load(self._dso.spatial_domain)

                # Set up the working directory for the recovered coverage
                tempcov_dir = tempfile.mkdtemp('covs')

                # Create the temporary Coverage
                tempcov = SimplexCoverage(root_dir=tempcov_dir,
                                          persistence_guid=self._guid,
                                          name=self._guid,
                                          parameter_dictionary=pdict,
                                          spatial_domain=sdom,
                                          temporal_domain=tdom)
                # Handle to persistence layer for tempcov
                pl = tempcov._persistence_layer

                # Set up the original and temporary coverage path strings
                orig_dir = os.path.join(self.cov_pth, self._guid)
                temp_dir = os.path.join(tempcov.persistence_dir,
                                        tempcov.persistence_guid)

                # Insert same number of timesteps into temporary coverage as in broken coverage
                brick_domains_new, new_brick_list, brick_list_spans, tD, bD, min_data_bound, max_data_bound = self.inspect_bricks(
                    self.cov_pth, self._guid, 'time')
                empty_cov = brick_list_spans is None  # If None, there are no brick files --> no timesteps, empty coverage!
                if not empty_cov:
                    bls = [s.value for s in brick_list_spans]
                    maxes = [sum(b[3]) for b in new_brick_list.values()]
                    tempcov.insert_timesteps(sum(maxes))

                    # Replace metadata in the Master file
                    pl.master_manager.brick_domains = brick_domains_new
                    pl.master_manager.brick_list = new_brick_list

                    # Repair ExternalLinks to brick files
                    f = h5py.File(pl.master_manager.file_path, 'a')
                    for param_name in pdict.keys():
                        del f[param_name]
                        f.create_group(param_name)
                        for brick in bls:
                            link_path = '/{0}/{1}'.format(param_name, brick[0])
                            brick_file_name = '{0}.hdf5'.format(brick[0])
                            brick_rel_path = os.path.join(
                                pl.parameter_metadata[param_name].root_dir.replace(
                                    tempcov.persistence_dir, '.'),
                                brick_file_name)
                            log.debug('link_path: %s', link_path)
                            log.debug('brick_rel_path: %s', brick_rel_path)
                            pl.master_manager.add_external_link(
                                link_path, brick_rel_path, brick[0])
                    f.close()  # release the handle opened above before flushing

                pl.flush_values()
                pl.flush()
                tempcov.close()

                # Remove 'rtree' dataset from Master file if it already exists (post domain expansion)
                # to make way for reconstruction
                f = h5py.File(pl.master_manager.file_path, 'a')
                if 'rtree' in f.keys():
                    del f['rtree']
                f.close()

                # Reconstruct 'rtree' dataset
                # Open temporary Coverage and PersistenceLayer objects
                fixed_cov = AbstractCoverage.load(tempcov.persistence_dir,
                                                  mode='a')
                pl_fixed = fixed_cov._persistence_layer

                # Call update_rtree for each brick using PersistenceLayer builtin
                brick_count = 0

                if not empty_cov:
                    for brick in bls:
                        rtree_extents, brick_extents, brick_active_size = pl_fixed.calculate_extents(
                            brick[1][1], bD, tD)
                        pl_fixed.master_manager.update_rtree(brick_count,
                                                             rtree_extents,
                                                             obj=brick[0])
                        brick_count += 1

                # Update parameter_bounds property based on each parameter's brick data using deep inspection
                valid_bounds_types = [
                    'BooleanType', 'ConstantType', 'QuantityType',
                    'ConstantRangeType'
                ]

                if not empty_cov:
                    for param in pdict.keys():
                        param_type_name = pdict.get_context(param).param_type.__class__.__name__
                        if param_type_name in valid_bounds_types:
                            brick_domains_new, new_brick_list, brick_list_spans, tD, bD, min_data_bound, max_data_bound = self.inspect_bricks(
                                self.cov_pth, self._guid, param)
                            # Update the metadata
                            pl_fixed.update_parameter_bounds(
                                param, [min_data_bound, max_data_bound])
                pl_fixed.flush()
                fixed_cov.close()

                # Create backup copy of original Master and Parameter files
                if backup:
                    import datetime
                    orig_master_file = os.path.join(
                        self.cov_pth, '{0}_master.hdf5'.format(self._guid))

                    # Generate the timestamp
                    tstamp_format = '%Y%m%d%H%M%S'
                    tstamp = datetime.datetime.now().strftime(tstamp_format)

                    backup_master_file = os.path.join(
                        self.cov_pth,
                        '{0}_master.{1}.hdf5'.format(self._guid, tstamp))

                    shutil.copy2(orig_master_file, backup_master_file)

                    for param in pdict.keys():
                        param_orig = os.path.join(orig_dir, param,
                                                  '{0}.hdf5'.format(param))
                        param_backup = os.path.join(
                            orig_dir, param,
                            '{0}.{1}.hdf5'.format(param, tstamp))
                        shutil.copy2(param_orig, param_backup)

                # Copy Master and Parameter metadata files back to original/broken coverage (cov_pth) location
                if copy_over:
                    shutil.copy2(
                        os.path.join(tempcov.persistence_dir,
                                     '{0}_master.hdf5'.format(self._guid)),
                        os.path.join(self.cov_pth,
                                     '{0}_master.hdf5'.format(self._guid)))
                    for param in pdict.keys():
                        shutil.copy2(
                            os.path.join(temp_dir, param,
                                         '{0}.hdf5'.format(param)),
                            os.path.join(orig_dir, param,
                                         '{0}.hdf5'.format(param)))

                # Reanalyze the repaired coverage
                self._ar = self._do_analysis(analyze_bricks=True)

                # Verify repair worked, clean up if not
                if self._ar.is_corrupt:
                    # If the files were backed up then revert
                    if backup:
                        # Remove backed up files and clean up the repair attempt
                        log.info(
                            'Repair attempt failed.  Reverting to pre-repair state.'
                        )
                        # Use backup copy to replace post-repair file.
                        shutil.copy2(backup_master_file, orig_master_file)
                        # Delete the backup
                        os.remove(backup_master_file)

                        # Iterate over parameters and revert to pre-repair state
                        for param in pdict.keys():
                            param_orig = os.path.join(orig_dir, param,
                                                      '{0}.hdf5'.format(param))
                            param_backup = os.path.join(
                                orig_dir, param,
                                '{0}.{1}.hdf5'.format(param, tstamp))
                            # Use backup copy to replace post-repair file.
                            shutil.copy2(param_backup, param_orig)
                            # Delete the backup
                            os.remove(param_backup)

                    raise ValueError(
                        'Coverage repair failed! Revert to stored backup version, if possible.'
                    )

                # Remove temporary coverage
                if not keep_temp:
                    shutil.rmtree(tempcov_dir)
                else:
                    return tempcov_dir
        else:
            log.info('Coverage is not corrupt, nothing to repair!')
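
Driving the repair method might look like the sketch below. The enclosing class is not shown in these examples; the name CoverageDoctor, its module path, and the constructor arguments are assumptions inferred from the attributes the method touches (cov_pth, _guid, _dso), so verify them against your coverage_model version.

from coverage_model.recovery import CoverageDoctor  # assumed module path / class name

# cov_pth is the on-disk root of the broken coverage; dprod and dset stand in
# for the ION DataProduct/Dataset resource objects (all hypothetical names)
doctor = CoverageDoctor(cov_pth, dprod, dset)

# repair() analyzes first (or reuses a prior analysis when reanalyze=False),
# keeps timestamped backups of the Master/Parameter files, and reverts to
# them automatically if the post-repair analysis still reports corruption
doctor.repair(backup=True, copy_over=True, keep_temp=False, reanalyze=True)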