def test_collection():
    """
    Test collection on 'cf_profile' ensuring output array is correct

    Collects all chunked source files, compares the collected dataset to a
    manual reference collection, and verifies that h5 attributes were
    carried over from the source chunk files.
    """
    init_logger('reV.handlers.collection')
    profiles = manual_collect(H5_DIR, 'peregrine_2012', 'cf_profile')
    h5_file = os.path.join(TEMP_DIR, 'collection.h5')
    Collector.collect(h5_file, H5_DIR, POINTS_PATH, 'cf_profile',
                      dset_out=None, file_prefix='peregrine_2012')
    with h5py.File(h5_file, 'r') as f:
        cf_profiles = f['cf_profile'][...]

    diff = np.mean(np.abs(profiles - cf_profiles))
    msg = "Arrays differ by {:.4f}".format(diff)
    assert np.allclose(profiles, cf_profiles), msg

    source_file = os.path.join(H5_DIR, "peregrine_2012_node00_x000.h5")
    with h5py.File(source_file, 'r') as f_s:

        def check_attrs(name, obj):
            # NOTE: parameter renamed from "object" to avoid shadowing the
            # builtin. Compare every attribute on the collected object
            # against the same-named object in the source chunk file.
            obj_s = f_s[name]
            for k, v in obj.attrs.items():
                v_s = obj_s.attrs[k]
                assert v == v_s

        with h5py.File(h5_file, 'r') as f:
            f.visititems(check_attrs)

    if PURGE_OUT:
        os.remove(h5_file)
def test_low_mem_collect():
    """Test memory limited multi chunk collection"""
    init_logger('reV.handlers.collection', log_level='DEBUG')
    h5_file = os.path.join(TEMP_DIR, 'cf.h5')
    # tiny mem_util_lim forces the collector to work in multiple chunks
    Collector.collect(h5_file, H5_DIR, POINTS_PATH, 'cf_profile',
                      dset_out=None, file_prefix='peregrine_2012',
                      mem_util_lim=0.00002)

    with h5py.File(h5_file, 'r') as out:
        assert 'cf_profile' in out
        collected = out['cf_profile'][...]

    node_file = os.path.join(H5_DIR, 'peregrine_2012_node01_x001.h5')
    with h5py.File(node_file, 'r') as src:
        chunk = src['cf_profile'][...]

    # the last node's chunk should land in the trailing columns of the output
    assert np.allclose(chunk, collected[:, -1 * chunk.shape[1]:])

    if PURGE_OUT:
        os.remove(h5_file)
def test_profiles_means():
    """
    Test adding means to pre-collected profiles
    """
    init_logger('reV.handlers.collection')
    h5_file = os.path.join(TEMP_DIR, 'cf.h5')
    # first collect profiles, then append the means dataset to the same file
    Collector.collect(h5_file, H5_DIR, POINTS_PATH, 'cf_profile',
                      dset_out=None, file_prefix='peregrine_2012')
    Collector.add_dataset(h5_file, H5_DIR, 'cf_mean',
                          dset_out=None, file_prefix='peregrine_2012')

    with h5py.File(h5_file, 'r') as out:
        assert 'cf_profile' in out
        assert 'cf_mean' in out
        collected = out['cf_profile'][...]

    node_file = os.path.join(H5_DIR, 'peregrine_2012_node01_x001.h5')
    with h5py.File(node_file, 'r') as src:
        chunk = src['cf_profile'][...]

    # spot-check that the node01 chunk occupies the trailing columns
    assert np.allclose(chunk, collected[:, -1 * chunk.shape[1]:])

    if PURGE_OUT:
        os.remove(h5_file)
def test_collect_means():
    """
    Test means collection
    """
    init_logger('reV.handlers.collection')
    out_path = os.path.join(TEMP_DIR, 'cf_means.h5')
    # smoke test: collection of the scalar means dataset must run cleanly
    Collector.collect(out_path, H5_DIR, POINTS_PATH, 'cf_mean',
                      dset_out=None, file_prefix='peregrine_2012')
    if PURGE_OUT:
        os.remove(out_path)
def manual_collect(h5_dir, file_prefix, dset):
    """
    Manually collect dset from .h5 files w/ file_prefix in h5_dir

    Parameters
    ----------
    h5_dir : str
        Directory containing .h5 files to collect from
    file_prefix : str
        File prefix on .h5 file to collect from
    dset : str
        Dataset to collect

    Returns
    -------
    arr : ndarray
        Collected dataset array
    """
    # NOTE: docstring section header fixed from non-standard "Results" to
    # the NumPy-style "Returns" so doc tooling parses it correctly.
    arrs = []
    for fpath in Collector.find_h5_files(h5_dir, file_prefix):
        with h5py.File(fpath, 'r') as f:
            arrs.append(f[dset][...])

    # stack site-dimension chunks side by side into one array
    return np.hstack(arrs)
def test_means_lcoe():
    """
    Test adding an LCOE dataset to pre-collected means
    """
    init_logger('reV.handlers.collection')
    out_path = os.path.join(TEMP_DIR, 'cf_lcoe.h5')
    # collect means first, then append lcoe_fcr to the same output file
    Collector.collect(out_path, H5_DIR, POINTS_PATH, 'cf_mean',
                      dset_out=None, file_prefix='peregrine_2012')
    Collector.add_dataset(out_path, H5_DIR, 'lcoe_fcr',
                          dset_out=None, file_prefix='peregrine_2012')

    with h5py.File(out_path, 'r') as out:
        assert 'cf_mean' in out
        assert 'lcoe_fcr' in out

    if PURGE_OUT:
        os.remove(out_path)
def collect(ctx, verbose):
    """Run collection on local worker."""
    name = ctx.obj['NAME']
    h5_file = ctx.obj['H5_FILE']
    h5_dir = ctx.obj['H5_DIR']
    project_points = ctx.obj['PROJECT_POINTS']
    dsets = ctx.obj['DSETS']
    file_prefix = ctx.obj['FILE_PREFIX']
    log_dir = ctx.obj['LOG_DIR']
    purge_chunks = ctx.obj['PURGE_CHUNKS']
    # CLI flag or context flag enables verbose logging
    verbose = bool(verbose or ctx.obj['VERBOSE'])

    # initialize loggers for multiple modules
    init_mult(name, log_dir, modules=[__name__, 'reV.handlers.collection'],
              verbose=verbose, node=True)

    for key, val in ctx.obj.items():
        logger.debug('ctx var passed to collection method: "{}" : "{}" '
                     'with type "{}"'.format(key, val, type(val)))

    logger.info('Collection is being run for "{}" with job name "{}" '
                'and collection dir: {}. Target output path is: {}'
                .format(dsets, name, h5_dir, h5_file))
    t0 = time.time()

    # primary dataset creates the output file; any remaining datasets are
    # appended to it (slice is a no-op when only one dataset was requested)
    Collector.collect(h5_file, h5_dir, project_points, dsets[0],
                      file_prefix=file_prefix)
    for extra_dset in dsets[1:]:
        Collector.add_dataset(h5_file, h5_dir, extra_dset,
                              file_prefix=file_prefix)

    if purge_chunks:
        Collector.purge_chunks(h5_file, h5_dir, project_points,
                               file_prefix=file_prefix)
    else:
        Collector.move_chunks(h5_file, h5_dir, project_points,
                              file_prefix=file_prefix)

    runtime = (time.time() - t0) / 60
    logger.info('Collection completed in: {:.2f} min.'.format(runtime))

    # add job to reV status file.
    out_dir = os.path.dirname(h5_file)
    status = {'dirout': out_dir,
              'fout': os.path.basename(h5_file),
              'job_status': 'successful',
              'runtime': runtime,
              'finput': os.path.join(h5_dir, '{}*.h5'.format(file_prefix))}
    Status.make_job_file(out_dir, 'collect', name, status)