def sub_listen(msg, headers):
    assertions(isinstance(msg, StreamGranuleContainer), 'replayed message is not a granule.')
    hdf_string = msg.identifiables[data_stream_id].values
    sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
    assertions(sha1 == msg.identifiables[encoding_id].sha1, 'Checksum failed.')
    assertions(msg.identifiables[element_count_id].value == 1,
               'record replay count is incorrect %d.' % msg.identifiables[element_count_id].value)
    output_file = FileSystem.mktemp()
    output_file.write(msg.identifiables[data_stream_id].values)
    output_file_path = output_file.name
    output_file.close()
    output_vectors = acquire_data([output_file_path], fields, 2).next()
    for field in fields:
        comparison = (input_vectors[field]['values'] == output_vectors[field]['values'])
        assertions(comparison.all(),
                   'vector mismatch: %s vs %s' %
                   (input_vectors[field]['values'], output_vectors[field]['values']))
    FileSystem.unlink(output_file_path)
    ar.set(True)
def create_known(dataset_name, rootgrp_name, grp_name):
    """
    A known array to compare against during tests
    """
    known_array = numpy.random.rand(10, 20)

    filename = FileSystem.get_url(FS.TEMP, random_name(), ".hdf5")

    # Write an hdf file with known values to compare against
    h5pyfile = h5py.File(filename, mode='w', driver='core')
    grp = h5pyfile.create_group(rootgrp_name)
    subgrp = grp.create_group(grp_name)
    dataset = subgrp.create_dataset(dataset_name, known_array.shape, known_array.dtype.str,
                                    compression='gzip', compression_opts=4, maxshape=(None, None))
    dataset.write_direct(known_array)
    h5pyfile.close()

    # convert the hdf file into a binary string
    f = open(filename, mode='rb')
    # read the binary string representation of the file
    known_hdf_as_string = f.read()  # this is a known string to compare against during tests
    f.close()

    # cleaning up
    FileSystem.unlink(f.name)

    return known_array, known_hdf_as_string
def create_known(dataset_name, rootgrp_name, grp_name):
    """
    A known array to compare against during tests
    """
    known_array = numpy.ones((10, 20))

    filename = FileSystem.get_url(FS.TEMP, random_name(), ".hdf5")

    # Write an hdf file with known values to compare against
    h5pyfile = h5py.File(filename, mode='w', driver='core')
    grp = h5pyfile.create_group(rootgrp_name)
    subgrp = grp.create_group(grp_name)
    dataset = subgrp.create_dataset(dataset_name, known_array.shape, known_array.dtype.str,
                                    maxshape=(None, None))
    dataset.write_direct(known_array)
    h5pyfile.close()

    # convert the hdf file into a binary string
    f = open(filename, mode='rb')
    # read the binary string representation of the file
    known_hdf_as_string = f.read()  # this is a known string to compare against during tests
    f.close()

    # cleaning up
    FileSystem.unlink(f.name)

    return known_array, known_hdf_as_string
def tearDown(self):
    """
    Cleanup. Delete Subscription, Stream, Process Definition
    """
    for fname in self.fnames:
        FileSystem.unlink(fname)
def _force_clean(cls, recreate=False, initial=False):
    # Database resources
    from pyon.core.bootstrap import get_sys_name, CFG
    from pyon.datastore.datastore_common import DatastoreFactory
    datastore = DatastoreFactory.get_datastore(config=CFG, variant=DatastoreFactory.DS_BASE,
                                               scope=get_sys_name())
    if initial:
        datastore._init_database(datastore.database)

    dbs = datastore.list_datastores()
    clean_prefix = '%s_' % get_sys_name().lower()
    things_to_clean = [x for x in dbs if x.startswith(clean_prefix)]
    try:
        for thing in things_to_clean:
            datastore.delete_datastore(datastore_name=thing)
            if recreate:
                datastore.create_datastore(datastore_name=thing)
    finally:
        datastore.close()

    # Broker resources
    from putil.rabbitmq.rabbit_util import RabbitManagementUtil
    rabbit_util = RabbitManagementUtil(CFG, sysname=bootstrap.get_sys_name())
    deleted_exchanges, deleted_queues = rabbit_util.clean_by_sysname()
    log.info("Deleted %s exchanges, %s queues" % (len(deleted_exchanges), len(deleted_queues)))

    # File system
    from pyon.util.file_sys import FileSystem
    FileSystem._clean(CFG)
def _get_time_index(self, granule, timeval):
    '''
    @brief Obtains the index where a time's value is
    @param granule must be a complete dataset (hdf_string provided)
    @param timeval the vector value
    @return Index value for timeval or closest approx such that timeval is IN the subset
    '''
    assert isinstance(granule, StreamGranuleContainer), 'object is not a granule.'
    assert granule.identifiables[self.data_stream_id].values, 'hdf_string is not provided.'

    hdf_string = granule.identifiables[self.data_stream_id].values
    file_path = self._get_hdf_from_string(hdf_string)

    #-------------------------------------------------------------------------------------
    # Determine the field_id for the temporal coordinate vector (aka time)
    #-------------------------------------------------------------------------------------

    time_field = self.definition.identifiables[self.time_id].coordinate_ids[0]
    value_path = granule.identifiables[time_field].values_path or self.definition.identifiables[time_field].values_path
    record_count = granule.identifiables[self.element_count_id].value

    #-------------------------------------------------------------------------------------
    # Go through the time vector and get the indexes that correspond to the timeval
    # It will find a value such that
    # t_n <= i < t_(n+1), where i is the index
    #-------------------------------------------------------------------------------------

    var_name = value_path.split('/').pop()
    res = acquire_data([file_path], [var_name], record_count).next()
    time_vector = res[var_name]['values']
    retval = 0
    for i in xrange(len(time_vector)):
        if time_vector[i] == timeval:
            retval = i
            break
        elif i == 0 and time_vector[i] > timeval:
            retval = i
            break
        elif (i + 1) < len(time_vector):  # not last val
            if time_vector[i] < timeval and time_vector[i + 1] > timeval:
                retval = i
                break
        else:  # last val
            retval = i
            break
    FileSystem.unlink(file_path)
    return retval
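# A minimal sketch (not part of the original code) of the same index-search rule applied to a
# plain Python list, assuming a sorted time vector; the example values in the trailing comment
# are made up purely for illustration.
def _time_index_sketch(time_vector, timeval):
    retval = 0
    for i in xrange(len(time_vector)):
        if time_vector[i] == timeval:
            return i                      # exact match
        elif i == 0 and time_vector[i] > timeval:
            return i                      # timeval precedes the vector, clamp to the first index
        elif (i + 1) < len(time_vector):  # not last val
            if time_vector[i] < timeval and time_vector[i + 1] > timeval:
                return i                  # timeval falls between t_i and t_(i+1)
        else:
            return i                      # timeval follows the vector, clamp to the last index
    return retval

# e.g. _time_index_sketch([0.0, 10.0, 20.0, 30.0], 15.0) -> 1, since 15.0 lies between t_1 and t_2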
def on_start(self):
    # these values should come in from a config file, maybe pyon.yml
    self.pydap_host = self.CFG.get_safe('server.pydap.host', 'localhost')
    self.pydap_port = self.CFG.get_safe('server.pydap.port', '8001')
    self.pydap_url = 'http://%s:%s/' % (self.pydap_host, self.pydap_port)
    self.pydap_data_path = self.CFG.get_safe('server.pydap.data_path', 'RESOURCE:ext/pydap')
    self.datasets_xml_path = self.CFG.get_safe('server.pydap.datasets_xml_path', "RESOURCE:ext/datasets.xml")

    self.pydap_data_path = FileSystem.get_extended_url(self.pydap_data_path) + '/'

    filename = self.datasets_xml_path.split('/')[-1]
    base = '/'.join(self.datasets_xml_path.split('/')[:-1])
    real_path = FileSystem.get_extended_url(base)
    self.datasets_xml_path = os.path.join(real_path, filename)

    self.setup_filesystem(real_path)
def _slice(self, granule, slice_):
    '''
    @brief Creates a granule which is a slice of the granule parameter
    @param granule the superset
    @param slice_ The slice values for which to create the granule
    @return Crafted subset granule of the parameter granule.
    '''
    retval = copy.deepcopy(granule)
    fields = self._list_data(self.definition, granule)
    record_count = slice_.stop - slice_.start
    assert record_count > 0, 'slice is malformed'
    pairs = self._pair_up(granule)
    var_names = list([i[0] for i in pairs])  # Get the var_names from the pairs
    log.debug('var_names: %s', var_names)
    file_path = self._get_hdf_from_string(granule.identifiables[self.data_stream_id].values)
    codec = HDFEncoder()
    vectors = acquire_data([file_path], var_names, record_count, slice_).next()

    for row, value in vectors.iteritems():
        vp = self._find_vp(pairs, row)
        # Determine the range_id via reverse dictionary lookup
        #@todo: improve this pattern
        for field, path in fields.iteritems():
            if vp == path:
                range_id = field
                break
        bounds_id = retval.identifiables[range_id].bounds_id
        # Recalculate the bounds for this field and update the granule
        range = value['range']
        retval.identifiables[bounds_id].value_pair[0] = float(range[0])
        retval.identifiables[bounds_id].value_pair[1] = float(range[1])
        codec.add_hdf_dataset(vp, value['values'])
        record_count = len(value['values'])
        #----- DEBUGGING ---------
        log.debug('slice- row: %s', row)
        log.debug('slice- value_path: %s', vp)
        log.debug('slice- range_id: %s', range_id)
        log.debug('slice- bounds_id: %s', bounds_id)
        log.debug('slice- limits: %s', value['range'])
        #-------------------------

    retval.identifiables[self.element_count_id].value = record_count
    hdf_string = codec.encoder_close()
    self._patch_granule(retval, hdf_string)
    FileSystem.unlink(file_path)
    return retval
def _force_clean(cls, recreate=False):
    from pyon.core.bootstrap import get_sys_name, CFG
    from pyon.datastore.couchdb.couchdb_standalone import CouchDataStore
    datastore = CouchDataStore(config=CFG)
    dbs = datastore.list_datastores()
    things_to_clean = filter(lambda x: x.startswith('%s_' % get_sys_name().lower()), dbs)
    try:
        for thing in things_to_clean:
            datastore.delete_datastore(datastore_name=thing)
            if recreate:
                datastore.create_datastore(datastore_name=thing)
    finally:
        datastore.close()

    FileSystem._clean(CFG)
def setUp(self):
    # This test does not start a container so we have to hack creating a FileSystem singleton instance
    FileSystem(DotDict())

    self.px_ctd = SimpleCtdPublisher()
    self.px_ctd.last_time = 0

    self.tx_L0 = ctd_L0_all()
    self.tx_L0.streams = defaultdict(Mock)
    self.tx_L0.conductivity = Mock()
    self.tx_L0.temperature = Mock()
    self.tx_L0.pressure = Mock()

    self.tx_L1_C = CTDL1ConductivityTransform()
    self.tx_L1_C.streams = defaultdict(Mock)

    self.tx_L1_T = CTDL1TemperatureTransform()
    self.tx_L1_T.streams = defaultdict(Mock)

    self.tx_L1_P = CTDL1PressureTransform()
    self.tx_L1_P.streams = defaultdict(Mock)

    self.tx_L2_S = SalinityTransform()
    self.tx_L2_S.streams = defaultdict(Mock)

    self.tx_L2_D = DensityTransform()
    self.tx_L2_D.streams = defaultdict(Mock)
def _splice_coverage(cls, dataset_id, scov):
    file_root = FileSystem.get_url(FS.CACHE, 'datasets')
    vcov = cls._get_coverage(dataset_id, mode='a')
    scov_pth = scov.persistence_dir
    if isinstance(vcov.reference_coverage, SimplexCoverage):
        ccov = ComplexCoverage(file_root, uuid4().hex, 'Complex coverage for %s' % dataset_id,
                               reference_coverage_locs=[vcov.head_coverage_path, ],
                               parameter_dictionary=ParameterDictionary(),
                               complex_type=ComplexCoverageType.TEMPORAL_AGGREGATION)
        log.info('Creating Complex Coverage: %s', ccov.persistence_dir)
        ccov.append_reference_coverage(scov_pth)
        ccov_pth = ccov.persistence_dir
        ccov.close()
        vcov.replace_reference_coverage(ccov_pth)
    elif isinstance(vcov.reference_coverage, ComplexCoverage):
        log.info('Appending simplex coverage to complex coverage')
        #vcov.reference_coverage.append_reference_coverage(scov_pth)
        dir_path = vcov.reference_coverage.persistence_dir
        vcov.close()
        ccov = AbstractCoverage.load(dir_path, mode='a')
        ccov.append_reference_coverage(scov_pth)
        ccov.refresh()
        ccov.close()
    vcov.refresh()
    vcov.close()
def on_start(self):
    super(TransformCapture, self).on_start()
    #@todo: Remove debugging statements
    log.debug('(Transform: %s) Starting...', self.name)
    self.file_name = self.CFG.get_safe('process.file_name',
                                       FileSystem.get_url(FS.TEMP, 'transform_output'))
def make_some_data(self):
    import numpy as np

    stream_id = 'I am very special'
    definition = SBE37_CDM_stream_definition()
    definition.stream_resource_id = stream_id

    self.couch.create(definition)

    total = 200
    n = 10  # at most n records per granule
    i = 0

    while i < total:
        r = random.randint(1, n)

        psc = PointSupplementConstructor(point_definition=definition, stream_id=stream_id)
        for x in xrange(r):
            i += 1
            point_id = psc.add_point(time=i, location=(0, 0, 0))
            psc.add_scalar_point_coverage(point_id=point_id, coverage_id='temperature',
                                          value=np.random.normal(loc=48.0, scale=4.0, size=1)[0])
            psc.add_scalar_point_coverage(point_id=point_id, coverage_id='pressure',
                                          value=np.float32(1.0))
            psc.add_scalar_point_coverage(point_id=point_id, coverage_id='conductivity',
                                          value=np.float32(2.0))
        granule = psc.close_stream_granule()
        hdf_string = granule.identifiables[definition.data_stream_id].values
        sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
        with open(FileSystem.get_hierarchical_url(FS.CACHE, '%s.hdf5' % sha1), 'w') as f:
            f.write(hdf_string)
        granule.identifiables[definition.data_stream_id].values = ''
        self.couch.create(granule)
def get_datasets_xml_path(cls, cfg):
    datasets_xml_path = cfg.get_safe('server.pydap.datasets_xml_path', 'RESOURCE:ext/datasets.xml')
    base, filename = os.path.split(datasets_xml_path)
    base = FileSystem.get_extended_url(base)
    path = os.path.join(base, filename)
    return path
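# Minimal usage sketch (not part of the original snippets), assuming the method above is exposed
# as a classmethod on a pydap registration class; the class name below is hypothetical, and CFG
# is imported the same way the _force_clean snippets import it.
from pyon.core.bootstrap import CFG

datasets_xml = PydapRegistrationProcess.get_datasets_xml_path(CFG)  # hypothetical class name
# Resolves the 'RESOURCE:ext/datasets.xml' default (or the configured value) to an absolute path.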
def _get_cov(self, name, nt):
    path = CFG.get_safe('server.pydap.data_path', "RESOURCE:ext/pydap")
    ext_path = FileSystem.get_extended_url(path)
    cov, filename = _make_coverage(ext_path, "the_cov")
    cov.insert_timesteps(nt)
    cov.set_parameter_values("time", value=nt)
    return cov, filename
def __init__(self, name=None):
    """
    @param name The name of the dataset
    """
    # generate a random name for the filename if it has not been provided.
    self.filename = FileSystem.get_url(fs=FS.TEMP, filename=name or random_name(), ext='encoder.hdf5')

    # Using inline imports to put off making hdf/numpy required dependencies
    import h5py

    # open an hdf file on disk - in /tmp to write data to since we can't yet do in memory
    try:
        log.debug("Creating h5py file object for the encoder at %s" % self.filename)
        if os.path.isfile(self.filename):
            # if file exists, then append to it
            self.h5pyfile = h5py.File(self.filename, mode='r+', driver='core')
        else:
            # if file does not already exist, write a new one
            self.h5pyfile = h5py.File(self.filename, mode='w', driver='core')
        assert self.h5pyfile, 'No h5py file object created.'
    except IOError:
        log.debug("Error opening file for the HDFEncoder! ")
        raise HDFEncoderException("Error while trying to open file. ")
    except AssertionError as err:
        log.debug(err.message)
        raise HDFEncoderException(err.message)
def _create_coverage(self, dataset_id, description, parameter_dict, spatial_domain, temporal_domain):
    pdict = ParameterDictionary.load(parameter_dict)
    sdom = GridDomain.load(spatial_domain)
    tdom = GridDomain.load(temporal_domain)
    file_root = FileSystem.get_url(FS.CACHE, 'datasets')
    scov = SimplexCoverage(file_root, dataset_id, description or dataset_id,
                           parameter_dictionary=pdict, temporal_domain=tdom, spatial_domain=sdom,
                           inline_data_writes=self.inline_data_writes)
    return scov
def __init__(self, *args, **kwargs):
    BaseContainerAgent.__init__(self, *args, **kwargs)

    self._is_started = False
    self._capabilities = []

    # set container id and cc_agent name (as they are set in base class call)
    self.id = get_default_container_id()
    self.name = "cc_agent_%s" % self.id

    Container.instance = self
    from pyon.core import bootstrap
    bootstrap.container_instance = self

    log.debug("Container (sysname=%s) initializing ..." % bootstrap.get_sys_name())

    # DatastoreManager - controls access to Datastores (both mock and couch backed)
    self.datastore_manager = DatastoreManager()
    self.datastore_manager.start()
    self._capabilities.append("DATASTORE_MANAGER")

    # Keep track of the overrides from the command-line, so they can trump app/rel file data
    self.spawn_args = kwargs

    # Instantiate Directory and self-register.
    # Has the additional side effect of either bootstrapping the configuration into the
    # directory or reading the configuration, depending on the value of the auto_bootstrap setting
    self.directory = Directory()

    # Create this Container's specific ExchangeManager instance
    self.ex_manager = ExchangeManager(self)

    # Create this Container's specific ProcManager instance
    self.proc_manager = ProcManager(self)

    # Create this Container's specific AppManager instance
    self.app_manager = AppManager(self)

    # File System - Interface to the OS File System, using correct path names and setups
    self.file_system = FileSystem(CFG)

    # Governance Controller - manages the governance related interceptors
    self.governance_controller = GovernanceController(self)

    # sFlow manager - controls sFlow stat emission
    self.sflow_manager = SFlowManager(self)

    # Coordinates the container start
    self._status = "INIT"

    # protection for when the container itself is used as a Process for clients
    self.container = self

    log.debug("Container initialized, OK.")
def upload_qc():
    upload_folder = FileSystem.get_url(FS.TEMP, 'uploads')
    try:
        object_store = Container.instance.object_store

        # required fields
        upload = request.files['file']  # <input type=file name="file">

        if upload:
            # upload file - run filename through werkzeug.secure_filename
            filename = secure_filename(upload.filename)
            path = os.path.join(upload_folder, filename)
            upload_time = time.time()
            upload.save(path)
            filetype = _check_magic(upload) or 'CSV'  # Either going to be ZIP or CSV, probably

            # register upload
            file_upload_context = {
                'name': 'User uploaded QC file %s' % filename,
                'filename': filename,
                'filetype': filetype,  # only CSV, no detection necessary
                'path': path,
                'upload_time': upload_time,
                'status': 'File uploaded to server'
            }
            fuc_id, _ = object_store.create_doc(file_upload_context)

            # client to process dispatch
            pd_client = ProcessDispatcherServiceClient()

            # create process definition
            process_definition = ProcessDefinition(
                name='upload_qc_processor',
                executable={
                    'module': 'ion.processes.data.upload.upload_qc_processing',
                    'class': 'UploadQcProcessing'
                }
            )
            process_definition_id = pd_client.create_process_definition(process_definition)
            # create process
            process_id = pd_client.create_process(process_definition_id)
            # schedule process
            config = DotDict()
            config.process.fuc_id = fuc_id
            pid = pd_client.schedule_process(process_definition_id, process_id=process_id,
                                             configuration=config)
            log.info('UploadQcProcessing process created %s' % pid)

            # response - only FileUploadContext ID and determined filetype for UX display
            resp = {'fuc_id': fuc_id}
            return gateway_json_response(resp)

        raise BadRequest('Invalid Upload')
    except Exception as e:
        return build_error_response(e)
def _force_clean(cls, recreate=False):
    from pyon.core.bootstrap import get_sys_name, CFG
    from pyon.datastore.datastore_common import DatastoreFactory
    datastore = DatastoreFactory.get_datastore(config=CFG, variant=DatastoreFactory.DS_BASE,
                                               scope=get_sys_name())
    #datastore = DatastoreFactory.get_datastore(config=CFG, variant=DatastoreFactory.DS_BASE)

    dbs = datastore.list_datastores()
    things_to_clean = filter(lambda x: x.startswith('%s_' % get_sys_name().lower()), dbs)
    try:
        for thing in things_to_clean:
            datastore.delete_datastore(datastore_name=thing)
            if recreate:
                datastore.create_datastore(datastore_name=thing)
    finally:
        datastore.close()

    FileSystem._clean(CFG)
def refresh_datasets_xml(self):
    datasets_xml_path = self.CFG.get_safe('server.pydap.datasets_xml_path', "RESOURCE:ext/datasets.xml")
    filename = datasets_xml_path.split('/')[-1]
    base = '/'.join(datasets_xml_path.split('/')[:-1])
    real_path = FileSystem.get_extended_url(base)
    datasets_xml_path = os.path.join(real_path, filename)

    os.remove(datasets_xml_path)
def _create_coverage(self, dataset_id, parameter_dict_id, time_dom, spatial_dom):
    pd = self.dataset_management_client.read_parameter_dictionary(parameter_dict_id)
    pdict = ParameterDictionary.load(pd)
    sdom = GridDomain.load(spatial_dom.dump())
    tdom = GridDomain.load(time_dom.dump())
    file_root = FileSystem.get_url(FS.CACHE, 'datasets')
    scov = SimplexCoverage(file_root, dataset_id, dataset_id, parameter_dictionary=pdict,
                           temporal_domain=tdom, spatial_domain=sdom)
    return scov
def read_persisted_cache(self, sha1, encoding):
    byte_string = None
    path = FileSystem.get_hierarchical_url(FS.CACHE, sha1, '.%s' % encoding)
    try:
        with open(path, 'r') as f:
            byte_string = f.read()
    except IOError as e:
        raise BadRequest(e.message)
    return byte_string
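# A minimal pairing sketch (not from the original code): make_some_data above persists granule
# payloads keyed by their SHA1 under FS.CACHE, and read_persisted_cache reads them back. The
# payload below is a placeholder and the 'hdf5' encoding is illustrative.
import hashlib

hdf_string = 'placeholder binary HDF5 contents'
sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
with open(FileSystem.get_hierarchical_url(FS.CACHE, '%s.hdf5' % sha1), 'w') as f:
    f.write(hdf_string)
# later, on the service instance that owns read_persisted_cache:
#   byte_string = self.read_persisted_cache(sha1, 'hdf5')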
def process(self, packet):
    input = int(packet.get('num', 0))

    prep = 'echo \'1+%d\' | bc' % (input)
    output = commands.getoutput(prep)
    if self.has_output:
        self.publish(dict(num=output))

    with open(FileSystem.get_url(FS.TEMP, "transform_output"), 'a') as f:
        f.write('(%s): Received %s, transform: %s\n' % (self.name, packet, output))
def _create_view_coverage(self, dataset_id, description, parent_dataset_id):
    # As annoying as it is, we need to load the view coverage belonging to the parent dataset id
    # and use the information inside to build the new one...
    file_root = FileSystem.get_url(FS.CACHE, 'datasets')
    pscov = self._get_simplex_coverage(parent_dataset_id, mode='r')
    scov_location = pscov.persistence_dir
    pscov.close()
    vcov = ViewCoverage(file_root, dataset_id, description or dataset_id,
                        reference_coverage_location=scov_location)
    return vcov
def _create_coverage(self, dataset_id, description, parameter_dict, spatial_domain, temporal_domain):
    file_root = FileSystem.get_url(FS.CACHE, 'datasets')
    pdict = ParameterDictionary.load(parameter_dict)
    sdom = GridDomain.load(spatial_domain)
    tdom = GridDomain.load(temporal_domain)
    scov = self._create_simplex_coverage(dataset_id, pdict, sdom, tdom, self.inline_data_writes)
    vcov = ViewCoverage(file_root, dataset_id, description or dataset_id,
                        reference_coverage_location=scov.persistence_dir)
    scov.close()
    return vcov
def __init__(self, *args, **kwargs):
    BaseContainerAgent.__init__(self, *args, **kwargs)

    self._is_started = False

    # set id and name (as they are set in base class call)
    self.id = string.replace('%s_%d' % (os.uname()[1], os.getpid()), ".", "_")
    self.name = "cc_agent_%s" % self.id

    Container.instance = self

    # TODO: Bug: Replacing the CFG instance does not work because references are already public. Update directly
    dict_merge(CFG, kwargs, inplace=True)
    from pyon.core import bootstrap
    bootstrap.container_instance = self
    bootstrap.assert_configuration(CFG)
    log.debug("Container (sysname=%s) initializing ..." % bootstrap.get_sys_name())

    # Keep track of the overrides from the command-line, so they can trump app/rel file data
    self.spawn_args = kwargs

    # Load object and service registry etc.
    bootstrap_pyon()

    # Create this Container's specific ExchangeManager instance
    self.ex_manager = ExchangeManager(self)

    # Create this Container's specific ProcManager instance
    self.proc_manager = ProcManager(self)

    # Create this Container's specific AppManager instance
    self.app_manager = AppManager(self)

    # DatastoreManager - controls access to Datastores (both mock and couch backed)
    self.datastore_manager = DatastoreManager()

    # File System - Interface to the OS File System, using correct path names and setups
    self.file_system = FileSystem(CFG)

    # Governance Controller - manages the governance related interceptors
    self.governance_controller = GovernanceController(self)

    # sFlow manager - controls sFlow stat emission
    self.sflow_manager = SFlowManager(self)

    # Coordinates the container start
    self._is_started = False
    self._capabilities = []
    self._status = "INIT"

    # protection for when the container itself is used as a Process for clients
    self.container = self

    log.debug("Container initialized, OK.")
def _create_simplex_coverage(cls, dataset_id, parameter_dictionary, spatial_domain, temporal_domain):
    file_root = FileSystem.get_url(FS.CACHE, 'datasets')
    scov = SimplexCoverage(file_root, dataset_id, 'Simplex Coverage for %s' % dataset_id,
                           parameter_dictionary=parameter_dictionary,
                           temporal_domain=temporal_domain,
                           spatial_domain=spatial_domain)
    return scov
def _create_complex_coverage(cls, dataset_id, description, parameter_dict):
    pdict = ParameterDictionary.load(parameter_dict)
    file_root = FileSystem.get_url(FS.CACHE, 'datasets')
    ccov = ComplexCoverage(file_root, dataset_id, 'Complex Coverage for %s' % dataset_id,
                           parameter_dictionary=pdict,
                           complex_type=ComplexCoverageType.TEMPORAL_AGGREGATION)
    return ccov
def on_start(self):
    # these values should come in from a config file, maybe pyon.yml
    self.pydap_host = self.CFG.get_safe('server.pydap.host', 'localhost')
    self.pydap_port = self.CFG.get_safe('server.pydap.port', '8001')
    self.pydap_url = 'http://%s:%s/' % (self.pydap_host, self.pydap_port)
    self.pydap_data_path = self.CFG.get_safe('server.pydap.data_path', 'RESOURCE:ext/pydap')
    self.datasets_xml_path = self.get_datasets_xml_path(self.CFG)
    self.pydap_data_path = FileSystem.get_extended_url(self.pydap_data_path) + '/'

    self.setup_filesystem(self.datasets_xml_path)
    self.ux_url = self.CFG.get_safe('system.web_ui_url', 'http://localhost:3000/')
def _get_hdf_from_string(self, hdf_string):
    '''
    @param hdf_string binary string consisting of an HDF5 file.
    @return temporary file (full path) where the string was written to.
    @note The client is responsible for unlinking the file when finished.
    '''
    f = FileSystem.mktemp()
    f.write(hdf_string)
    retval = f.name
    f.close()
    return retval
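# A minimal round-trip sketch (not part of the original code) showing the contract described in
# the docstring above: the caller reads the temporary file and then unlinks it. The method name
# is hypothetical; acquire_data is used the same way as in the _slice and _get_time_index
# snippets.
def _read_and_cleanup_sketch(self, hdf_string, var_names, record_count):
    file_path = self._get_hdf_from_string(hdf_string)
    try:
        # acquire_data yields a dict of {var_name: {'values': ..., 'range': ...}}
        return acquire_data([file_path], var_names, record_count).next()
    finally:
        FileSystem.unlink(file_path)  # the caller owns cleanup of the temp file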
def refresh_datasets_xml(self):
    datasets_xml_path = self.CFG.get_safe('server.pydap.datasets_xml_path', "RESOURCE:ext/datasets.xml")
    filename = datasets_xml_path.split('/')[-1]
    base = '/'.join(datasets_xml_path.split('/')[:-1])
    real_path = FileSystem.get_extended_url(base)
    datasets_xml_path = os.path.join(real_path, filename)
    try:
        os.remove(datasets_xml_path)
    except OSError:
        pass  # File doesn't exist
def check_msg(msg, header):
    assertions(isinstance(msg, StreamGranuleContainer), 'Msg is not a container')
    hdf_string = msg.identifiables[msg.data_stream_id].values
    sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
    log.debug('Sha1 matches')
    log.debug('Dumping file so you can inspect it.')
    log.debug('Records: %d' % msg.identifiables['record_count'].value)
    with open(FileSystem.get_url(FS.TEMP, '%s.cap.hdf5' % sha1[:8]), 'w') as f:
        f.write(hdf_string)
        log.debug('Stream Capture: %s', f.name)
    result.set(True)
def on_start(self):
    # these values should come in from a config file, maybe pyon.yml
    self.pydap_host = self.CFG.get_safe('server.pydap.host', 'localhost')
    self.pydap_port = self.CFG.get_safe('server.pydap.port', '8001')
    self.pydap_url = 'http://%s:%s/' % (self.pydap_host, self.pydap_port)
    self.pydap_data_path = self.CFG.get_safe('server.pydap.data_path', 'RESOURCE:ext/pydap')
    self.datasets_xml_path = self.get_datasets_xml_path(self.CFG)
    self.pydap_data_path = FileSystem.get_extended_url(self.pydap_data_path) + '/'
    self.ux_url = self.CFG.get_safe('system.web_ui_url', 'http://localhost:3000/')
    self.jenv = Environment(loader=FileSystemLoader('res/templates'), trim_blocks=True, lstrip_blocks=True)
    self.resource_registry = self.container.resource_registry
    self.setup_filesystem(self.datasets_xml_path)
def setUp(self):
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')

    path = CFG.get_safe('server.pydap.data_path', "RESOURCE:ext/pydap")
    ext_path = FileSystem.get_extended_url(path)
    self.cov, self.filename = _make_coverage(ext_path)

    self.nt = 5
    self.cov.insert_timesteps(self.nt)
    self.time_data = [i + 1 for i in range(self.nt)]
    self.cov.set_parameter_values("time", value=self.time_data)

    host = CFG.get_safe('container.pydap_gateway.web_server.host', 'localhost')
    port = CFG.get_safe('container.pydap_gateway.web_server.port', '8001')
    self.request_url = "http://" + host + ":" + str(port) + os.sep + os.path.basename(self.filename)
def process(self, packet):
    """Processes incoming data!!!!
    """
    output = int(packet.get('num', 0)) + 1

    log.debug('(%s) Processing Packet: %s', self.name, packet)
    log.debug('(%s) Transform Complete: %s', self.name, output)

    if self.has_output:
        self.publish(dict(num=str(output)))

    with open(FileSystem.get_url(FS.TEMP, "transform_output"), 'a') as f:
        f.write('(%s): Received Packet: %s\n' % (self.name, packet))
        f.write('(%s): - Transform - %d\n' % (self.name, output))
def on_start(self):
    super(LightweightPyDAP, self).on_start()

    self.pydap_host = self.CFG.get_safe('server.pydap.host', 'localhost')
    self.pydap_port = self.CFG.get_safe('server.pydap.port', '8001')
    self.pydap_data_path = self.CFG.get_safe('server.pydap.data_path', 'RESOURCE:ext/pydap')
    self.pydap_data_path = FileSystem.get_extended_url(self.pydap_data_path)

    self.app = make_app(None, self.pydap_data_path, 'ion/core/static/templates/')
    self.log = getLogger('pydap')
    self.log.write = self.log.info
    self.server = WSGIServer((self.pydap_host, int(self.pydap_port)), self.app, log=self.log)
    self.server.start()
def hdf_to_string(self):
    """
    Convert the temporary hdf file holding the data into a binary string.
    Cleanup by deleting the hdf file and return the binary string.

    @retval hdf_string
    """
    # Return Value
    # ------------
    # hdf_string: ''
    #
    try:
        # open the hdf5 file using python 'open()'
        f = open(self.filename, mode='rb')
        # read the binary string representation of the file
        hdf_string = f.read()
        f.close()
    except IOError:
        log.exception("Error opening binary file for reading out hdfstring in HDFEncoder. ")
        raise HDFEncoderException("Error while trying to open file. ")
    finally:
        FileSystem.unlink(self.filename)

    return hdf_string
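# A minimal end-to-end sketch (not part of the original code) of how these snippets drive the
# encoder: add_hdf_dataset and encoder_close are used the same way in the _slice method above;
# the values path and array here are made up for illustration.
import numpy

codec = HDFEncoder()
codec.add_hdf_dataset('fields/temperature', numpy.arange(10.0))  # hypothetical values path
hdf_string = codec.encoder_close()  # the HDF5 file contents as a binary string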
def _create_simplex_coverage(cls, dataset_id, parameter_dictionary, spatial_domain, temporal_domain, inline_data_writes=True):
    file_root = FileSystem.get_url(FS.CACHE, 'datasets')
    scov = SimplexCoverage(file_root, uuid4().hex, 'Simplex Coverage for %s' % dataset_id,
                           parameter_dictionary=parameter_dictionary,
                           temporal_domain=temporal_domain,
                           spatial_domain=spatial_domain,
                           inline_data_writes=inline_data_writes)
    return scov
def setUpClass(cls):
    # This test does not start a container so we have to hack creating a FileSystem singleton instance
    FileSystem(DotDict())

    @unittest.skipIf(no_numpy_h5py, 'numpy and/or h5py not imported')
    def create_known(dataset_name, rootgrp_name, grp_name):
        """
        A known array to compare against during tests
        """
        known_array = numpy.random.rand(10, 20)

        filename = FileSystem.get_url(FS.TEMP, random_name(), ".hdf5")

        # Write an hdf file with known values to compare against
        h5pyfile = h5py.File(filename, mode='w', driver='core')
        grp = h5pyfile.create_group(rootgrp_name)
        subgrp = grp.create_group(grp_name)
        dataset = subgrp.create_dataset(dataset_name, known_array.shape, known_array.dtype.str,
                                        compression='gzip', compression_opts=4,
                                        maxshape=(None, None))
        dataset.write_direct(known_array)
        h5pyfile.close()

        # convert the hdf file into a binary string
        f = open(filename, mode='rb')
        # read the binary string representation of the file
        known_hdf_as_string = f.read()  # this is a known string to compare against during tests
        f.close()

        # cleaning up
        FileSystem.unlink(f.name)

        return known_array, known_hdf_as_string

    # Use the class method to patch these attributes onto the class.
    TestScienceObjectCodec.known_array, TestScienceObjectCodec.known_hdf_as_string = create_known(
        TestScienceObjectCodec.dataset_name,
        TestScienceObjectCodec.rootgrp_name,
        TestScienceObjectCodec.grp_name)
    TestScienceObjectCodec.known_hdf_as_sha1 = sha1(TestScienceObjectCodec.known_hdf_as_string)
def __init__(self, name=None):
    """
    @param name The name of the dataset
    """
    # generate a random name for the filename if it has not been provided.
    self.filename = FileSystem.get_url(fs=FS.TEMP, filename=name or random_name(), ext='encoder.hdf5')

    # Using inline imports to put off making hdf/numpy required dependencies
    import h5py

    # open an hdf file on disk - in /tmp to write data to since we can't yet do in memory
    log.debug("Creating h5py file object for the encoder at %s" % self.filename)
    if os.path.isfile(self.filename):
        # if file exists, then append to it
        self.h5pyfile = h5py.File(self.filename, mode='r+', driver='core')
    else:
        # if file does not already exist, write a new one
        self.h5pyfile = h5py.File(self.filename, mode='w', driver='core')
    assert self.h5pyfile, 'No h5py file object created.'