def sub_listen(msg, headers): assertions(isinstance(msg, StreamGranuleContainer), 'replayed message is not a granule.') hdf_string = msg.identifiables[data_stream_id].values sha1 = hashlib.sha1(hdf_string).hexdigest().upper() assertions(sha1 == msg.identifiables[encoding_id].sha1, 'Checksum failed.') assertions( msg.identifiables[element_count_id].value == 1, 'record replay count is incorrect %d.' % msg.identifiables[element_count_id].value) output_file = FileSystem.mktemp() output_file.write(msg.identifiables[data_stream_id].values) output_file_path = output_file.name output_file.close() output_vectors = acquire_data([output_file_path], fields, 2).next() for field in fields: comparison = (input_vectors[field]['values'] == output_vectors[field]['values']) assertions( comparison.all(), 'vector mismatch: %s vs %s' % (input_vectors[field]['values'], output_vectors[field]['values'])) FileSystem.unlink(output_file_path) ar.set(True)
def _get_hdf_from_string(self, hdf_string): ''' @param hdf_string binary string consisting of an HDF5 file. @return temporary file (full path) where the string was written to. @note client's responsible to unlink when finished. ''' f = FileSystem.mktemp() f.write(hdf_string) retval = f.name f.close() return retval
def sub_listen(msg, headers): assertions(isinstance(msg,StreamGranuleContainer),'replayed message is not a granule.') hdf_string = msg.identifiables[data_stream_id].values sha1 = hashlib.sha1(hdf_string).hexdigest().upper() assertions(sha1 == msg.identifiables[encoding_id].sha1,'Checksum failed.') assertions(msg.identifiables[element_count_id].value==1, 'record replay count is incorrect %d.' % msg.identifiables[element_count_id].value) output_file = FileSystem.mktemp() output_file.write(msg.identifiables[data_stream_id].values) output_file_path = output_file.name output_file.close() output_vectors = acquire_data([output_file_path],fields,2).next() for field in fields: comparison = (input_vectors[field]['values']==output_vectors[field]['values']) assertions(comparison.all(), 'vector mismatch: %s vs %s' % (input_vectors[field]['values'],output_vectors[field]['values'])) FileSystem.unlink(output_file_path) ar.set(True)
def __init__(self, hdf_string): """ @param hdf_string """ #try: assert isinstance(hdf_string, basestring), 'The input for instantiating the HDFDecoder object is not a string' #except AssertionError as err: # raise HDFDecoderException(err.message) #self.filename = FileSystem.get_url(fs=FS.TEMP, filename=hashlib.sha1(hdf_string).hexdigest(), ext='_decoder.hdf5') f = FileSystem.mktemp(ext='.hdf5') # save an hdf string to disk - in /tmp to so we can open it as an hdf file and read data from it f.write(hdf_string) f.close() self._list_of_datasets = [] self.filename = f.name
def test_replay_integration(self): ''' test_replay_integration ''' import numpy as np # Keep the import it's used in the vector comparison below even though pycharm says its unused. cc = self.container XP = self.XP assertions = self.assertTrue ### Every thing below here can be run as a script: log.debug('Got it') pubsub_management_service = PubsubManagementServiceClient(node=cc.node) ingestion_management_service = IngestionManagementServiceClient(node=cc.node) dataset_management_service = DatasetManagementServiceClient(node=cc.node) data_retriever_service = DataRetrieverServiceClient(node=cc.node) datastore_name = 'dm_test_replay_integration' producer = Publisher(name=(XP,'stream producer')) ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration( exchange_point_id=XP, couch_storage=CouchStorage(datastore_name=datastore_name,datastore_profile='SCIDATA'), hdf_storage=HdfStorage(), number_of_workers=1 ) ingestion_management_service.activate_ingestion_configuration( ingestion_configuration_id=ingestion_configuration_id ) definition = SBE37_CDM_stream_definition() data_stream_id = definition.data_stream_id encoding_id = definition.identifiables[data_stream_id].encoding_id element_count_id = definition.identifiables[data_stream_id].element_count_id stream_def_id = pubsub_management_service.create_stream_definition( container=definition ) stream_id = pubsub_management_service.create_stream( stream_definition_id=stream_def_id ) dataset_id = dataset_management_service.create_dataset( stream_id=stream_id, datastore_name=datastore_name, view_name='datasets/dataset_by_id' ) ingestion_management_service.create_dataset_configuration( dataset_id=dataset_id, archive_data=True, archive_metadata=True, ingestion_configuration_id = ingestion_configuration_id ) definition.stream_resource_id = stream_id packet = _create_packet(definition) input_file = FileSystem.mktemp() input_file.write(packet.identifiables[data_stream_id].values) input_file_path = input_file.name input_file.close() fields=[ 'conductivity', 'height', 'latitude', 'longitude', 'pressure', 'temperature', 'time' ] input_vectors = acquire_data([input_file_path],fields , 2).next() producer.publish(msg=packet, to_name=(XP,'%s.data' % stream_id)) replay_id, replay_stream_id = data_retriever_service.define_replay(dataset_id) ar = gevent.event.AsyncResult() def sub_listen(msg, headers): assertions(isinstance(msg,StreamGranuleContainer),'replayed message is not a granule.') hdf_string = msg.identifiables[data_stream_id].values sha1 = hashlib.sha1(hdf_string).hexdigest().upper() assertions(sha1 == msg.identifiables[encoding_id].sha1,'Checksum failed.') assertions(msg.identifiables[element_count_id].value==1, 'record replay count is incorrect %d.' % msg.identifiables[element_count_id].value) output_file = FileSystem.mktemp() output_file.write(msg.identifiables[data_stream_id].values) output_file_path = output_file.name output_file.close() output_vectors = acquire_data([output_file_path],fields,2).next() for field in fields: comparison = (input_vectors[field]['values']==output_vectors[field]['values']) assertions(comparison.all(), 'vector mismatch: %s vs %s' % (input_vectors[field]['values'],output_vectors[field]['values'])) FileSystem.unlink(output_file_path) ar.set(True) subscriber = Subscriber(name=(XP,'replay listener'),callback=sub_listen) g = gevent.Greenlet(subscriber.listen, binding='%s.data' % replay_stream_id) g.start() data_retriever_service.start_replay(replay_id) ar.get(timeout=10) FileSystem.unlink(input_file_path)
def test_replay_integration(self): ''' test_replay_integration ''' import numpy as np # Keep the import it's used in the vector comparison below even though pycharm says its unused. cc = self.container XP = self.XP assertions = self.assertTrue ### Every thing below here can be run as a script: log.debug('Got it') pubsub_management_service = PubsubManagementServiceClient(node=cc.node) ingestion_management_service = IngestionManagementServiceClient( node=cc.node) dataset_management_service = DatasetManagementServiceClient( node=cc.node) data_retriever_service = DataRetrieverServiceClient(node=cc.node) datastore_name = 'dm_test_replay_integration' producer = Publisher(name=(XP, 'stream producer')) ingestion_configuration_id = ingestion_management_service.create_ingestion_configuration( exchange_point_id=XP, couch_storage=CouchStorage(datastore_name=datastore_name, datastore_profile='SCIDATA'), hdf_storage=HdfStorage(), number_of_workers=1) ingestion_management_service.activate_ingestion_configuration( ingestion_configuration_id=ingestion_configuration_id) definition = SBE37_CDM_stream_definition() data_stream_id = definition.data_stream_id encoding_id = definition.identifiables[data_stream_id].encoding_id element_count_id = definition.identifiables[ data_stream_id].element_count_id stream_def_id = pubsub_management_service.create_stream_definition( container=definition) stream_id = pubsub_management_service.create_stream( stream_definition_id=stream_def_id) dataset_id = dataset_management_service.create_dataset( stream_id=stream_id, datastore_name=datastore_name, view_name='datasets/dataset_by_id') ingestion_management_service.create_dataset_configuration( dataset_id=dataset_id, archive_data=True, archive_metadata=True, ingestion_configuration_id=ingestion_configuration_id) definition.stream_resource_id = stream_id packet = _create_packet(definition) input_file = FileSystem.mktemp() input_file.write(packet.identifiables[data_stream_id].values) input_file_path = input_file.name input_file.close() fields = [ 'conductivity', 'height', 'latitude', 'longitude', 'pressure', 'temperature', 'time' ] input_vectors = acquire_data([input_file_path], fields, 2).next() producer.publish(msg=packet, to_name=(XP, '%s.data' % stream_id)) replay_id, replay_stream_id = data_retriever_service.define_replay( dataset_id) ar = gevent.event.AsyncResult() def sub_listen(msg, headers): assertions(isinstance(msg, StreamGranuleContainer), 'replayed message is not a granule.') hdf_string = msg.identifiables[data_stream_id].values sha1 = hashlib.sha1(hdf_string).hexdigest().upper() assertions(sha1 == msg.identifiables[encoding_id].sha1, 'Checksum failed.') assertions( msg.identifiables[element_count_id].value == 1, 'record replay count is incorrect %d.' % msg.identifiables[element_count_id].value) output_file = FileSystem.mktemp() output_file.write(msg.identifiables[data_stream_id].values) output_file_path = output_file.name output_file.close() output_vectors = acquire_data([output_file_path], fields, 2).next() for field in fields: comparison = (input_vectors[field]['values'] == output_vectors[field]['values']) assertions( comparison.all(), 'vector mismatch: %s vs %s' % (input_vectors[field]['values'], output_vectors[field]['values'])) FileSystem.unlink(output_file_path) ar.set(True) subscriber = Subscriber(name=(XP, 'replay listener'), callback=sub_listen) g = gevent.Greenlet(subscriber.listen, binding='%s.data' % replay_stream_id) g.start() data_retriever_service.start_replay(replay_id) ar.get(timeout=10) FileSystem.unlink(input_file_path)