def sub_listen(msg, headers):

            assertions(isinstance(msg, StreamGranuleContainer),
                       'replayed message is not a granule.')
            hdf_string = msg.identifiables[data_stream_id].values
            sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
            assertions(sha1 == msg.identifiables[encoding_id].sha1,
                       'Checksum failed.')
            assertions(
                msg.identifiables[element_count_id].value == 1,
                'record replay count is incorrect %d.' %
                msg.identifiables[element_count_id].value)
            output_file = FileSystem.mktemp()
            output_file.write(msg.identifiables[data_stream_id].values)
            output_file_path = output_file.name
            output_file.close()
            output_vectors = acquire_data([output_file_path], fields, 2).next()
            for field in fields:
                comparison = (input_vectors[field]['values'] ==
                              output_vectors[field]['values'])
                assertions(
                    comparison.all(), 'vector mismatch: %s vs %s' %
                    (input_vectors[field]['values'],
                     output_vectors[field]['values']))
            FileSystem.unlink(output_file_path)
            ar.set(True)
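
The listener above verifies the replayed payload by recomputing the SHA-1 of the HDF string and comparing it against the checksum carried in the granule's encoding identifiable. A minimal standalone sketch of that verification step (plain hashlib, with made-up example bytes rather than a real HDF5 payload):

import hashlib

def verify_checksum(hdf_string, expected_sha1):
    # Recompute the digest the same way the listener does: hex digest, upper-cased.
    return hashlib.sha1(hdf_string).hexdigest().upper() == expected_sha1

payload = b'not a real HDF5 file, just example bytes'
digest = hashlib.sha1(payload).hexdigest().upper()
assert verify_checksum(payload, digest)        # intact payload passes
assert not verify_checksum(payload, '0' * 40)  # altered/incorrect checksum fails
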
Example #2
        def create_known(dataset_name, rootgrp_name, grp_name):
            """
            A known array to compare against during tests
            """

            known_array = numpy.random.rand(10, 20)

            filename = FileSystem.get_url(FS.TEMP, random_name(), ".hdf5")

            # Write an hdf file with known values to compare against
            h5pyfile = h5py.File(filename, mode='w', driver='core')
            grp = h5pyfile.create_group(rootgrp_name)
            subgrp = grp.create_group(grp_name)
            dataset = subgrp.create_dataset(dataset_name,
                                            known_array.shape,
                                            known_array.dtype.str,
                                            compression='gzip',
                                            compression_opts=4,
                                            maxshape=(None, None))

            dataset.write_direct(known_array)
            h5pyfile.close()

            # convert the hdf file into a binary string
            f = open(filename, mode='rb')
            # read the binary string representation of the file
            known_hdf_as_string = f.read()  # this is a known string to compare against during tests
            f.close()
            # cleaning up
            FileSystem.unlink(f.name)

            return known_array, known_hdf_as_string
        def create_known(dataset_name, rootgrp_name, grp_name):
            """
            A known array to compare against during tests
            """

            known_array = numpy.ones((10,20))

            filename = FileSystem.get_url(FS.TEMP,random_name(), ".hdf5")

            # Write an hdf file with known values to compare against
            h5pyfile = h5py.File(filename, mode = 'w', driver='core')
            grp = h5pyfile.create_group(rootgrp_name)
            subgrp = grp.create_group(grp_name)
            dataset = subgrp.create_dataset(dataset_name, known_array.shape, known_array.dtype.str, maxshape=(None,None))
            dataset.write_direct(known_array)
            h5pyfile.close()

            # convert the hdf file into a binary string
            f = open(filename, mode='rb')
            # read the binary string representation of the file
            known_hdf_as_string = f.read() # this is a known string to compare against during tests
            f.close()
            # cleaning up
            FileSystem.unlink(f.name)

            return known_array, known_hdf_as_string
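
Both create_known variants return the array together with the raw bytes of the HDF5 file so tests can check that a decoded payload matches known values. As a rough illustration of that comparison (a hypothetical helper, not part of this listing, using only tempfile, h5py and numpy):

import os
import tempfile

import h5py
import numpy

def round_trips(known_array, known_hdf_as_string,
                dataset_path='rootgrp/grp/dataset'):
    # dataset_path is the '<rootgrp_name>/<grp_name>/<dataset_name>' path used
    # when the file was written. Dump the binary string to a temporary .hdf5
    # file, read the dataset back, and compare element-wise.
    fd, path = tempfile.mkstemp(suffix='.hdf5')
    os.close(fd)
    try:
        with open(path, 'wb') as f:
            f.write(known_hdf_as_string)
        with h5py.File(path, 'r') as hdf:
            read_back = hdf[dataset_path][...]
        return numpy.array_equal(known_array, read_back)
    finally:
        os.unlink(path)
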
    def tearDown(self):
        """
        Cleanup. Delete Subscription, Stream, Process Definition
        """

        for fname in self.fnames:
            FileSystem.unlink(fname)
Example #6
    def _force_clean(cls, recreate=False, initial=False):
        # Database resources
        from pyon.core.bootstrap import get_sys_name, CFG
        from pyon.datastore.datastore_common import DatastoreFactory
        datastore = DatastoreFactory.get_datastore(config=CFG, variant=DatastoreFactory.DS_BASE, scope=get_sys_name())
        if initial:
            datastore._init_database(datastore.database)

        dbs = datastore.list_datastores()
        clean_prefix = '%s_' % get_sys_name().lower()
        things_to_clean = [x for x in dbs if x.startswith(clean_prefix)]
        try:
            for thing in things_to_clean:
                datastore.delete_datastore(datastore_name=thing)
                if recreate:
                    datastore.create_datastore(datastore_name=thing)

        finally:
            datastore.close()

        # Broker resources
        from putil.rabbitmq.rabbit_util import RabbitManagementUtil
        rabbit_util = RabbitManagementUtil(CFG, sysname=bootstrap.get_sys_name())
        deleted_exchanges, deleted_queues = rabbit_util.clean_by_sysname()
        log.info("Deleted %s exchanges, %s queues" % (len(deleted_exchanges), len(deleted_queues)))

        # File system
        from pyon.util.file_sys import FileSystem
        FileSystem._clean(CFG)
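
The cleanup is scoped by the sysname prefix, so only datastores belonging to the current system are deleted. A tiny illustration of that filter on sample names:

sys_name = 'ion_test'  # example sysname
dbs = ['ion_test_resources', 'ion_test_events', 'other_system_resources']
clean_prefix = '%s_' % sys_name.lower()
assert [x for x in dbs if x.startswith(clean_prefix)] == \
    ['ion_test_resources', 'ion_test_events']
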
Example #7
    def _get_time_index(self, granule, timeval):
        '''
        @brief Obtains the index where a time's value is
        @param granule must be a complete dataset (hdf_string provided)
        @param timeval the vector value
        @return Index value for timeval or closest approx such that timeval is IN the subset
        '''
        assert isinstance(granule, StreamGranuleContainer), 'object is not a granule.'
        assert granule.identifiables[self.data_stream_id].values, 'hdf_string is not provided.'

        hdf_string = granule.identifiables[self.data_stream_id].values
        file_path = self._get_hdf_from_string(hdf_string)

        #-------------------------------------------------------------------------------------
        # Determine the field_id for the temporal coordinate vector (aka time)
        #-------------------------------------------------------------------------------------

        time_field = self.definition.identifiables[self.time_id].coordinate_ids[0]
        value_path = (granule.identifiables[time_field].values_path or
                      self.definition.identifiables[time_field].values_path)
        record_count = granule.identifiables[self.element_count_id].value

        #-------------------------------------------------------------------------------------
        # Go through the time vector and get the indexes that correspond to the timeval
        # It will find a value such that
        # t_n <= i < t_(n+1), where i is the index
        #-------------------------------------------------------------------------------------

        var_name = value_path.split('/').pop()
        res = acquire_data([file_path], [var_name], record_count).next()
        time_vector = res[var_name]['values']
        retval = 0
        for i in xrange(len(time_vector)):
            if time_vector[i] == timeval:
                retval = i
                break
            elif i == 0 and time_vector[i] > timeval:
                retval = i
                break
            elif (i + 1) < len(time_vector):  # not last val
                if time_vector[i] < timeval and time_vector[i + 1] > timeval:
                    retval = i
                    break
            else:  # last val
                retval = i
                break
        FileSystem.unlink(file_path)
        return retval
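
The search loop returns the exact index on an equality match and otherwise the index i with time_vector[i] <= timeval < time_vector[i + 1], clamping to the first or last element when timeval falls outside the vector. The same selection rule restated as a small standalone function, with the expected answers spelled out on sample data:

def find_time_index(time_vector, timeval):
    # Mirrors the loop in _get_time_index above.
    retval = 0
    for i in range(len(time_vector)):
        if time_vector[i] == timeval:
            retval = i
            break
        elif i == 0 and time_vector[i] > timeval:
            retval = i
            break
        elif (i + 1) < len(time_vector):  # not the last value
            if time_vector[i] < timeval and time_vector[i + 1] > timeval:
                retval = i
                break
        else:  # last value
            retval = i
            break
    return retval

times = [0, 10, 20, 30]
assert find_time_index(times, 20) == 2   # exact match
assert find_time_index(times, 15) == 1   # 10 <= 15 < 20
assert find_time_index(times, -5) == 0   # clamped to the first timestep
assert find_time_index(times, 99) == 3   # clamped to the last timestep
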
    def on_start(self):
        #these values should come in from a config file, maybe pyon.yml
        self.pydap_host = self.CFG.get_safe('server.pydap.host', 'localhost')
        self.pydap_port = self.CFG.get_safe('server.pydap.port', '8001')
        self.pydap_url  = 'http://%s:%s/' % (self.pydap_host, self.pydap_port)
        self.pydap_data_path = self.CFG.get_safe('server.pydap.data_path', 'RESOURCE:ext/pydap')
        self.datasets_xml_path = self.CFG.get_safe('server.pydap.datasets_xml_path', "RESOURCE:ext/datasets.xml")
        self.pydap_data_path = FileSystem.get_extended_url(self.pydap_data_path) + '/'

        filename = self.datasets_xml_path.split('/')[-1]
        base = '/'.join(self.datasets_xml_path.split('/')[:-1])
        real_path = FileSystem.get_extended_url(base)
        self.datasets_xml_path = os.path.join(real_path, filename)
        self.setup_filesystem(real_path)
Example #9
    def _slice(self, granule, slice_):
        '''
        @brief Creates a granule which is a slice of the granule parameter
        @param granule the superset
        @param slice_ The slice values for which to create the granule
        @return Crafted subset granule of the parameter granule.
        '''
        retval = copy.deepcopy(granule)
        fields = self._list_data(self.definition, granule)
        record_count = slice_.stop - slice_.start
        assert record_count > 0, 'slice is malformed'
        pairs = self._pair_up(granule)
        var_names = list([i[0] for i in pairs])  # Get the var_names from the pairs
        log.debug('var_names: %s', var_names)
        file_path = self._get_hdf_from_string(
            granule.identifiables[self.data_stream_id].values)
        codec = HDFEncoder()
        vectors = acquire_data([file_path], var_names, record_count,
                               slice_).next()

        for row, value in vectors.iteritems():
            vp = self._find_vp(pairs, row)
            # Determine the range_id reverse dictionary lookup
            #@todo: improve this pattern
            for field, path in fields.iteritems():
                if vp == path:
                    range_id = field
                    break
            bounds_id = retval.identifiables[range_id].bounds_id
            # Recalculate the bounds for this field and update the granule
            range = value['range']
            retval.identifiables[bounds_id].value_pair[0] = float(range[0])
            retval.identifiables[bounds_id].value_pair[1] = float(range[1])
            codec.add_hdf_dataset(vp, value['values'])
            record_count = len(value['values'])
            #----- DEBUGGING ---------
            log.debug('slice- row: %s', row)
            log.debug('slice- value_path: %s', vp)
            log.debug('slice- range_id: %s', range_id)
            log.debug('slice- bounds_id: %s', bounds_id)
            log.debug('slice- limits: %s', value['range'])
            #-------------------------

        retval.identifiables[self.element_count_id].value = record_count
        hdf_string = codec.encoder_close()
        self._patch_granule(retval, hdf_string)
        FileSystem.unlink(file_path)
        return retval
Example #10
    def _force_clean(cls, recreate=False):
        from pyon.core.bootstrap import get_sys_name, CFG
        from pyon.datastore.couchdb.couchdb_standalone import CouchDataStore
        datastore = CouchDataStore(config=CFG)
        dbs = datastore.list_datastores()
        things_to_clean = filter(lambda x: x.startswith('%s_' % get_sys_name().lower()), dbs)
        try:
            for thing in things_to_clean:
                datastore.delete_datastore(datastore_name=thing)
                if recreate:
                    datastore.create_datastore(datastore_name=thing)

        finally:
            datastore.close()
        FileSystem._clean(CFG)
Example #12
    def setUp(self):
        # This test does not start a container, so we have to hack together a FileSystem singleton instance
        FileSystem(DotDict())

        self.px_ctd = SimpleCtdPublisher()
        self.px_ctd.last_time = 0

        self.tx_L0 = ctd_L0_all()
        self.tx_L0.streams = defaultdict(Mock)
        self.tx_L0.conductivity = Mock()
        self.tx_L0.temperature = Mock()
        self.tx_L0.pressure = Mock()

        self.tx_L1_C = CTDL1ConductivityTransform()
        self.tx_L1_C.streams = defaultdict(Mock)

        self.tx_L1_T = CTDL1TemperatureTransform()
        self.tx_L1_T.streams = defaultdict(Mock)

        self.tx_L1_P = CTDL1PressureTransform()
        self.tx_L1_P.streams = defaultdict(Mock)

        self.tx_L2_S = SalinityTransform()
        self.tx_L2_S.streams = defaultdict(Mock)

        self.tx_L2_D = DensityTransform()
        self.tx_L2_D.streams = defaultdict(Mock)
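
Each transform's streams attribute is replaced with a defaultdict(Mock), so any stream name the transform touches is materialised as a fresh Mock the test can interrogate afterwards. A short sketch of that pattern (Mock from the mock package on Python 2, unittest.mock on Python 3; the publish() call is only illustrative):

from collections import defaultdict

try:
    from unittest.mock import Mock  # Python 3
except ImportError:
    from mock import Mock           # Python 2 'mock' package

streams = defaultdict(Mock)

# Accessing an unknown key creates a Mock on the fly, so code under test can
# write to any stream name without prior setup ...
streams['conductivity'].publish({'value': 3.2})

# ... and the test can assert on exactly what was sent to it.
streams['conductivity'].publish.assert_called_once_with({'value': 3.2})
assert 'temperature' not in streams  # untouched streams are never created
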
Example #13
 def _splice_coverage(cls, dataset_id, scov):
     file_root = FileSystem.get_url(FS.CACHE, 'datasets')
     vcov = cls._get_coverage(dataset_id, mode='a')
     scov_pth = scov.persistence_dir
     if isinstance(vcov.reference_coverage, SimplexCoverage):
         ccov = ComplexCoverage(
             file_root,
             uuid4().hex,
             'Complex coverage for %s' % dataset_id,
             reference_coverage_locs=[
                 vcov.head_coverage_path,
             ],
             parameter_dictionary=ParameterDictionary(),
             complex_type=ComplexCoverageType.TEMPORAL_AGGREGATION)
         log.info('Creating Complex Coverage: %s', ccov.persistence_dir)
         ccov.append_reference_coverage(scov_pth)
         ccov_pth = ccov.persistence_dir
         ccov.close()
         vcov.replace_reference_coverage(ccov_pth)
     elif isinstance(vcov.reference_coverage, ComplexCoverage):
         log.info('Appending simplex coverage to complex coverage')
         #vcov.reference_coverage.append_reference_coverage(scov_pth)
         dir_path = vcov.reference_coverage.persistence_dir
         vcov.close()
         ccov = AbstractCoverage.load(dir_path, mode='a')
         ccov.append_reference_coverage(scov_pth)
         ccov.refresh()
         ccov.close()
     vcov.refresh()
     vcov.close()
 def on_start(self):
     super(TransformCapture, self).on_start()
     #        #@todo: Remove debugging statements
     log.debug('(Transform: %s) Starting...', self.name)
     self.file_name = self.CFG.get_safe(
         'process.file_name', FileSystem.get_url(FS.TEMP,
                                                 'transform_output'))
    def make_some_data(self):
        import numpy as np

        stream_id = 'I am very special'
        definition = SBE37_CDM_stream_definition()
        definition.stream_resource_id = stream_id

        self.couch.create(definition)

        total = 200
        n = 10 # at most n records per granule
        i = 0

        while i < total:
            r = random.randint(1,n)

            psc = PointSupplementConstructor(point_definition=definition, stream_id=stream_id)
            for x in xrange(r):
                i+=1
                point_id = psc.add_point(time=i, location=(0,0,0))
                psc.add_scalar_point_coverage(point_id=point_id, coverage_id='temperature', value=np.random.normal(loc=48.0,scale=4.0, size=1)[0])
                psc.add_scalar_point_coverage(point_id=point_id, coverage_id='pressure', value=np.float32(1.0))
                psc.add_scalar_point_coverage(point_id=point_id, coverage_id='conductivity', value=np.float32(2.0))
            granule = psc.close_stream_granule()
            hdf_string = granule.identifiables[definition.data_stream_id].values
            sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
            with open(FileSystem.get_hierarchical_url(FS.CACHE, '%s.hdf5' % sha1),'w') as f:
                f.write(hdf_string)
            granule.identifiables[definition.data_stream_id].values = ''
            self.couch.create(granule)
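
make_some_data stores each granule's HDF payload in the FS.CACHE area under the upper-cased SHA-1 of its bytes, which mirrors how read_persisted_cache (further down in this listing) looks payloads up by digest. The write side, extracted into a sketch (FS is assumed to be importable alongside FileSystem from pyon.util.file_sys, as in the other snippets here):

import hashlib

from pyon.util.file_sys import FileSystem, FS  # FS assumed alongside FileSystem

def cache_hdf_string(hdf_string):
    # The file name is the SHA-1 of the payload, so readers only need the digest.
    sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
    with open(FileSystem.get_hierarchical_url(FS.CACHE, '%s.hdf5' % sha1), 'w') as f:
        f.write(hdf_string)
    return sha1
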
    def get_datasets_xml_path(cls, cfg):
        datasets_xml_path = cfg.get_safe('server.pydap.datasets_xml_path', 'RESOURCE:ext/datasets.xml')
        base, filename = os.path.split(datasets_xml_path)
        base = FileSystem.get_extended_url(base)
        path = os.path.join(base, filename)

        return path
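
Unlike the on_start and refresh_datasets_xml variants elsewhere in this listing, which slice the configured path on '/' by hand, this helper uses os.path.split. A quick look at what that produces for the default value (pure standard library; FileSystem.get_extended_url, which resolves the RESOURCE: prefix, is container-specific and skipped here):

import os

datasets_xml_path = 'RESOURCE:ext/datasets.xml'

# Same (base, filename) pair that the manual "split('/')[:-1]" /
# "split('/')[-1]" code computes, with less string plumbing.
base, filename = os.path.split(datasets_xml_path)
assert base == 'RESOURCE:ext'
assert filename == 'datasets.xml'
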
 def _get_cov(self, name, nt):
     path = CFG.get_safe('server.pydap.data_path', "RESOURCE:ext/pydap")
     ext_path = FileSystem.get_extended_url(path)
     cov,filename = _make_coverage(ext_path, "the_cov")
     cov.insert_timesteps(nt) 
     cov.set_parameter_values("time", value=nt)
     return cov, filename
Example #19
    def __init__(self, name = None):
        """
        @param name The name of the dataset
        """
        # generate a random name for the filename if it has not been provided.
        self.filename = FileSystem.get_url(fs=FS.TEMP, filename=name or random_name(), ext='encoder.hdf5')

        # Using inline imports to put off making hdf/numpy required dependencies
        import h5py

        # open an hdf file on disk (in /tmp) to write data to, since we can't yet do it in memory
        try:
            log.debug("Creating h5py file object for the encoder at %s" % self.filename)
            if os.path.isfile(self.filename):
                # if file exists, then append to it
                self.h5pyfile = h5py.File(self.filename, mode = 'r+', driver='core')
            else:
                # if file does not already exist, write a new one
                self.h5pyfile = h5py.File(self.filename, mode = 'w', driver='core')
            assert self.h5pyfile, 'No h5py file object created.'
        except IOError:
            log.debug("Error opening file for the HDFEncoder! ")
            raise HDFEncoderException("Error while trying to open file. ")
        except AssertionError as err:
            log.debug(err.message)
            raise HDFEncoderException(err.message)
 def _create_coverage(self, dataset_id, description, parameter_dict, spatial_domain,temporal_domain):
     pdict = ParameterDictionary.load(parameter_dict)
     sdom = GridDomain.load(spatial_domain)
     tdom = GridDomain.load(temporal_domain)
     file_root = FileSystem.get_url(FS.CACHE,'datasets')
     scov = SimplexCoverage(file_root,dataset_id,description or dataset_id,parameter_dictionary=pdict, temporal_domain=tdom, spatial_domain=sdom, inline_data_writes=self.inline_data_writes)
     return scov
Example #22
    def __init__(self, *args, **kwargs):
        BaseContainerAgent.__init__(self, *args, **kwargs)

        self._is_started = False

        self._capabilities = []

        # set container id and cc_agent name (as they are set in base class call)
        self.id = get_default_container_id()
        self.name = "cc_agent_%s" % self.id

        Container.instance = self

        from pyon.core import bootstrap
        bootstrap.container_instance = self

        log.debug("Container (sysname=%s) initializing ..." % bootstrap.get_sys_name())

        # DatastoreManager - controls access to Datastores (both mock and couch backed)
        self.datastore_manager = DatastoreManager()

        self.datastore_manager.start()
        self._capabilities.append("DATASTORE_MANAGER")

        # Keep track of the overrides from the command-line, so they can trump app/rel file data
        self.spawn_args = kwargs

        # Instantiate Directory and self-register
        # Has the additional side effect of either
        # bootstrapping the configuration into the
        # directory or reading the configuration, based
        # on the value of the auto_bootstrap setting
        self.directory = Directory()

        # Create this Container's specific ExchangeManager instance
        self.ex_manager = ExchangeManager(self)

        # Create this Container's specific ProcManager instance
        self.proc_manager = ProcManager(self)

        # Create this Container's specific AppManager instance
        self.app_manager = AppManager(self)

        # File System - Interface to the OS File System, using correct path names and setups
        self.file_system = FileSystem(CFG)

        # Governance Controller - manages the governance related interceptors
        self.governance_controller = GovernanceController(self)

        # sFlow manager - controls sFlow stat emission
        self.sflow_manager = SFlowManager(self)

        # Coordinates the container start
        self._status = "INIT"

        # protection for when the container itself is used as a Process for clients
        self.container = self

        log.debug("Container initialized, OK.")
def upload_qc():
    upload_folder = FileSystem.get_url(FS.TEMP, 'uploads')
    try:

        object_store = Container.instance.object_store

        # required fields
        upload = request.files['file']  # <input type=file name="file">

        if upload:

            # upload file - run filename through werkzeug.secure_filename
            filename = secure_filename(upload.filename)
            path = os.path.join(upload_folder, filename)
            upload_time = time.time()
            upload.save(path)
            filetype = _check_magic(upload) or 'CSV'  # Either going to be ZIP or CSV, probably

            # register upload
            file_upload_context = {
                'name': 'User uploaded QC file %s' % filename,
                'filename': filename,
                'filetype': filetype,  # only CSV, no detection necessary
                'path': path,
                'upload_time': upload_time,
                'status': 'File uploaded to server'
            }
            fuc_id, _ = object_store.create_doc(file_upload_context)

            # client to process dispatch
            pd_client = ProcessDispatcherServiceClient()

            # create process definition
            process_definition = ProcessDefinition(
                name='upload_qc_processor',
                executable={
                    'module': 'ion.processes.data.upload.upload_qc_processing',
                    'class': 'UploadQcProcessing'
                })
            process_definition_id = pd_client.create_process_definition(
                process_definition)
            # create process
            process_id = pd_client.create_process(process_definition_id)
            #schedule process
            config = DotDict()
            config.process.fuc_id = fuc_id
            pid = pd_client.schedule_process(process_definition_id,
                                             process_id=process_id,
                                             configuration=config)
            log.info('UploadQcProcessing process created %s' % pid)
            # response - only FileUploadContext ID and determined filetype for UX display
            resp = {'fuc_id': fuc_id}
            return gateway_json_response(resp)

        raise BadRequest('Invalid Upload')

    except Exception as e:
        return build_error_response(e)
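
The user-supplied filename is passed through werkzeug's secure_filename before being joined onto the upload folder, which keeps path-traversal components out of the saved path. For illustration (behaviour as documented by werkzeug):

from werkzeug.utils import secure_filename

# Path separators and leading dots are stripped; spaces become underscores.
print(secure_filename('../../../etc/passwd'))  # 'etc_passwd'
print(secure_filename('My cool QC file.csv'))  # 'My_cool_QC_file.csv'
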
Example #24
    def _force_clean(cls, recreate=False):
        from pyon.core.bootstrap import get_sys_name, CFG
        from pyon.datastore.datastore_common import DatastoreFactory
        datastore = DatastoreFactory.get_datastore(config=CFG, variant=DatastoreFactory.DS_BASE, scope=get_sys_name())
        #datastore = DatastoreFactory.get_datastore(config=CFG, variant=DatastoreFactory.DS_BASE)

        dbs = datastore.list_datastores()
        things_to_clean = filter(lambda x: x.startswith('%s_' % get_sys_name().lower()), dbs)
        try:
            for thing in things_to_clean:
                datastore.delete_datastore(datastore_name=thing)
                if recreate:
                    datastore.create_datastore(datastore_name=thing)

        finally:
            datastore.close()
        FileSystem._clean(CFG)
    def refresh_datasets_xml(self):
        datasets_xml_path = self.CFG.get_safe('server.pydap.datasets_xml_path', "RESOURCE:ext/datasets.xml")
        filename = datasets_xml_path.split('/')[-1]
        base = '/'.join(datasets_xml_path.split('/')[:-1])
        real_path = FileSystem.get_extended_url(base)
        datasets_xml_path = os.path.join(real_path, filename)

        os.remove(datasets_xml_path)
 def _create_coverage(self, dataset_id, parameter_dict_id, time_dom, spatial_dom):
     pd = self.dataset_management_client.read_parameter_dictionary(parameter_dict_id)
     pdict = ParameterDictionary.load(pd)
     sdom = GridDomain.load(spatial_dom.dump())
     tdom = GridDomain.load(time_dom.dump())
     file_root = FileSystem.get_url(FS.CACHE,'datasets')
     scov = SimplexCoverage(file_root, dataset_id, dataset_id, parameter_dictionary=pdict, temporal_domain=tdom, spatial_domain=sdom)
     return scov
 def read_persisted_cache(self, sha1, encoding):
     byte_string = None
     path = FileSystem.get_hierarchical_url(FS.CACHE,sha1,'.%s' % encoding)
     try:
         with open(path, 'r') as f:
             byte_string = f.read()
     except IOError as e:
         raise BadRequest(e.message)
     return byte_string
Example #30
    def process(self,packet):
        input = int(packet.get('num',0))
        prep = 'echo \'1+%d\' | bc' %(input)
        output = commands.getoutput(prep)
        if self.has_output:
            self.publish(dict(num=output))

        with open(FileSystem.get_url(FS.TEMP,"transform_output"),'a') as f:
            f.write('(%s): Received %s, transform: %s\n' %(self.name, packet, output))
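
Several transforms in this listing append their output to a shared capture file named 'transform_output' under FS.TEMP. A test can read that capture back with the same FileSystem call (a sketch assuming FileSystem and FS come from pyon.util.file_sys, as imported elsewhere in this listing):

from pyon.util.file_sys import FileSystem, FS  # FS assumed alongside FileSystem

def read_transform_capture():
    # Same path the transforms open in append mode; here we just read it back.
    capture_path = FileSystem.get_url(FS.TEMP, 'transform_output')
    with open(capture_path, 'r') as f:
        return f.read().splitlines()

# e.g.: assert any('transform:' in line for line in read_transform_capture())
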
 def _create_view_coverage(self, dataset_id, description, parent_dataset_id):
     # As annoying as it is, we need to load the view coverage belonging to the parent dataset id
     # and use the information inside to build the new one...
     file_root = FileSystem.get_url(FS.CACHE,'datasets')
     pscov = self._get_simplex_coverage(parent_dataset_id, mode='r')
     scov_location = pscov.persistence_dir
     pscov.close()
     vcov = ViewCoverage(file_root, dataset_id, description or dataset_id, reference_coverage_location=scov_location)
     return vcov
 def _create_coverage(self, dataset_id, description, parameter_dict, spatial_domain,temporal_domain):
     file_root = FileSystem.get_url(FS.CACHE,'datasets')
     pdict = ParameterDictionary.load(parameter_dict)
     sdom = GridDomain.load(spatial_domain)
     tdom = GridDomain.load(temporal_domain)
     scov = self._create_simplex_coverage(dataset_id, pdict, sdom, tdom, self.inline_data_writes)
     vcov = ViewCoverage(file_root, dataset_id, description or dataset_id, reference_coverage_location=scov.persistence_dir)
     scov.close()
     return vcov
Example #35
    def __init__(self, *args, **kwargs):
        BaseContainerAgent.__init__(self, *args, **kwargs)

        self._is_started = False

        # set id and name (as they are set in base class call)
        self.id = string.replace('%s_%d' % (os.uname()[1], os.getpid()), ".",
                                 "_")
        self.name = "cc_agent_%s" % self.id

        Container.instance = self

        # TODO: Bug: Replacing the CFG instance does not work because references are already public. Update directly
        dict_merge(CFG, kwargs, inplace=True)
        from pyon.core import bootstrap
        bootstrap.container_instance = self
        bootstrap.assert_configuration(CFG)
        log.debug("Container (sysname=%s) initializing ..." %
                  bootstrap.get_sys_name())

        # Keep track of the overrides from the command-line, so they can trump app/rel file data
        self.spawn_args = kwargs

        # Load object and service registry etc.
        bootstrap_pyon()

        # Create this Container's specific ExchangeManager instance
        self.ex_manager = ExchangeManager(self)

        # Create this Container's specific ProcManager instance
        self.proc_manager = ProcManager(self)

        # Create this Container's specific AppManager instance
        self.app_manager = AppManager(self)

        # DatastoreManager - controls access to Datastores (both mock and couch backed)
        self.datastore_manager = DatastoreManager()

        # File System - Interface to the OS File System, using correct path names and setups
        self.file_system = FileSystem(CFG)

        # Governance Controller - manages the governance related interceptors
        self.governance_controller = GovernanceController(self)

        # sFlow manager - controls sFlow stat emission
        self.sflow_manager = SFlowManager(self)

        # Coordinates the container start
        self._is_started = False
        self._capabilities = []
        self._status = "INIT"

        # protection for when the container itself is used as a Process for clients
        self.container = self

        log.debug("Container initialized, OK.")
Example #37
 def _create_simplex_coverage(cls, dataset_id, parameter_dictionary,
                              spatial_domain, temporal_domain):
     file_root = FileSystem.get_url(FS.CACHE, 'datasets')
     scov = SimplexCoverage(file_root,
                            dataset_id,
                            'Simplex Coverage for %s' % dataset_id,
                            parameter_dictionary=parameter_dictionary,
                            temporal_domain=temporal_domain,
                            spatial_domain=spatial_domain)
     return scov
Example #38
 def _create_complex_coverage(cls, dataset_id, description, parameter_dict):
     pdict = ParameterDictionary.load(parameter_dict)
     file_root = FileSystem.get_url(FS.CACHE, 'datasets')
     ccov = ComplexCoverage(
         file_root,
         dataset_id,
         'Complex Coverage for %s' % dataset_id,
         parameter_dictionary=pdict,
         complex_type=ComplexCoverageType.TEMPORAL_AGGREGATION)
     return ccov
    def on_start(self):
        #these values should come in from a config file, maybe pyon.yml
        self.pydap_host = self.CFG.get_safe('server.pydap.host', 'localhost')
        self.pydap_port = self.CFG.get_safe('server.pydap.port', '8001')
        self.pydap_url  = 'http://%s:%s/' % (self.pydap_host, self.pydap_port)
        self.pydap_data_path = self.CFG.get_safe('server.pydap.data_path', 'RESOURCE:ext/pydap')
        self.datasets_xml_path = self.get_datasets_xml_path(self.CFG)
        self.pydap_data_path = FileSystem.get_extended_url(self.pydap_data_path) + '/'
        self.setup_filesystem(self.datasets_xml_path)

        self.ux_url = self.CFG.get_safe('system.web_ui_url','http://localhost:3000/')
 def _get_hdf_from_string(self, hdf_string):
     '''
     @param hdf_string binary string consisting of an HDF5 file.
     @return temporary file (full path) where the string was written.
     @note The client is responsible for unlinking the file when finished.
     '''
     f = FileSystem.mktemp()
     f.write(hdf_string)
     retval = f.name
     f.close()
     return retval
Example #41
    def refresh_datasets_xml(self):
        datasets_xml_path = self.CFG.get_safe('server.pydap.datasets_xml_path', "RESOURCE:ext/datasets.xml")
        filename = datasets_xml_path.split('/')[-1]
        base = '/'.join(datasets_xml_path.split('/')[:-1])
        real_path = FileSystem.get_extended_url(base)
        datasets_xml_path = os.path.join(real_path, filename)

        try:
            os.remove(datasets_xml_path)
        except OSError:
            pass # File doesn't exist
 def check_msg(msg, header):
     assertions(isinstance(msg, StreamGranuleContainer), 'Msg is not a container')
     hdf_string = msg.identifiables[msg.data_stream_id].values
     sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
     log.debug('Sha1 matches')
     log.debug('Dumping file so you can inspect it.')
     log.debug('Records: %d' % msg.identifiables['record_count'].value)
     with open(FileSystem.get_url(FS.TEMP,'%s.cap.hdf5' % sha1[:8]),'w') as f:
         f.write(hdf_string)
         log.debug('Stream Capture: %s', f.name)
     result.set(True)
 def on_start(self):
     #these values should come in from a config file, maybe pyon.yml
     self.pydap_host = self.CFG.get_safe('server.pydap.host', 'localhost')
     self.pydap_port = self.CFG.get_safe('server.pydap.port', '8001')
     self.pydap_url  = 'http://%s:%s/' % (self.pydap_host, self.pydap_port)
     self.pydap_data_path = self.CFG.get_safe('server.pydap.data_path', 'RESOURCE:ext/pydap')
     self.datasets_xml_path = self.get_datasets_xml_path(self.CFG)
     self.pydap_data_path = FileSystem.get_extended_url(self.pydap_data_path) + '/'
     self.ux_url = self.CFG.get_safe('system.web_ui_url','http://localhost:3000/')
     self.jenv = Environment(loader=FileSystemLoader('res/templates'), trim_blocks=True, lstrip_blocks=True)
     self.resource_registry = self.container.resource_registry
     self.setup_filesystem(self.datasets_xml_path)
 def setUp(self):
     self._start_container()
     self.container.start_rel_from_url('res/deploy/r2deploy.yml')
     path = CFG.get_safe('server.pydap.data_path', "RESOURCE:ext/pydap")
     ext_path = FileSystem.get_extended_url(path)
     self.cov,self.filename = _make_coverage(ext_path)
     self.nt = 5
     self.cov.insert_timesteps(self.nt) 
     self.time_data = [i+1 for i in range(self.nt)]
     self.cov.set_parameter_values("time", value=self.time_data)
     host = CFG.get_safe('container.pydap_gateway.web_server.host', 'localhost')
     port = CFG.get_safe('container.pydap_gateway.web_server.port', '8001')
     self.request_url = "http://"+host+":"+str(port)+os.sep+os.path.basename(self.filename)
    def process(self, packet):
        """Processes incoming data!!!!
        """
        output = int(packet.get('num', 0)) + 1
        log.debug('(%s) Processing Packet: %s', self.name, packet)
        log.debug('(%s) Transform Complete: %s', self.name, output)

        if self.has_output:
            self.publish(dict(num=str(output)))

        with open(FileSystem.get_url(FS.TEMP, "transform_output"), 'a') as f:
            f.write('(%s): Received Packet: %s\n' % (self.name, packet))
            f.write('(%s):   - Transform - %d\n' % (self.name, output))
    def on_start(self):
        super(LightweightPyDAP,self).on_start()
        self.pydap_host = self.CFG.get_safe('server.pydap.host', 'localhost')
        self.pydap_port = self.CFG.get_safe('server.pydap.port', '8001')

        self.pydap_data_path = self.CFG.get_safe('server.pydap.data_path', 'RESOURCE:ext/pydap')

        self.pydap_data_path = FileSystem.get_extended_url(self.pydap_data_path)

        self.app = make_app(None, self.pydap_data_path, 'ion/core/static/templates/')
        self.log = getLogger('pydap')
        self.log.write = self.log.info
        self.server = WSGIServer((self.pydap_host, int(self.pydap_port)), self.app, log=self.log)
        self.server.start()
Example #53
    def hdf_to_string(self):
        """
        Convert the temporary hdf file holding the data into a binary string. Clean up by deleting the hdf file and
        return the binary string.

        @retval hdf_string
        """
        # Return Value
        # ------------
        # hdf_string: ''
        #
        try:
            # open the hdf5 file using python 'open()'
            f = open(self.filename, mode='rb')
            # read the binary string representation of the file
            hdf_string = f.read()
            f.close()
        except IOError:
            log.exception("Error opening binary file for reading out hdfstring in HDFEncoder. ")
            raise HDFEncoderException("Error while trying to open file. ")
        finally:
            FileSystem.unlink(self.filename)
        return hdf_string
 def _create_simplex_coverage(cls,
                              dataset_id,
                              parameter_dictionary,
                              spatial_domain,
                              temporal_domain,
                              inline_data_writes=True):
     file_root = FileSystem.get_url(FS.CACHE, 'datasets')
     scov = SimplexCoverage(file_root,
                            uuid4().hex,
                            'Simplex Coverage for %s' % dataset_id,
                            parameter_dictionary=parameter_dictionary,
                            temporal_domain=temporal_domain,
                            spatial_domain=spatial_domain,
                            inline_data_writes=inline_data_writes)
     return scov
Example #56
    def setUpClass(cls):

        # This test does not start a container, so we have to hack together a FileSystem singleton instance
        FileSystem(DotDict())

        @unittest.skipIf(no_numpy_h5py, 'numpy and/or h5py not imported')
        def create_known(dataset_name, rootgrp_name, grp_name):
            """
            A known array to compare against during tests
            """

            known_array = numpy.random.rand(10, 20)

            filename = FileSystem.get_url(FS.TEMP, random_name(), ".hdf5")

            # Write an hdf file with known values to compare against
            h5pyfile = h5py.File(filename, mode='w', driver='core')
            grp = h5pyfile.create_group(rootgrp_name)
            subgrp = grp.create_group(grp_name)
            dataset = subgrp.create_dataset(dataset_name,
                                            known_array.shape,
                                            known_array.dtype.str,
                                            compression='gzip',
                                            compression_opts=4,
                                            maxshape=(None, None))

            dataset.write_direct(known_array)
            h5pyfile.close()

            # convert the hdf file into a binary string
            f = open(filename, mode='rb')
            # read the binary string representation of the file
            known_hdf_as_string = f.read()  # this is a known string to compare against during tests
            f.close()
            # cleaning up
            FileSystem.unlink(f.name)

            return known_array, known_hdf_as_string

        # Use the class method to patch these attributes onto the class.
        TestScienceObjectCodec.known_array, TestScienceObjectCodec.known_hdf_as_string = create_known(
            TestScienceObjectCodec.dataset_name,
            TestScienceObjectCodec.rootgrp_name,
            TestScienceObjectCodec.grp_name)

        TestScienceObjectCodec.known_hdf_as_sha1 = sha1(
            TestScienceObjectCodec.known_hdf_as_string)
Example #58
    def __init__(self, name = None):
        """
        @param name The name of the dataset
        """
        # generate a random name for the filename if it has not been provided.
        self.filename = FileSystem.get_url(fs=FS.TEMP, filename=name or random_name(), ext='encoder.hdf5')

        # Using inline imports to put off making hdf/numpy required dependencies
        import h5py

        # open an hdf file on disk (in /tmp) to write data to, since we can't yet do it in memory
        log.debug("Creating h5py file object for the encoder at %s" % self.filename)
        if os.path.isfile(self.filename):
            # if file exists, then append to it
            self.h5pyfile = h5py.File(self.filename, mode = 'r+', driver='core')
        else:
            # if file does not already exist, write a new one
            self.h5pyfile = h5py.File(self.filename, mode = 'w', driver='core')
        assert self.h5pyfile, 'No h5py file object created.'