    def test_retrieve_cache(self):
        DataRetrieverService._refresh_interval = 1
        datasets = [self.make_simple_dataset() for i in xrange(10)]
        for stream_id, route, stream_def_id, dataset_id in datasets:
            coverage = DatasetManagementService._get_simplex_coverage(dataset_id, mode='a')
            coverage.insert_timesteps(10)
            coverage.set_parameter_values('time', np.arange(10))
            coverage.set_parameter_values('temp', np.arange(10))

        # Verify cache hit and refresh
        dataset_ids = [i[3] for i in datasets]
        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)
        DataRetrieverService._get_coverage(dataset_ids[0]) # Hit the cache
        cov, age = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        # Verify that it was hit and it's now in there
        self.assertTrue(dataset_ids[0] in DataRetrieverService._retrieve_cache)

        gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

        DataRetrieverService._get_coverage(dataset_ids[0]) # Hit the cache
        cov, age2 = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        self.assertTrue(age2 != age)

        for dataset_id in dataset_ids:
            DataRetrieverService._get_coverage(dataset_id)
        
        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)

        stream_id, route, stream_def, dataset_id = datasets[0]
        self.start_ingestion(stream_id, dataset_id)
        DataRetrieverService._get_coverage(dataset_id)
        
        self.assertTrue(dataset_id in DataRetrieverService._retrieve_cache)
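These tests exercise DataRetrieverService's internal coverage cache, whose implementation is not reproduced on this page. Purely as orientation, here is a minimal sketch of the behavior the assertions rely on: a bounded mapping of dataset_id to (coverage, timestamp) that refreshes stale entries and evicts the oldest when full. Every name below except _refresh_interval and _retrieve_cache is invented, and the sketch omits the ingestion-driven invalidation seen at the end of Examples 4 and 5.

import time
from collections import OrderedDict

class _CoverageCacheSketch(object):
    """Hypothetical stand-in for the cache internals the tests exercise."""
    _refresh_interval = 1            # seconds before a cached coverage is reopened
    _cache_limit = 5                 # assumed bound; the tests imply it is < 10
    _retrieve_cache = OrderedDict()  # dataset_id -> (coverage, timestamp)

    @classmethod
    def _open_coverage(cls, dataset_id):
        # The real service reopens the coverage from disk; stubbed here.
        return object()

    @classmethod
    def _get_coverage(cls, dataset_id):
        cov, age = cls._retrieve_cache.get(dataset_id, (None, 0))
        if cov is None or (time.time() - age) > cls._refresh_interval:
            # Stale or absent: reopen and restamp, which is why age2 != age
            # after sleeping past _refresh_interval.
            cov = cls._open_coverage(dataset_id)
            cls._retrieve_cache.pop(dataset_id, None)
            cls._retrieve_cache[dataset_id] = (cov, time.time())
        while len(cls._retrieve_cache) > cls._cache_limit:
            cls._retrieve_cache.popitem(last=False)  # evict the oldest entry
        return cov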
Example #2
    def setUp(self):
        mock_clients = self._create_service_mock('data_retriever')
        self.data_retriever_service = DataRetrieverService()
        self.data_retriever_service.clients = mock_clients
        self.mock_rr_create = self.data_retriever_service.clients.resource_registry.create
        self.mock_rr_create_assoc = self.data_retriever_service.clients.resource_registry.create_association
        self.mock_rr_read = self.data_retriever_service.clients.resource_registry.read
        self.mock_rr_update = self.data_retriever_service.clients.resource_registry.update
        self.mock_rr_delete = self.data_retriever_service.clients.resource_registry.delete
        self.mock_rr_delete_assoc = self.data_retriever_service.clients.resource_registry.delete_association
        self.mock_rr_find_assoc = self.data_retriever_service.clients.resource_registry.find_associations
        self.mock_ps_create_stream = self.data_retriever_service.clients.pubsub_management.create_stream
        self.mock_ps_create_stream_definition = self.data_retriever_service.clients.pubsub_management.create_stream_definition
        self.data_retriever_service.container = DotDict({
            'id':
            '123',
            'spawn_process':
            Mock(),
            'proc_manager':
            DotDict({
                'terminate_process': Mock(),
                'procs': []
            }),
            'datastore_manager':
            DotDict({'get_datastore': Mock()})
        })
        self.datastore = DotDict({'query_view': Mock()})
        self.data_retriever_service.container.datastore_manager.get_datastore.return_value = self.datastore
        self.mock_cc_spawn = self.data_retriever_service.container.spawn_process
        self.mock_cc_terminate = self.data_retriever_service.container.proc_manager.terminate_process
        self.mock_pd_schedule = self.data_retriever_service.clients.process_dispatcher.schedule_process
        self.mock_pd_cancel = self.data_retriever_service.clients.process_dispatcher.cancel_process
        self.mock_ds_read = self.data_retriever_service.clients.dataset_management.read_dataset
        self.data_retriever_service.process_definition = ProcessDefinition()
        self.data_retriever_service.process_definition.executable[
            'module'] = 'ion.processes.data.replay_process'
        self.data_retriever_service.process_definition.executable[
            'class'] = 'ReplayProcess'

        self.data_retriever_service.process_definition_id = 'mock_procdef_id'
Example #3
    def test_out_of_band_retrieve(self):
        # Set up the environment
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)
        
        # Fill the dataset
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # Retrieve the data
        granule = DataRetrieverService.retrieve_oob(dataset_id)
        rdt = RecordDictionaryTool.load_from_granule(granule)
        self.assertTrue((rdt['time'] == np.arange(40)).all())
Example #4
    def test_retrieve_cache(self):
        DataRetrieverService._refresh_interval = 1
        datasets = [self.make_simple_dataset() for i in xrange(10)]
        for stream_id, route, stream_def_id, dataset_id in datasets:
            coverage = DatasetManagementService._get_simplex_coverage(
                dataset_id)
            coverage.insert_timesteps(10)
            coverage.set_parameter_values('time', np.arange(10))
            coverage.set_parameter_values('temp', np.arange(10))

        # Verify cache hit and refresh
        dataset_ids = [i[3] for i in datasets]
        self.assertTrue(
            dataset_ids[0] not in DataRetrieverService._retrieve_cache)
        DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
        cov, age = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        # Verify that it was hit and it's now in there
        self.assertTrue(dataset_ids[0] in DataRetrieverService._retrieve_cache)

        gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

        DataRetrieverService._get_coverage(dataset_ids[0])  # Hit the cache
        cov, age2 = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        self.assertTrue(age2 != age)

        for dataset_id in dataset_ids:
            DataRetrieverService._get_coverage(dataset_id)

        self.assertTrue(
            dataset_ids[0] not in DataRetrieverService._retrieve_cache)

        stream_id, route, stream_def, dataset_id = datasets[0]
        self.start_ingestion(stream_id, dataset_id)
        DataRetrieverService._get_coverage(dataset_id)

        self.assertTrue(dataset_id in DataRetrieverService._retrieve_cache)

        DataRetrieverService._refresh_interval = 100
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, data_size=20)

        event = gevent.event.Event()
        with gevent.Timeout(20):
            while not event.wait(0.1):
                if dataset_id not in DataRetrieverService._retrieve_cache:
                    event.set()

        self.assertTrue(event.is_set())
Example #5
    def test_retrieve_cache(self):
        DataRetrieverService._refresh_interval = 1
        datasets = [self.make_simple_dataset() for i in xrange(10)]
        for stream_id, route, stream_def_id, dataset_id in datasets:
            coverage = DatasetManagementService._get_simplex_coverage(dataset_id)
            coverage.insert_timesteps(10)
            coverage.set_parameter_values('time', np.arange(10))
            coverage.set_parameter_values('temp', np.arange(10))

        # Verify cache hit and refresh
        dataset_ids = [i[3] for i in datasets]
        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)
        DataRetrieverService._get_coverage(dataset_ids[0]) # Hit the cache
        cov, age = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        # Verify that it was hit and it's now in there
        self.assertTrue(dataset_ids[0] in DataRetrieverService._retrieve_cache)

        gevent.sleep(DataRetrieverService._refresh_interval + 0.2)

        DataRetrieverService._get_coverage(dataset_ids[0]) # Hit the cache
        cov, age2 = DataRetrieverService._retrieve_cache[dataset_ids[0]]
        self.assertTrue(age2 != age)

        for dataset_id in dataset_ids:
            DataRetrieverService._get_coverage(dataset_id)
        
        self.assertTrue(dataset_ids[0] not in DataRetrieverService._retrieve_cache)

        stream_id, route, stream_def, dataset_id = datasets[0]
        self.start_ingestion(stream_id, dataset_id)
        DataRetrieverService._get_coverage(dataset_id)
        
        self.assertTrue(dataset_id in DataRetrieverService._retrieve_cache)

        DataRetrieverService._refresh_interval = 100
        self.publish_hifi(stream_id, route, 1)
        self.wait_until_we_have_enough_granules(dataset_id, data_size=20)

        event = gevent.event.Event()
        with gevent.Timeout(20):
            while not event.wait(0.1):
                if dataset_id not in DataRetrieverService._retrieve_cache:
                    event.set()

        self.assertTrue(event.is_set())
Example #6
    def setUp(self):
        mock_clients = self._create_service_mock('data_retriever')
        self.data_retriever_service = DataRetrieverService()
        self.data_retriever_service.clients = mock_clients
        self.mock_rr_create = self.data_retriever_service.clients.resource_registry.create
        self.mock_rr_create_assoc = self.data_retriever_service.clients.resource_registry.create_association
        self.mock_rr_read = self.data_retriever_service.clients.resource_registry.read
        self.mock_rr_update = self.data_retriever_service.clients.resource_registry.update
        self.mock_rr_delete = self.data_retriever_service.clients.resource_registry.delete
        self.mock_rr_delete_assoc = self.data_retriever_service.clients.resource_registry.delete_association
        self.mock_rr_find_assoc = self.data_retriever_service.clients.resource_registry.find_associations
        self.mock_ps_create_stream = self.data_retriever_service.clients.pubsub_management.create_stream
        self.mock_ps_create_stream_definition = self.data_retriever_service.clients.pubsub_management.create_stream_definition
        self.data_retriever_service.container = DotDict({
            'id':'123',
            'spawn_process':Mock(),
            'proc_manager':DotDict({
                'terminate_process':Mock(),
                'procs':[]
            }),
            'datastore_manager':DotDict({
                'get_datastore':Mock()
            })
        })
        self.datastore = DotDict({
            'query_view':Mock()
        })
        self.data_retriever_service.container.datastore_manager.get_datastore.return_value = self.datastore
        self.mock_cc_spawn = self.data_retriever_service.container.spawn_process
        self.mock_cc_terminate = self.data_retriever_service.container.proc_manager.terminate_process
        self.mock_pd_schedule = self.data_retriever_service.clients.process_dispatcher.schedule_process
        self.mock_pd_cancel = self.data_retriever_service.clients.process_dispatcher.cancel_process
        self.mock_ds_read = self.data_retriever_service.clients.dataset_management.read_dataset
        self.data_retriever_service.process_definition = ProcessDefinition()
        self.data_retriever_service.process_definition.executable['module'] = 'ion.processes.data.replay_process'
        self.data_retriever_service.process_definition.executable['class'] = 'ReplayProcess'

        self.data_retriever_service.process_definition_id = 'mock_procdef_id'
Example #7
class DataRetrieverServiceTest(PyonTestCase):
    def setUp(self):
        mock_clients = self._create_service_mock('data_retriever')
        self.data_retriever_service = DataRetrieverService()
        self.data_retriever_service.clients = mock_clients
        self.mock_rr_create = self.data_retriever_service.clients.resource_registry.create
        self.mock_rr_create_assoc = self.data_retriever_service.clients.resource_registry.create_association
        self.mock_rr_read = self.data_retriever_service.clients.resource_registry.read
        self.mock_rr_update = self.data_retriever_service.clients.resource_registry.update
        self.mock_rr_delete = self.data_retriever_service.clients.resource_registry.delete
        self.mock_rr_delete_assoc = self.data_retriever_service.clients.resource_registry.delete_association
        self.mock_rr_find_assoc = self.data_retriever_service.clients.resource_registry.find_associations
        self.mock_ps_create_stream = self.data_retriever_service.clients.pubsub_management.create_stream
        self.mock_ps_create_stream_definition = self.data_retriever_service.clients.pubsub_management.create_stream_definition
        self.data_retriever_service.container = DotDict({
            'id':'123',
            'spawn_process':Mock(),
            'proc_manager':DotDict({
                'terminate_process':Mock(),
                'procs':[]
            }),
            'datastore_manager':DotDict({
                'get_datastore':Mock()
            })
        })
        self.datastore = DotDict({
            'query_view':Mock()
        })
        self.data_retriever_service.container.datastore_manager.get_datastore.return_value = self.datastore
        self.mock_cc_spawn = self.data_retriever_service.container.spawn_process
        self.mock_cc_terminate = self.data_retriever_service.container.proc_manager.terminate_process
        self.mock_pd_schedule = self.data_retriever_service.clients.process_dispatcher.schedule_process
        self.mock_pd_cancel = self.data_retriever_service.clients.process_dispatcher.cancel_process
        self.mock_ds_read = self.data_retriever_service.clients.dataset_management.read_dataset
        self.data_retriever_service.process_definition = ProcessDefinition()
        self.data_retriever_service.process_definition.executable['module'] = 'ion.processes.data.replay_process'
        self.data_retriever_service.process_definition.executable['class'] = 'ReplayProcess'

        self.data_retriever_service.process_definition_id = 'mock_procdef_id'

    @unittest.skip('Can we mock the datastore manager?')
    def test_define_replay(self):
        #mocks
        self.mock_ps_create_stream.return_value = '12345'
        self.mock_rr_create.return_value = ('replay_id','garbage')
        self.mock_ds_read.return_value = DotDict({
            'datastore_name':'unittest',
            'view_name':'garbage',
            'primary_view_key':'primary key'})

        document = DotDict({'stream_resource_id':'0'})
        self.mock_pd_schedule.return_value = 'process_id'

        self.datastore.query_view.return_value = [{'doc':document}]

        config = {'process':{
            'query':'myquery',
            'datastore_name':'unittest',
            'view_name':'garbage',
            'key_id':'primary key',
            'delivery_format':None,
            'publish_streams':{'output':'12345'}
        }}


        # execution
        r,s = self.data_retriever_service.define_replay(dataset_id='dataset_id', query='myquery')

        # assertions
        self.assertTrue(self.mock_ps_create_stream_definition.called)
        self.assertTrue(self.mock_ps_create_stream.called)
        self.assertTrue(self.mock_rr_create.called)
        self.mock_rr_create_assoc.assert_called_with('replay_id',PRED.hasStream,'12345',None)
        self.assertTrue(self.mock_pd_schedule.called)
        self.assertTrue(self.mock_rr_update.called)
        self.assertEquals(r,'replay_id')
        self.assertEquals(s,'12345')

    def test_define_replay_no_data(self):
        #mocks
        self.mock_ps_create_stream.return_value = '12345'
        self.mock_rr_create.return_value = ('replay_id','garbage')
        self.mock_ds_read.return_value = DotDict({
            'datastore_name':'unittest',
            'view_name':'garbage',
            'primary_view_key':'primary key'})

        document = DotDict({'stream_resource_id':'0'})
        self.mock_pd_schedule.return_value = 'process_id'

        self.datastore.query_view.return_value = [] # Raises index error

        config = {'process':{
            'query':'myquery',
            'datastore_name':'unittest',
            'view_name':'garbage',
            'key_id':'primary key',
            'delivery_format':None,
            'publish_streams':{'output':'12345'}
        }}


        with self.assertRaises(NotFound):
            self.data_retriever_service.define_replay(dataset_id='dataset_id', query='myquery')


    @unittest.skip('Can\'t do unit test here')
    def test_start_replay(self):
        pass


    def test_cancel_replay(self):
        #mocks
        self.mock_rr_find_assoc.return_value = [1,2,3]

        replay = Replay()
        replay.process_id = '1'
        self.mock_rr_read.return_value = replay

        #execution
        self.data_retriever_service.cancel_replay('replay_id')

        #assertions
        self.assertEquals(self.mock_rr_delete_assoc.call_count,3)
        self.mock_rr_delete.assert_called_with('replay_id')

        self.mock_pd_cancel.assert_called_with('1')
Example #8
    def _get_highcharts_data(self, data_product_id='', visualization_parameters=None):
        """Retrieves the data for the specified DP

        @param data_product_id    str
        @param visualization_parameters    str
        @retval jsonp_visualization_data str
        @throws NotFound    object with specified id, query does not exist
        """

        # An empty list is returned in case there is no data in coverage
        empty_hc = []

        # error check
        if not data_product_id:
            raise BadRequest("The data_product_id parameter is missing")

        use_direct_access = False
        if visualization_parameters == {}:
            visualization_parameters = None

        # Extract the parameters. Definitely init first
        query = None
        if visualization_parameters:
            #query = {'parameters':[]}
            query = {}
            # Error check and damage control. Definitely need time
            if 'parameters' in visualization_parameters and len(visualization_parameters['parameters']) > 0:
                if not 'time' in visualization_parameters['parameters']:
                    visualization_parameters['parameters'].append('time')

                query['parameters'] = visualization_parameters['parameters']

            # The times passed from UI are system times so convert them to NTP
            if 'start_time' in visualization_parameters:
                query['start_time'] = int(visualization_parameters['start_time'])

            if 'end_time' in visualization_parameters:
                query['end_time'] = int((visualization_parameters['end_time']))

            # stride time
            if 'stride_time' in visualization_parameters:
                try:
                    query['stride_time'] = int(visualization_parameters['stride_time'])
                except TypeError: 
                    # There are some (rare) situations where the AJAX request has 'null' in the request
                    # Example:
                    # {"query_type":"google_dt","parameters":[],"start_time":-2208988800,"end_time":-2208988800,"stride_time":null,"use_direct_access":0}
                    query['stride_time'] = 1
            else:
                query['stride_time'] = 1

            # direct access parameter
            if 'use_direct_access' in visualization_parameters:
                if (int(visualization_parameters['use_direct_access']) == 1):
                    use_direct_access = True
                else:
                    use_direct_access = False

        # get the dataset_id and objs associated with the data_product. Need it to do the data retrieval
        ds_ids,_ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.Dataset, True)

        if ds_ids is None or not ds_ids:
            raise NotFound("Could not find dataset associated with data product")
        stream_def_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasStreamDefinition, id_only=True)
        if not stream_def_ids:
            raise NotFound('Could not find stream definition associated with data product')
        stream_def_id = stream_def_ids[0]

        if use_direct_access:
            retrieved_granule = DataRetrieverService.retrieve_oob(ds_ids[0], query=query, delivery_format=stream_def_id)
        else:
            #replay_granule = self.clients.data_retriever.retrieve(ds_ids[0],{'start_time':0,'end_time':2})
            retrieved_granule = self.clients.data_retriever.retrieve(ds_ids[0], query=query, delivery_format=stream_def_id)

        # If there is no data, return an empty list
        if retrieved_granule is None:
            return simplejson.dumps(empty_hc)

        # send the granule through the transform to get the google datatable
        hc_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name('highcharts',id_only=True)
        hc_stream_def = self.clients.pubsub_management.create_stream_definition('HighCharts_out', parameter_dictionary_id=hc_pdict_id)

        hc_data_granule = VizTransformHighChartsAlgorithm.execute(retrieved_granule, params=hc_stream_def, config=visualization_parameters)

        if hc_data_granule == None:
            return simplejson.dumps(empty_hc)

        hc_rdt = RecordDictionaryTool.load_from_granule(hc_data_granule)
        # Now go through this redundant step of converting the hc_data into a non-numpy version
        hc_data_np = (get_safe(hc_rdt, "hc_data"))[0]
        hc_data = []

        for series in hc_data_np:
            s = {}
            for key in series:
                if key == "data":
                    s["data"] = series["data"].tolist()
                    continue
                s[key] = series[key]
            hc_data.append(s)

        # return the json version of the table
        return json.dumps(hc_data)
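For orientation, the list serialized above holds one dict per HighCharts series; only the 'data' key is guaranteed by the loop, and the other keys (such as 'name') are assumptions about what the transform emits. An invented illustration:

# Illustrative only: values are invented, 'name' is assumed, and 'data' is the
# plain-Python list produced by the .tolist() call above.
hc_data_example = [
    {'name': 'temp', 'data': [10.0, 10.5, 11.0]},
    {'name': 'time', 'data': [0.0, 1.0, 2.0]},
]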
Example #9
    def _get_google_dt(self, data_product_id='', visualization_parameters=None):
        """Retrieves the data for the specified DP

        @param data_product_id    str
        @param visualization_parameters    str
        @retval jsonp_visualization_data str
        @throws NotFound    object with specified id, query does not exist
        """

        # An empty data table is returned in case there is no data in coverage
        empty_gdt = gviz_api.DataTable([('time', 'datetime', 'time')])

        # error check
        if not data_product_id:
            raise BadRequest("The data_product_id parameter is missing")

        use_direct_access = False
        if visualization_parameters == {}:
            visualization_parameters = None

        # Extract the parameters. Definitely init first
        query = None
        if visualization_parameters:
            #query = {'parameters':[]}
            query = {}
            # Error check and damage control. Definitely need time
            if 'parameters' in visualization_parameters and len(visualization_parameters['parameters']) > 0:
                if not 'time' in visualization_parameters['parameters']:
                    visualization_parameters['parameters'].append('time')

                query['parameters'] = visualization_parameters['parameters']

            # The times passed from UI are system times so convert them to NTP
            if 'start_time' in visualization_parameters:
                query['start_time'] = int(visualization_parameters['start_time'])

            if 'end_time' in visualization_parameters:
                query['end_time'] = int((visualization_parameters['end_time']))

            # stride time
            if 'stride_time' in visualization_parameters:
                try:
                    query['stride_time'] = int(visualization_parameters['stride_time'])
                except TypeError: 
                    # There are some (rare) situations where the AJAX request has 'null' in the request
                    # Example:
                    # {"query_type":"google_dt","parameters":[],"start_time":-2208988800,"end_time":-2208988800,"stride_time":null,"use_direct_access":0}
                    query['stride_time'] = 1
            else:
                query['stride_time'] = 1

            # direct access parameter
            if 'use_direct_access' in visualization_parameters:
                if (int(visualization_parameters['use_direct_access']) == 1):
                    use_direct_access = True
                else:
                    use_direct_access = False

        # get the dataset_id and objs associated with the data_product. Need it to do the data retrieval
        ds_ids,_ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.Dataset, True)

        if ds_ids is None or not ds_ids:
            raise NotFound("Could not find dataset associated with data product")
        stream_def_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasStreamDefinition, id_only=True)
        if not stream_def_ids:
            raise NotFound('Could not find stream definition associated with data product')
        stream_def_id = stream_def_ids[0]

        if use_direct_access:
            retrieved_granule = DataRetrieverService.retrieve_oob(ds_ids[0], query=query, delivery_format=stream_def_id)
        else:
            #replay_granule = self.clients.data_retriever.retrieve(ds_ids[0],{'start_time':0,'end_time':2})
            retrieved_granule = self.clients.data_retriever.retrieve(ds_ids[0], query=query, delivery_format=stream_def_id)

        # If there is no data, return an empty data table
        if retrieved_granule is None:
            return empty_gdt.ToJSon()

        # send the granule through the transform to get the google datatable
        gdt_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name('google_dt',id_only=True)
        gdt_stream_def = self.clients.pubsub_management.create_stream_definition('gdt', parameter_dictionary_id=gdt_pdict_id)

        gdt_data_granule = VizTransformGoogleDTAlgorithm.execute(retrieved_granule, params=gdt_stream_def, config=visualization_parameters)
        if gdt_data_granule == None:
            return empty_gdt.ToJSon()

        gdt_rdt = RecordDictionaryTool.load_from_granule(gdt_data_granule)
        gdt_components = get_safe(gdt_rdt, 'google_dt_components')
        gdt_component = gdt_components[0]
        temp_gdt_description = gdt_component["data_description"]
        temp_gdt_content = gdt_component["data_content"]

        # adjust the 'float' time to datetime in the content
        gdt_description = [('time', 'datetime', 'time')]
        gdt_content = []
        for idx in range(1,len(temp_gdt_description)):
            temp_arr = temp_gdt_description[idx]
            if temp_arr != None and temp_arr[0] != 'time':
                if len(temp_arr) == 3:
                    gdt_description.append((temp_arr[0], temp_arr[1], temp_arr[2]))
                if len(temp_arr) == 4:
                    gdt_description.append((temp_arr[0], temp_arr[1], temp_arr[2], temp_arr[3]))


        for tempTuple in temp_gdt_content:
            # sometimes there are inexplicable empty tuples in the content. Drop them
            if tempTuple == [] or len(tempTuple) == 0:
                continue

            varTuple = []
            varTuple.append(datetime.fromtimestamp(tempTuple[0]))
            for idx in range(1,len(tempTuple)):
                varTuple.append(tempTuple[idx])

            gdt_content.append(varTuple)

        # now generate the Google datatable out of the description and content
        gdt = gviz_api.DataTable(gdt_description)
        gdt.LoadData(gdt_content)

        # return the json version of the table
        return gdt.ToJSon()
Example #10
def retrieve_stream(dataset_id="", query=None):
    return DataRetrieverService.retrieve_oob(dataset_id, query)
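A hedged usage sketch for the wrapper above; the query keys mirror those assembled in the visualization methods earlier on this page, and dataset_id is assumed to come from a helper such as make_simple_dataset():

# Hypothetical call; the keys follow the query dicts built in _get_google_dt.
granule = retrieve_stream(dataset_id, query={'start_time': 0,
                                             'end_time': 10,
                                             'stride_time': 1})
rdt = RecordDictionaryTool.load_from_granule(granule)
print rdt['time']  # the retrieved time values as a numpy array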
Example #11
    def get_visualization_data(self, data_product_id='', visualization_parameters=None, callback='', tqx=""):
        """Retrieves the data for the specified DP

        @param data_product_id    str
        @param visualization_parameters    str
        @param callback     str
        @retval jsonp_visualization_data str
        @throws NotFound    object with specified id, query does not exist
        """

        # error check
        if not data_product_id:
            raise BadRequest("The data_product_id parameter is missing")

        use_direct_access = False
        if visualization_parameters == {}:
            visualization_parameters = None

        reqId = 0
        # If a reqId was passed in tqx, extract it
        if tqx:
            tqx_param_list = tqx.split(";")
            for param in tqx_param_list:
                key, value = param.split(":")
                if key == 'reqId':
                    reqId = value

        # Extract the parameters. Definitely init first
        query = None
        if visualization_parameters:
            #query = {'parameters':[]}
            query = {}
            # Error check and damage control. Definitely need time
            if 'parameters' in visualization_parameters and len(visualization_parameters['parameters']) > 0:
                if not 'time' in visualization_parameters['parameters']:
                    visualization_parameters['parameters'].append('time')

                query['parameters'] = visualization_parameters['parameters']

            # The times passed from UI are system times so convert them to NTP
            if 'start_time' in visualization_parameters:
                #query['start_time'] = int(ntplib.system_to_ntp_time(float(visualization_parameters['start_time'])))
                query['start_time'] = int(visualization_parameters['start_time'])

            if 'end_time' in visualization_parameters:
                #query['end_time'] = int(ntplib.system_to_ntp_time(float(visualization_parameters['end_time'])))
                query['end_time'] = int((visualization_parameters['end_time']))

            # stride time
            if 'stride_time' in visualization_parameters:
                query['stride_time'] = int(visualization_parameters['stride_time'])
            else:
                query['stride_time'] = 1

            # direct access parameter
            if 'use_direct_access' in visualization_parameters:
                if (int(visualization_parameters['use_direct_access']) == 1):
                    use_direct_access = True
                else:
                    use_direct_access = False

        # get the dataset_id associated with the data_product. Need it to do the data retrieval
        ds_ids,_ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.Dataset, True)

        if ds_ids is None or not ds_ids:
            raise NotFound("Could not find dataset associated with data product")

        if use_direct_access:
            retrieved_granule = DataRetrieverService.retrieve_oob(ds_ids[0], query=query)
        else:
            #replay_granule = self.clients.data_retriever.retrieve(ds_ids[0],{'start_time':0,'end_time':2})
            retrieved_granule = self.clients.data_retriever.retrieve(ds_ids[0], query=query)

        if retrieved_granule is None:
            return None

        #temp_rdt = RecordDictionaryTool.load_from_granule(retrieved_granule)

        # send the granule through the transform to get the google datatable
        gdt_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name('google_dt',id_only=True)
        gdt_stream_def = self.clients.pubsub_management.create_stream_definition('gdt', parameter_dictionary_id=gdt_pdict_id)

        gdt_data_granule = VizTransformGoogleDTAlgorithm.execute(retrieved_granule, params=gdt_stream_def, config=visualization_parameters)
        if gdt_data_granule == None:
            return None

        gdt_rdt = RecordDictionaryTool.load_from_granule(gdt_data_granule)
        gdt_components = get_safe(gdt_rdt, 'google_dt_components')
        gdt_component = gdt_components[0]
        temp_gdt_description = gdt_component["data_description"]
        temp_gdt_content = gdt_component["data_content"]

        # adjust the 'float' time to datetime in the content
        gdt_description = [('time', 'datetime', 'time')]
        gdt_content = []
        for idx in range(1,len(temp_gdt_description)):
            temp_arr = temp_gdt_description[idx]
            if temp_arr != None and temp_arr[0] != 'time':
                gdt_description.append((temp_arr[0], temp_arr[1], temp_arr[2]))

        for tempTuple in temp_gdt_content:
            # sometimes there are inexplicable empty tuples in the content. Drop them
            if tempTuple == [] or len(tempTuple) == 0:
                continue

            varTuple = []
            varTuple.append(datetime.fromtimestamp(tempTuple[0]))
            for idx in range(1,len(tempTuple)):
                # some silly numpy format won't go away so need to cast numbers to floats
                if(gdt_description[idx][1] == 'number'):
                    if tempTuple[idx] == None:
                        varTuple.append(0.0)
                    else:
                        # Precision hardcoded for now. Needs to be on a per parameter basis
                        varTuple.append(round(float(tempTuple[idx]),5))
                else:
                    varTuple.append(tempTuple[idx])

            gdt_content.append(varTuple)

        # now generate the Google datatable out of the description and content
        gdt = gviz_api.DataTable(gdt_description)
        gdt.LoadData(gdt_content)

        # return the json version of the table
        if callback == '':
            return gdt.ToJSonResponse(req_id = reqId)
        else:
            return callback + "(\"" + gdt.ToJSonResponse(req_id = reqId) + "\")"
Example #12
    def test_transform_data(self):
        module = __name__
        cls = 'FakeTransform'
        retval = DataRetrieverService._transform_data(0, module, cls)
        self.assertEquals(retval, 1)
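The test resolves 'FakeTransform' by name from this test module, but the class itself is not part of the snippet. A minimal stand-in consistent with the assertion (input 0, result 1) could look like the following; the execute signature is an assumption:

class FakeTransform(object):
    # Assumed interface: _transform_data is presumed to instantiate the named
    # class and call execute() on the input value, returning its result.
    def execute(self, value, *args, **kwargs):
        return value + 1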
Example #13
    def _get_highcharts_data(self,
                             data_product_id='',
                             visualization_parameters=None):
        """Retrieves the data for the specified DP

        @param data_product_id    str
        @param visualization_parameters    str
        @retval jsonp_visualization_data str
        @throws NotFound    object with specified id, query does not exist
        """

        # An empty list is returned in case there is no data in coverage
        empty_hc = []

        # error check
        if not data_product_id:
            raise BadRequest("The data_product_id parameter is missing")

        use_direct_access = False
        if visualization_parameters == {}:
            visualization_parameters = None

        # Extract the parameters. Definitely init first
        query = None
        if visualization_parameters:
            #query = {'parameters':[]}
            query = {}
            # Error check and damage control. Definitely need time
            if 'parameters' in visualization_parameters and len(
                    visualization_parameters['parameters']) > 0:
                if not 'time' in visualization_parameters['parameters']:
                    visualization_parameters['parameters'].append('time')

                query['parameters'] = visualization_parameters['parameters']

            # The times passed from UI are system times so convert them to NTP
            if 'start_time' in visualization_parameters:
                query['start_time'] = int(
                    visualization_parameters['start_time'])

            if 'end_time' in visualization_parameters:
                query['end_time'] = int((visualization_parameters['end_time']))

            # stride time
            if 'stride_time' in visualization_parameters:
                try:
                    query['stride_time'] = int(
                        visualization_parameters['stride_time'])
                except TypeError:
                    # There are some (rare) situations where the AJAX request has 'null' in the request
                    # Example:
                    # {"query_type":"google_dt","parameters":[],"start_time":-2208988800,"end_time":-2208988800,"stride_time":null,"use_direct_access":0}
                    query['stride_time'] = 1
            else:
                query['stride_time'] = 1

            # direct access parameter
            if 'use_direct_access' in visualization_parameters:
                if (int(visualization_parameters['use_direct_access']) == 1):
                    use_direct_access = True
                else:
                    use_direct_access = False

        # get the dataset_id and objs associated with the data_product. Need it to do the data retrieval
        ds_ids, _ = self.clients.resource_registry.find_objects(
            data_product_id, PRED.hasDataset, RT.Dataset, True)

        if ds_ids is None or not ds_ids:
            raise NotFound(
                "Could not find dataset associated with data product")
        stream_def_ids, _ = self.clients.resource_registry.find_objects(
            data_product_id, PRED.hasStreamDefinition, id_only=True)
        if not stream_def_ids:
            raise NotFound(
                'Could not find stream definition associated with data product'
            )
        stream_def_id = stream_def_ids[0]

        if use_direct_access:
            retrieved_granule = DataRetrieverService.retrieve_oob(
                ds_ids[0], query=query, delivery_format=stream_def_id)
        else:
            #replay_granule = self.clients.data_retriever.retrieve(ds_ids[0],{'start_time':0,'end_time':2})
            retrieved_granule = self.clients.data_retriever.retrieve(
                ds_ids[0], query=query, delivery_format=stream_def_id)

        # If there is no data, return an empty list
        if retrieved_granule is None:
            return simplejson.dumps(empty_hc)

        # send the granule through the transform to get the google datatable
        hc_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name(
            'highcharts', id_only=True)
        hc_stream_def = self.clients.pubsub_management.create_stream_definition(
            'HighCharts_out', parameter_dictionary_id=hc_pdict_id)

        hc_data_granule = VizTransformHighChartsAlgorithm.execute(
            retrieved_granule,
            params=hc_stream_def,
            config=visualization_parameters)

        if hc_data_granule == None:
            return simplejson.dumps(empty_hc)

        hc_rdt = RecordDictionaryTool.load_from_granule(hc_data_granule)
        # Now go through this redundant step of converting the hc_data into a non-numpy version
        hc_data_np = (get_safe(hc_rdt, "hc_data"))[0]
        hc_data = []

        for series in hc_data_np:
            s = {}
            for key in series:
                if key == "data":
                    s["data"] = series["data"].tolist()
                    continue
                s[key] = series[key]
            hc_data.append(s)

        # return the json version of the table
        return json.dumps(hc_data)
Example #14
    def test_transform_data(self):
        module = __name__
        cls = 'FakeTransform'
        retval = DataRetrieverService._transform_data(0, module, cls)
        self.assertEquals(retval, 1)
Example #15
class DataRetrieverServiceTest(PyonTestCase):
    def setUp(self):
        mock_clients = self._create_service_mock('data_retriever')
        self.data_retriever_service = DataRetrieverService()
        self.data_retriever_service.clients = mock_clients
        self.mock_rr_create = self.data_retriever_service.clients.resource_registry.create
        self.mock_rr_create_assoc = self.data_retriever_service.clients.resource_registry.create_association
        self.mock_rr_read = self.data_retriever_service.clients.resource_registry.read
        self.mock_rr_update = self.data_retriever_service.clients.resource_registry.update
        self.mock_rr_delete = self.data_retriever_service.clients.resource_registry.delete
        self.mock_rr_delete_assoc = self.data_retriever_service.clients.resource_registry.delete_association
        self.mock_rr_find_assoc = self.data_retriever_service.clients.resource_registry.find_associations
        self.mock_ps_create_stream = self.data_retriever_service.clients.pubsub_management.create_stream
        self.mock_ps_create_stream_definition = self.data_retriever_service.clients.pubsub_management.create_stream_definition
        self.data_retriever_service.container = DotDict({
            'id':
            '123',
            'spawn_process':
            Mock(),
            'proc_manager':
            DotDict({
                'terminate_process': Mock(),
                'procs': []
            }),
            'datastore_manager':
            DotDict({'get_datastore': Mock()})
        })
        self.datastore = DotDict({'query_view': Mock()})
        self.data_retriever_service.container.datastore_manager.get_datastore.return_value = self.datastore
        self.mock_cc_spawn = self.data_retriever_service.container.spawn_process
        self.mock_cc_terminate = self.data_retriever_service.container.proc_manager.terminate_process
        self.mock_pd_schedule = self.data_retriever_service.clients.process_dispatcher.schedule_process
        self.mock_pd_cancel = self.data_retriever_service.clients.process_dispatcher.cancel_process
        self.mock_ds_read = self.data_retriever_service.clients.dataset_management.read_dataset
        self.data_retriever_service.process_definition = ProcessDefinition()
        self.data_retriever_service.process_definition.executable[
            'module'] = 'ion.processes.data.replay_process'
        self.data_retriever_service.process_definition.executable[
            'class'] = 'ReplayProcess'

        self.data_retriever_service.process_definition_id = 'mock_procdef_id'

    @unittest.skip('Can we mock the datastore manager?')
    def test_define_replay(self):
        #mocks
        self.mock_ps_create_stream.return_value = '12345'
        self.mock_rr_create.return_value = ('replay_id', 'garbage')
        self.mock_ds_read.return_value = DotDict({
            'datastore_name':
            'unittest',
            'view_name':
            'garbage',
            'primary_view_key':
            'primary key'
        })

        document = DotDict({'stream_resource_id': '0'})
        self.mock_pd_schedule.return_value = 'process_id'

        self.datastore.query_view.return_value = [{'doc': document}]

        config = {
            'process': {
                'query': 'myquery',
                'datastore_name': 'unittest',
                'view_name': 'garbage',
                'key_id': 'primary key',
                'delivery_format': None,
                'publish_streams': {
                    'output': '12345'
                }
            }
        }

        # execution
        r, s = self.data_retriever_service.define_replay(
            dataset_id='dataset_id', query='myquery')

        # assertions
        self.assertTrue(self.mock_ps_create_stream_definition.called)
        self.assertTrue(self.mock_ps_create_stream.called)
        self.assertTrue(self.mock_rr_create.called)
        self.mock_rr_create_assoc.assert_called_with('replay_id',
                                                     PRED.hasStream, '12345',
                                                     None)
        self.assertTrue(self.mock_pd_schedule.called)
        self.assertTrue(self.mock_rr_update.called)
        self.assertEquals(r, 'replay_id')
        self.assertEquals(s, '12345')

    def test_define_replay_no_data(self):
        #mocks
        self.mock_ps_create_stream.return_value = '12345'
        self.mock_rr_create.return_value = ('replay_id', 'garbage')
        self.mock_ds_read.return_value = DotDict({
            'datastore_name':
            'unittest',
            'view_name':
            'garbage',
            'primary_view_key':
            'primary key'
        })

        document = DotDict({'stream_resource_id': '0'})
        self.mock_pd_schedule.return_value = 'process_id'

        self.datastore.query_view.return_value = []  # Raises index error

        config = {
            'process': {
                'query': 'myquery',
                'datastore_name': 'unittest',
                'view_name': 'garbage',
                'key_id': 'primary key',
                'delivery_format': None,
                'publish_streams': {
                    'output': '12345'
                }
            }
        }

        with self.assertRaises(NotFound):
            self.data_retriever_service.define_replay(dataset_id='dataset_id',
                                                      query='myquery')

    @unittest.skip('Can\'t do unit test here')
    def test_start_replay(self):
        pass

    def test_cancel_replay(self):
        #mocks
        self.mock_rr_find_assoc.return_value = [1, 2, 3]

        replay = Replay()
        replay.process_id = '1'
        self.mock_rr_read.return_value = replay

        #execution
        self.data_retriever_service.cancel_replay('replay_id')

        #assertions
        self.assertEquals(self.mock_rr_delete_assoc.call_count, 3)
        self.mock_rr_delete.assert_called_with('replay_id')

        self.mock_pd_cancel.assert_called_with('1')
Example #16
def retrieve_stream(dataset_id='', query=None):
    return DataRetrieverService.retrieve_oob(dataset_id, query)