def test_out_of_band_retrieve(self):
        """Check that an out-of-band retrieve returns the expected time values.

        Creates a simple dataset, starts ingestion on it, publishes fake data,
        waits via wait_until_we_have_enough_granules(dataset_id, 40), then
        reads the dataset back with DataRetrieverService.retrieve_oob and
        asserts that rdt['time'] equals np.arange(40) element-wise.
        """
        # Set up the environment: dataset plus ingestion
        stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
        self.start_ingestion(stream_id, dataset_id)

        # Populate the dataset and wait for the granules to arrive
        self.publish_fake_data(stream_id, route)
        self.wait_until_we_have_enough_granules(dataset_id, 40)

        # Read the data back out-of-band and compare the time axis
        expected_times = np.arange(40)
        rdt = RecordDictionaryTool.load_from_granule(
            DataRetrieverService.retrieve_oob(dataset_id))
        times_match = (rdt['time'] == expected_times).all()
        self.assertTrue(times_match)
    def _get_highcharts_data(self, data_product_id='', visualization_parameters=None):
        """Retrieve a data product's data and return it as a HighCharts JSON string.

        The dataset associated with the data product is read (through
        DataRetrieverService.retrieve_oob when 'use_direct_access' is 1,
        otherwise through self.clients.data_retriever.retrieve), passed to
        VizTransformHighChartsAlgorithm.execute, and the resulting series are
        serialized with json.dumps.

        @param data_product_id    str
        @param visualization_parameters    dict, optional. Keys read here:
               'parameters', 'start_time', 'end_time', 'stride_time',
               'use_direct_access'. The same dict is also passed to the
               transform as its config.
        @retval jsonp_visualization_data str  JSON list of series; "[]" when
                the retrieve or the transform yields nothing
        @throws BadRequest  data_product_id is empty
        @throws NotFound    no dataset or stream definition is associated
                            with the data product
        """

        # error check
        if not data_product_id:
            raise BadRequest("The data_product_id parameter is missing")

        # JSON for an empty list of series, returned when there is no data
        empty_hc = json.dumps([])

        use_direct_access = False
        if visualization_parameters == {}:
            visualization_parameters = None

        # Extract the parameters. Definitely init first
        query = None
        if visualization_parameters:
            query = {}

            # Error check and damage control. Definitely need time.
            # NOTE: 'time' is appended to the caller's list in place; the same
            # dict is later handed to the transform as its config.
            if 'parameters' in visualization_parameters and len(visualization_parameters['parameters']) > 0:
                requested = visualization_parameters['parameters']
                if 'time' not in requested:
                    requested.append('time')
                query['parameters'] = requested

            # Start/end times are passed through as integers; no NTP
            # conversion is applied here.
            if 'start_time' in visualization_parameters:
                query['start_time'] = int(visualization_parameters['start_time'])

            if 'end_time' in visualization_parameters:
                query['end_time'] = int(visualization_parameters['end_time'])

            # stride time, defaulting to 1
            if 'stride_time' in visualization_parameters:
                try:
                    query['stride_time'] = int(visualization_parameters['stride_time'])
                except (TypeError, ValueError):
                    # There are some (rare) situations where the AJAX request
                    # has 'null' for stride_time (TypeError); an empty or
                    # non-numeric string (ValueError) is treated the same way.
                    query['stride_time'] = 1
            else:
                query['stride_time'] = 1

            # direct access parameter
            if 'use_direct_access' in visualization_parameters:
                use_direct_access = int(visualization_parameters['use_direct_access']) == 1

        # get the dataset_id and objs associated with the data_product. Need it to do the data retrieval
        ds_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.Dataset, True)
        if not ds_ids:
            raise NotFound("Could not find dataset associated with data product")

        stream_def_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasStreamDefinition, id_only=True)
        if not stream_def_ids:
            raise NotFound('Could not find stream definition associated with data product')
        stream_def_id = stream_def_ids[0]

        if use_direct_access:
            retrieved_granule = DataRetrieverService.retrieve_oob(ds_ids[0], query=query, delivery_format=stream_def_id)
        else:
            retrieved_granule = self.clients.data_retriever.retrieve(ds_ids[0], query=query, delivery_format=stream_def_id)

        # If there is no data, return an empty list
        if retrieved_granule is None:
            return empty_hc

        # send the granule through the transform to get the HighCharts data
        # NOTE(review): a stream definition is created on every call and
        # nothing in this method removes it - confirm that is intended.
        hc_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name('highcharts', id_only=True)
        hc_stream_def = self.clients.pubsub_management.create_stream_definition('HighCharts_out', parameter_dictionary_id=hc_pdict_id)

        hc_data_granule = VizTransformHighChartsAlgorithm.execute(retrieved_granule, params=hc_stream_def, config=visualization_parameters)

        # Identity check: '==' on an arbitrary granule object may not return a plain bool
        if hc_data_granule is None:
            return empty_hc

        hc_rdt = RecordDictionaryTool.load_from_granule(hc_data_granule)

        # Convert the hc_data into a non numpy version so it can be serialized
        hc_data_np = get_safe(hc_rdt, "hc_data")[0]
        hc_data = []
        for series in hc_data_np:
            plain_series = {}
            for key in series:
                if key == "data":
                    plain_series[key] = series[key].tolist()
                else:
                    plain_series[key] = series[key]
            hc_data.append(plain_series)

        # return the json version of the series
        return json.dumps(hc_data)
    def _get_google_dt(self, data_product_id='', visualization_parameters=None):
        """Retrieve a data product's data and return it as a Google DataTable JSON string.

        The dataset associated with the data product is read (through
        DataRetrieverService.retrieve_oob when 'use_direct_access' is 1,
        otherwise through self.clients.data_retriever.retrieve), passed to
        VizTransformGoogleDTAlgorithm.execute, and the resulting description
        and content are loaded into a gviz_api.DataTable whose first column
        is ('time', 'datetime', 'time').

        @param data_product_id    str
        @param visualization_parameters    dict, optional. Keys read here:
               'parameters', 'start_time', 'end_time', 'stride_time',
               'use_direct_access'. The same dict is also passed to the
               transform as its config.
        @retval jsonp_visualization_data str  DataTable.ToJSon() output; a
                table with only the time column when the retrieve or the
                transform yields nothing
        @throws BadRequest  data_product_id is empty
        @throws NotFound    no dataset or stream definition is associated
                            with the data product
        """

        # error check
        if not data_product_id:
            raise BadRequest("The data_product_id parameter is missing")

        # A table with only the time column is returned in case there is no data
        empty_gdt = gviz_api.DataTable([('time', 'datetime', 'time')])

        use_direct_access = False
        if visualization_parameters == {}:
            visualization_parameters = None

        # Extract the parameters. Definitely init first
        query = None
        if visualization_parameters:
            query = {}

            # Error check and damage control. Definitely need time.
            # NOTE: 'time' is appended to the caller's list in place; the same
            # dict is later handed to the transform as its config.
            if 'parameters' in visualization_parameters and len(visualization_parameters['parameters']) > 0:
                requested = visualization_parameters['parameters']
                if 'time' not in requested:
                    requested.append('time')
                query['parameters'] = requested

            # Start/end times are passed through as integers; no NTP
            # conversion is applied here.
            if 'start_time' in visualization_parameters:
                query['start_time'] = int(visualization_parameters['start_time'])

            if 'end_time' in visualization_parameters:
                query['end_time'] = int(visualization_parameters['end_time'])

            # stride time, defaulting to 1
            if 'stride_time' in visualization_parameters:
                try:
                    query['stride_time'] = int(visualization_parameters['stride_time'])
                except (TypeError, ValueError):
                    # There are some (rare) situations where the AJAX request
                    # has 'null' for stride_time (TypeError); an empty or
                    # non-numeric string (ValueError) is treated the same way.
                    query['stride_time'] = 1
            else:
                query['stride_time'] = 1

            # direct access parameter
            if 'use_direct_access' in visualization_parameters:
                use_direct_access = int(visualization_parameters['use_direct_access']) == 1

        # get the dataset_id and objs associated with the data_product. Need it to do the data retrieval
        ds_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.Dataset, True)
        if not ds_ids:
            raise NotFound("Could not find dataset associated with data product")

        stream_def_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasStreamDefinition, id_only=True)
        if not stream_def_ids:
            raise NotFound('Could not find stream definition associated with data product')
        stream_def_id = stream_def_ids[0]

        if use_direct_access:
            retrieved_granule = DataRetrieverService.retrieve_oob(ds_ids[0], query=query, delivery_format=stream_def_id)
        else:
            retrieved_granule = self.clients.data_retriever.retrieve(ds_ids[0], query=query, delivery_format=stream_def_id)

        # If there is no data, return the empty table
        if retrieved_granule is None:
            return empty_gdt.ToJSon()

        # send the granule through the transform to get the google datatable
        # NOTE(review): a stream definition is created on every call and
        # nothing in this method removes it - confirm that is intended.
        gdt_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name('google_dt', id_only=True)
        gdt_stream_def = self.clients.pubsub_management.create_stream_definition('gdt', parameter_dictionary_id=gdt_pdict_id)

        gdt_data_granule = VizTransformGoogleDTAlgorithm.execute(retrieved_granule, params=gdt_stream_def, config=visualization_parameters)
        # Identity check: '==' on an arbitrary granule object may not return a plain bool
        if gdt_data_granule is None:
            return empty_gdt.ToJSon()

        gdt_rdt = RecordDictionaryTool.load_from_granule(gdt_data_granule)
        gdt_component = get_safe(gdt_rdt, 'google_dt_components')[0]
        temp_gdt_description = gdt_component["data_description"]
        temp_gdt_content = gdt_component["data_content"]

        # Rebuild the description with a datetime time column first. The
        # first incoming entry is skipped, as are None entries and any entry
        # named 'time'; only 3- and 4-element entries are carried over.
        gdt_description = [('time', 'datetime', 'time')]
        for column in temp_gdt_description[1:]:
            if column is None or column[0] == 'time':
                continue
            if len(column) in (3, 4):
                gdt_description.append(tuple(column))

        # adjust the 'float' time to datetime in the content
        gdt_content = []
        for row in temp_gdt_content:
            # sometimes there are inexplicable empty tuples in the content. Drop them
            if len(row) == 0:
                continue
            gdt_content.append([datetime.fromtimestamp(row[0])] + list(row[1:]))

        # now generate the Google datatable out of the description and content
        gdt = gviz_api.DataTable(gdt_description)
        gdt.LoadData(gdt_content)

        # return the json version of the table
        return gdt.ToJSon()
def retrieve_stream(dataset_id="", query=None):
    """Return the result of DataRetrieverService.retrieve_oob for a dataset.

    @param dataset_id    passed through as the first positional argument
    @param query         passed through as the second positional argument
    @retval whatever DataRetrieverService.retrieve_oob returns
    """
    retrieved = DataRetrieverService.retrieve_oob(dataset_id, query)
    return retrieved
Example #5
0
    def get_visualization_data(self, data_product_id='', visualization_parameters=None, callback='', tqx=""):
        """Retrieve a data product's data as a Google DataTable JSON response.

        The dataset associated with the data product is read (through
        DataRetrieverService.retrieve_oob when 'use_direct_access' is 1,
        otherwise through self.clients.data_retriever.retrieve), passed to
        VizTransformGoogleDTAlgorithm.execute, and loaded into a
        gviz_api.DataTable whose first column is ('time', 'datetime', 'time').

        @param data_product_id    str
        @param visualization_parameters    dict, optional. Keys read here:
               'parameters', 'start_time', 'end_time', 'stride_time',
               'use_direct_access'. The same dict is also passed to the
               transform as its config.
        @param callback     str  when non-empty, the response is wrapped as
               callback("<response>")
        @param tqx          str  semicolon separated "key:value" pairs; the
               value of 'reqId' is passed to ToJSonResponse
        @retval jsonp_visualization_data str, or None when the retrieve or
                the transform yields nothing
        @throws BadRequest  data_product_id is empty
        @throws NotFound    no dataset is associated with the data product
        """

        # error check
        if not data_product_id:
            raise BadRequest("The data_product_id parameter is missing")

        use_direct_access = False
        if visualization_parameters == {}:
            visualization_parameters = None

        # If a reqId was passed in tqx, extract it. partition() tolerates
        # empty segments (e.g. a trailing ';') and values containing ':',
        # both of which made a two-way unpack of split(':') raise.
        req_id = 0
        if tqx:
            for param in tqx.split(";"):
                key, sep, value = param.partition(":")
                if sep and key == 'reqId':
                    req_id = value

        # Extract the parameters. Definitely init first
        query = None
        if visualization_parameters:
            query = {}

            # Error check and damage control. Definitely need time.
            # NOTE: 'time' is appended to the caller's list in place; the same
            # dict is later handed to the transform as its config.
            if 'parameters' in visualization_parameters and len(visualization_parameters['parameters']) > 0:
                requested = visualization_parameters['parameters']
                if 'time' not in requested:
                    requested.append('time')
                query['parameters'] = requested

            # Start/end times are passed through as integers; no NTP
            # conversion is applied here.
            if 'start_time' in visualization_parameters:
                query['start_time'] = int(visualization_parameters['start_time'])

            if 'end_time' in visualization_parameters:
                query['end_time'] = int(visualization_parameters['end_time'])

            # stride time, defaulting to 1. The default must be *assigned*;
            # a comparison here would raise KeyError on the empty query dict.
            if 'stride_time' in visualization_parameters:
                try:
                    query['stride_time'] = int(visualization_parameters['stride_time'])
                except (TypeError, ValueError):
                    # null (TypeError) or an empty/non-numeric string
                    # (ValueError) in the request falls back to the default
                    query['stride_time'] = 1
            else:
                query['stride_time'] = 1

            # direct access parameter
            if 'use_direct_access' in visualization_parameters:
                use_direct_access = int(visualization_parameters['use_direct_access']) == 1

        # get the dataset_id associated with the data_product. Need it to do the data retrieval
        ds_ids, _ = self.clients.resource_registry.find_objects(data_product_id, PRED.hasDataset, RT.Dataset, True)
        if not ds_ids:
            raise NotFound("Could not find dataset associated with data product")

        if use_direct_access:
            retrieved_granule = DataRetrieverService.retrieve_oob(ds_ids[0], query=query)
        else:
            retrieved_granule = self.clients.data_retriever.retrieve(ds_ids[0], query=query)

        if retrieved_granule is None:
            return None

        # send the granule through the transform to get the google datatable
        # NOTE(review): a stream definition is created on every call and
        # nothing in this method removes it - confirm that is intended.
        gdt_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name('google_dt', id_only=True)
        gdt_stream_def = self.clients.pubsub_management.create_stream_definition('gdt', parameter_dictionary_id=gdt_pdict_id)

        gdt_data_granule = VizTransformGoogleDTAlgorithm.execute(retrieved_granule, params=gdt_stream_def, config=visualization_parameters)
        # Identity check: '==' on an arbitrary granule object may not return a plain bool
        if gdt_data_granule is None:
            return None

        gdt_rdt = RecordDictionaryTool.load_from_granule(gdt_data_granule)
        gdt_component = get_safe(gdt_rdt, 'google_dt_components')[0]
        temp_gdt_description = gdt_component["data_description"]
        temp_gdt_content = gdt_component["data_content"]

        # Rebuild the description with a datetime time column first. The
        # first incoming entry is skipped, as are None entries and any entry
        # named 'time'.
        gdt_description = [('time', 'datetime', 'time')]
        for column in temp_gdt_description[1:]:
            if column is not None and column[0] != 'time':
                gdt_description.append((column[0], column[1], column[2]))

        # adjust the 'float' time to datetime in the content
        gdt_content = []
        for row in temp_gdt_content:
            # sometimes there are inexplicable empty tuples in the content. Drop them
            if len(row) == 0:
                continue

            converted = [datetime.fromtimestamp(row[0])]
            for idx in range(1, len(row)):
                value = row[idx]
                # Columns declared as 'number' are cast to plain floats so no
                # numpy types reach the DataTable; missing values become 0.0
                if gdt_description[idx][1] == 'number':
                    if value is None:
                        converted.append(0.0)
                    else:
                        # Precision hardcoded for now. Needs to be on a per parameter basis
                        converted.append(round(float(value), 5))
                else:
                    converted.append(value)

            gdt_content.append(converted)

        # now generate the Google datatable out of the description and content
        gdt = gviz_api.DataTable(gdt_description)
        gdt.LoadData(gdt_content)

        # return the json version of the table, optionally wrapped in the
        # callback (the response is placed between double quotes unescaped)
        response = gdt.ToJSonResponse(req_id=req_id)
        if callback == '':
            return response
        return callback + "(\"" + response + "\")"
    def _get_highcharts_data(self,
                             data_product_id='',
                             visualization_parameters=None):
        """Retrieve a data product's data and return it as a HighCharts JSON string.

        The dataset associated with the data product is read (through
        DataRetrieverService.retrieve_oob when 'use_direct_access' is 1,
        otherwise through self.clients.data_retriever.retrieve), passed to
        VizTransformHighChartsAlgorithm.execute, and the resulting series are
        serialized with json.dumps.

        @param data_product_id    str
        @param visualization_parameters    dict, optional. Keys read here:
               'parameters', 'start_time', 'end_time', 'stride_time',
               'use_direct_access'. The same dict is also passed to the
               transform as its config.
        @retval jsonp_visualization_data str  JSON list of series; "[]" when
                the retrieve or the transform yields nothing
        @throws BadRequest  data_product_id is empty
        @throws NotFound    no dataset or stream definition is associated
                            with the data product
        """

        # error check
        if not data_product_id:
            raise BadRequest("The data_product_id parameter is missing")

        # JSON for an empty list of series, returned when there is no data
        empty_hc = json.dumps([])

        use_direct_access = False
        if visualization_parameters == {}:
            visualization_parameters = None

        # Extract the parameters. Definitely init first
        query = None
        if visualization_parameters:
            query = {}

            # Error check and damage control. Definitely need time.
            # NOTE: 'time' is appended to the caller's list in place; the same
            # dict is later handed to the transform as its config.
            if 'parameters' in visualization_parameters and len(
                    visualization_parameters['parameters']) > 0:
                requested = visualization_parameters['parameters']
                if 'time' not in requested:
                    requested.append('time')
                query['parameters'] = requested

            # Start/end times are passed through as integers; no NTP
            # conversion is applied here.
            if 'start_time' in visualization_parameters:
                query['start_time'] = int(
                    visualization_parameters['start_time'])

            if 'end_time' in visualization_parameters:
                query['end_time'] = int(visualization_parameters['end_time'])

            # stride time, defaulting to 1
            if 'stride_time' in visualization_parameters:
                try:
                    query['stride_time'] = int(
                        visualization_parameters['stride_time'])
                except (TypeError, ValueError):
                    # There are some (rare) situations where the AJAX request
                    # has 'null' for stride_time (TypeError); an empty or
                    # non-numeric string (ValueError) is treated the same way.
                    query['stride_time'] = 1
            else:
                query['stride_time'] = 1

            # direct access parameter
            if 'use_direct_access' in visualization_parameters:
                use_direct_access = int(
                    visualization_parameters['use_direct_access']) == 1

        # get the dataset_id and objs associated with the data_product. Need it to do the data retrieval
        ds_ids, _ = self.clients.resource_registry.find_objects(
            data_product_id, PRED.hasDataset, RT.Dataset, True)
        if not ds_ids:
            raise NotFound(
                "Could not find dataset associated with data product")

        stream_def_ids, _ = self.clients.resource_registry.find_objects(
            data_product_id, PRED.hasStreamDefinition, id_only=True)
        if not stream_def_ids:
            raise NotFound(
                'Could not find stream definition associated with data product'
            )
        stream_def_id = stream_def_ids[0]

        if use_direct_access:
            retrieved_granule = DataRetrieverService.retrieve_oob(
                ds_ids[0], query=query, delivery_format=stream_def_id)
        else:
            retrieved_granule = self.clients.data_retriever.retrieve(
                ds_ids[0], query=query, delivery_format=stream_def_id)

        # If there is no data, return an empty list
        if retrieved_granule is None:
            return empty_hc

        # send the granule through the transform to get the HighCharts data
        # NOTE(review): a stream definition is created on every call and
        # nothing in this method removes it - confirm that is intended.
        hc_pdict_id = self.clients.dataset_management.read_parameter_dictionary_by_name(
            'highcharts', id_only=True)
        hc_stream_def = self.clients.pubsub_management.create_stream_definition(
            'HighCharts_out', parameter_dictionary_id=hc_pdict_id)

        hc_data_granule = VizTransformHighChartsAlgorithm.execute(
            retrieved_granule,
            params=hc_stream_def,
            config=visualization_parameters)

        # Identity check: '==' on an arbitrary granule object may not return a plain bool
        if hc_data_granule is None:
            return empty_hc

        hc_rdt = RecordDictionaryTool.load_from_granule(hc_data_granule)

        # Convert the hc_data into a non numpy version so it can be serialized
        hc_data_np = get_safe(hc_rdt, "hc_data")[0]
        hc_data = []
        for series in hc_data_np:
            plain_series = {}
            for key in series:
                if key == "data":
                    plain_series[key] = series[key].tolist()
                else:
                    plain_series[key] = series[key]
            hc_data.append(plain_series)

        # return the json version of the series
        return json.dumps(hc_data)
def retrieve_stream(dataset_id='', query=None):
    """Return the result of DataRetrieverService.retrieve_oob for a dataset.

    @param dataset_id    passed through as the first positional argument
    @param query         passed through as the second positional argument
    @retval whatever DataRetrieverService.retrieve_oob returns
    """
    retrieved = DataRetrieverService.retrieve_oob(dataset_id, query)
    return retrieved